3 rcs_id('$Id: dbmlib.php,v 1.7.2.1 2001-08-18 00:35:10 dairiki Exp $');
10 RetrievePage($dbi, $pagename, $pagestore)
11 InsertPage($dbi, $pagename, $pagehash)
12 SaveCopyToArchive($dbi, $pagename, $pagehash)
13 IsWikiPage($dbi, $pagename)
14 InitTitleSearch($dbi, $search)
15 TitleSearchNextMatch($dbi, $res)
16 InitFullSearch($dbi, $search)
17 FullSearchNextMatch($dbi, $res)
18 MakeBackLinkSearchRegexp($pagename)
19 InitBackLinkSearch($dbi, $pagename)
20 BackLinkSearchNextMatch($dbi, &$pos)
21 IncreaseHitCount($dbi, $pagename)
22 GetHitCount($dbi, $pagename)
23 InitMostPopular($dbi, $limit)
24 MostPopularNextMatch($dbi, $res)
28 // open a database and return the handle
29 // loop until we get a handle; php has its own
30 // locking mechanism, thank god.
31 // Suppress ugly error message with @.
// Returns a hash of open DBM handles keyed by store name
// ('wiki', 'archive', 'wikilinks', 'hitcount', ...) as configured in
// the global $WikiDB.
// NOTE(review): the retry-counter ($numattempts) initialization and
// increment, the sleep between attempts, and the final "return $dbi;"
// are not visible in this excerpt.
33 function OpenDataBase($dbname) {
34 global $WikiDB; // hash of all the DBM file names
// open every configured store; retry while dbmopen() fails
37 while (list($key, $file) = each($WikiDB)) {
38 while (($dbi[$key] = @dbmopen($file, "c")) < 1) {
// give up on this store after MAX_DBM_ATTEMPTS tries
40 if ($numattempts > MAX_DBM_ATTEMPTS) {
41 ExitWiki("Cannot open database '$key' : '$file', giving up.");
// Close every DBM handle opened by OpenDataBase().
// NOTE(review): the dbmclose() call inside this loop is not visible in
// this excerpt.
50 function CloseDataBase($dbi) {
52 while (list($dbmfile, $dbihandle) = each($dbi)) {
// take a serialized hash, return same padded out to
// the next largest number of bytes divisible by 500. This
// is to save disk space in the long run, since DBM files
// can reuse fixed-size record buckets when pages are rewritten.
function PadSerializedData($data) {
    // calculate the next largest number divisible by 500
    $nextincr = 500 * (int) ceil(strlen($data) / 500);
    // right-pad with spaces; equivalent to sprintf("%-{$nextincr}s", $data)
    // but avoids the deprecated "${var}" string interpolation and the
    // dynamic printf-width hack
    return str_pad($data, $nextincr);
}
// strip trailing whitespace from the serialized data
// (inverse of PadSerializedData; safe because serialize() output never
// ends in whitespace -- it always ends in '}', ';' or '"').
function UnPadSerializedData($data) {
    return rtrim($data);
}
79 // Return hash of page + attributes or default
// $pagestore selects which DBM to read ('wiki', 'archive',
// 'wikilinks', ...).  Callers test the result with is_array() to
// distinguish a found page from the not-found default.
// NOTE(review): the not-found return value is not visible in this
// excerpt.
80 function RetrievePage($dbi, $pagename, $pagestore) {
81 if ($data = dbmfetch($dbi[$pagestore], $pagename)) {
82 // unserialize $data into a hash
83 $pagehash = unserialize(UnPadSerializedData($data));
91 // Either insert or replace a key/value (a page)
// When writing to the main 'wiki' store this also rebuilds the link
// graph for the page from its content.
92 function InsertPage($dbi, $pagename, $pagehash, $pagestore='wiki') {
94 if ($pagestore == 'wiki') { // a bit of a hack
95 $linklist = ExtractWikiPageLinks($pagehash['content']);
96 SetWikiPageLinks($dbi, $pagename, $linklist);
99 $pagedata = PadSerializedData(serialize($pagehash));
// dbminsert() returns non-zero when the key already exists; fall back
// to dbmreplace(), and abort the request if that fails too
101 if (dbminsert($dbi[$pagestore], $pagename, $pagedata)) {
102 if (dbmreplace($dbi[$pagestore], $pagename, $pagedata)) {
103 ExitWiki("Error inserting page '$pagename'");
// for archiving pages to a separate dbm
// Stores a serialized+padded copy of $pagehash under $pagename in the
// archive store (one-deep page history).  Exits via ExitWiki() if the
// record can neither be inserted nor replaced.
function SaveCopyToArchive($dbi, $pagename, $pagehash) {
    global $ArchivePageStore;

    $pagedata = PadSerializedData(serialize($pagehash));

    // dbminsert() returns non-zero when the key already exists; fall
    // back to dbmreplace() in that case.
    if (dbminsert($dbi[$ArchivePageStore], $pagename, $pagedata)) {
        // FIX: was hard-coded $dbi['archive'] here while the insert
        // above used $dbi[$ArchivePageStore]; use the same store key
        // for both so a non-default $ArchivePageStore works.
        if (dbmreplace($dbi[$ArchivePageStore], $pagename, $pagedata)) {
            ExitWiki("Error storing '$pagename' into archive");
        }
    }
}
// True if a page named $pagename exists in the main 'wiki' store.
123 function IsWikiPage($dbi, $pagename) {
124 return dbmexists($dbi['wiki'], $pagename);
// True if an archived copy of $pagename exists in the 'archive' store.
128 function IsInArchive($dbi, $pagename) {
129 return dbmexists($dbi['archive'], $pagename);
// Delete a page from every store, and scrub it from the 'fromlinks'
// of each page it pointed to, so the link graph stays consistent.
133 function RemovePage($dbi, $pagename) {
135 dbmdelete($dbi['wiki'], $pagename); // report error if this fails?
136 dbmdelete($dbi['archive'], $pagename); // no error if this fails
137 dbmdelete($dbi['hitcount'], $pagename); // no error if this fails
// link record for this page: { tolinks => {...}, fromlinks => {...} }
139 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
141 // remove page from fromlinks of pages it had links to
142 if (is_array($linkinfo)) { // page exists?
143 $tolinks = $linkinfo['tolinks'];
// NOTE(review): a reset($tolinks) before this loop is not visible in
// this excerpt.
145 while (list($tolink, $dummy) = each($tolinks)) {
146 $tolinkinfo = RetrievePage($dbi, $tolink, 'wikilinks');
147 if (is_array($tolinkinfo)) { // page found?
148 $oldFromlinks = $tolinkinfo['fromlinks'];
149 $tolinkinfo['fromlinks'] = array(); // erase fromlinks
150 reset($oldFromlinks);
// rebuild fromlinks, omitting the page being removed
151 while (list($fromlink, $dummy) = each($oldFromlinks)) {
152 if ($fromlink != $pagename) // not to be erased?
153 $tolinkinfo['fromlinks'][$fromlink] = 1; // put link back
154 } // put link info back in DBM file
155 InsertPage($dbi, $tolink, $tolinkinfo, 'wikilinks');
159 // remove page itself
160 dbmdelete($dbi['wikilinks'], $pagename);
165 // setup for title-search
// Builds iterator state for TitleSearchNextMatch(): 'search' holds the
// (eregi) pattern, 'key' the current position in the 'wiki' DBM.
// NOTE(review): the "return $pos;" is not visible in this excerpt.
166 function InitTitleSearch($dbi, $search) {
167 $pos['search'] = $search;
168 $pos['key'] = dbmfirstkey($dbi['wiki']);
174 // iterating through database
// Returns the next page name whose title matches $pos['search']
// (case-insensitive), advancing $pos as it goes.
// NOTE(review): the assignment that copies the current key into $page
// before the match test, and the return statements, are not visible in
// this excerpt.
175 function TitleSearchNextMatch($dbi, &$pos) {
176 while ($pos['key']) {
178 $pos['key'] = dbmnextkey($dbi['wiki'], $pos['key']);
180 if (eregi($pos['search'], $page)) {
188 // setup for full-text search
// Full-text search reuses the title-search iterator state; only the
// NextMatch function differs (it matches page content, not titles).
189 function InitFullSearch($dbi, $search) {
190 return InitTitleSearch($dbi, $search);
194 //iterating through database
// Returns a hash with 'pagename' and 'content' for the next page whose
// record matches $pos['search'] (case-insensitive).  The raw serialized
// record is matched first as a cheap filter; only hits are unserialized.
// NOTE(review): the "$key = $pos['key'];" assignment preceding the
// dbmnextkey() call, and the return statements, are not visible in this
// excerpt.
195 function FullSearchNextMatch($dbi, &$pos) {
196 while ($pos['key']) {
198 $pos['key'] = dbmnextkey($dbi['wiki'], $pos['key']);
200 $pagedata = dbmfetch($dbi['wiki'], $key);
201 // test the serialized data
202 if (eregi($pos['search'], $pagedata)) {
203 $page['pagename'] = $key;
204 $pagedata = unserialize(UnPadSerializedData($pagedata));
205 $page['content'] = $pagedata['content'];
213 ////////////////////////
214 // new database features
216 // Compute PCRE suitable for searching for links to the given page.
217 function MakeBackLinkSearchRegexp($pagename) {
218 global $WikiNameRegexp;
220 // Note that in (at least some) PHP 3.x's, preg_quote only takes
221 // (at most) one argument. Also it doesn't quote '/'s.
222 // It does quote '='s, so we'll use that for the delimiter.
223 $quoted_pagename = preg_quote($pagename);
// WikiWord page names link implicitly: match the bare name, but reject
// matches embedded in a longer alphanumeric run or preceded by '!'
// (PhpWiki's link-suppression marker).
224 if (preg_match("/^$WikiNameRegexp\$/", $pagename)) {
225 # FIXME: This may need modification for non-standard (non-english) $WikiNameRegexp.
226 return "=(?<![A-Za-z0-9!])$quoted_pagename(?![A-Za-z0-9])=";
// Non-WikiWord names only link via the [bracket] syntax.
// NOTE(review): the head of this concatenation -- the line opening
// "return ( '='" -- is not visible in this excerpt.
229 // Note from author: Sorry. :-/
231 . '(?<!\[)\[(?!\[)' // Single, isolated '['
232 . '([^]|]*\|)?' // Optional stuff followed by '|'
233 . '\s*' // Optional space
234 . $quoted_pagename // Pagename
235 . '\s*\]=' ); // Optional space, followed by ']'
236 // FIXME: the above regexp is still not quite right.
237 // Consider the text: " [ [ test page ]". This is a link to a page
238 // named '[ test page'. The above regexp will recognize this
239 // as a link either to '[ test page' (good) or to 'test page' (wrong).
243 // setup for back-link search
// Iterator state has the same shape as for title search; only the
// pattern (a PCRE from MakeBackLinkSearchRegexp) differs.
244 function InitBackLinkSearch($dbi, $pagename) {
245 return InitTitleSearch($dbi, MakeBackLinkSearchRegexp($pagename));
248 // iterating through back-links
// Returns the name of the next page containing a link to the target
// page.  The raw serialized record is matched first as a cheap filter;
// the match is then confirmed line-by-line against the page content to
// avoid false positives that span the serialized structure.
// NOTE(review): the "$page = $pos['key'];" assignment, the continue on
// a failed pre-filter, and the return statements are not visible in
// this excerpt.
249 function BackLinkSearchNextMatch($dbi, &$pos) {
250 while ($pos['key']) {
252 $pos['key'] = dbmnextkey($dbi['wiki'], $pos['key']);
254 $rawdata = dbmfetch($dbi['wiki'], $page);
255 if ( ! preg_match($pos['search'], $rawdata))
258 $pagedata = unserialize(UnPadSerializedData($rawdata));
259 while (list($i, $line) = each($pagedata['content'])) {
260 if (preg_match($pos['search'], $line))
// Bump the view counter for $pagename in the 'hitcount' DBM, creating
// the entry on first view.
// NOTE(review): the "$count++;" between fetch and replace, and the
// "$count = 1;" in the else-branch, are not visible in this excerpt.
267 function IncreaseHitCount($dbi, $pagename) {
269 if (dbmexists($dbi['hitcount'], $pagename)) {
270 // increase the hit count
271 // echo "$pagename there, incrementing...<br>\n";
272 $count = dbmfetch($dbi['hitcount'], $pagename);
274 dbmreplace($dbi['hitcount'], $pagename, $count);
276 // add it, set the hit count to one
278 dbminsert($dbi['hitcount'], $pagename, $count);
// Read-only lookup of the hit count for $pagename.
// NOTE(review): the return statements (the fetched count, and a
// default for an unknown page) are not visible in this excerpt.
283 function GetHitCount($dbi, $pagename) {
285 if (dbmexists($dbi['hitcount'], $pagename)) {
286 // fetch the stored hit count (this function does not modify it)
287 $count = dbmfetch($dbi['hitcount'], $pagename);
// Build the list of the $limit most-visited pages.
//
// Iterates over the whole 'hitcount' DBM keeping only the current top
// $limit entries: a page is added only if its score beats the lowest
// retained score, and then exactly one lowest-scored entry is dropped.
// This avoids sorting all pages; only $limit entries are ever held.
//
// Returns a (pagename => hits) array sorted highest-to-lowest, meant
// to be consumed with MostPopularNextMatch().
function InitMostPopular($dbi, $limit) {
    // seed the result with the first entry in the hitcount DBM
    $pagename = dbmfirstkey($dbi['hitcount']);
    $score = dbmfetch($dbi['hitcount'], $pagename);
    $res = array($pagename => (int) $score);
    $lowest = $score;

    while ($pagename = dbmnextkey($dbi['hitcount'], $pagename)) {
        $score = dbmfetch($dbi['hitcount'], $pagename);
        if (count($res) < $limit) {            // room left in $res?
            if ($score < $lowest)
                $lowest = $score;
            $res[$pagename] = (int) $score;    // add page to $res
        } elseif ($score > $lowest) {
            $oldres = $res;                    // save old result
            $res = array();
            $removed = 0;                      // nothing removed yet
            $newlowest = $score;               // new lowest score
            $res[$pagename] = (int) $score;    // add page to $res
            reset($oldres);
            while (list($pname, $pscore) = each($oldres)) {
                // BUG FIX: was "($pscore = $lowest)" -- an assignment,
                // which is truthy for any non-zero $lowest and clobbers
                // $pscore.  A comparison is required so that exactly
                // one lowest-scored entry is dropped.
                if (!$removed and ($pscore == $lowest)) {
                    $removed = 1;              // don't copy this entry
                } else {
                    $res[$pname] = (int) $pscore;
                    if ($pscore < $newlowest)
                        $newlowest = $pscore;
                }
            }
            $lowest = $newlowest;
        }
    }
    arsort($res);  // sort highest-to-lowest by hits
    reset($res);

    return $res;
}
// Iterator over the result of InitMostPopular(): pops the next
// (pagename => hits) pair from $res.
// NOTE(review): the 'hits' element of the returned array and the
// end-of-list return value are not visible in this excerpt.
343 function MostPopularNextMatch($dbi, &$res) {
345 // the return result is a two element array with 'hits'
346 // and 'pagename' as the keys
348 if (list($pagename, $hits) = each($res)) {
351 "pagename" => $pagename
// Return an array of every key (page name) in a single DBM handle.
// Note: unlike most functions in this file, $dbi here is one DBM
// handle, not the hash of handles.
// NOTE(review): the $ctr initialization/increment and the return
// statement are not visible in this excerpt.
360 function GetAllWikiPagenames($dbi) {
364 $namelist[$ctr] = $key = dbmfirstkey($dbi);
366 while ($key = dbmnextkey($dbi, $key)) {
368 $namelist[$ctr] = $key;
375 ////////////////////////////////////////////
376 // functionality for the wikilinks DBM file
378 // format of the 'wikilinks' DBM file :
380 // { tolinks => { pagename => 1 }, fromlinks => { pagename => 1 } }
382 // takes a page name, returns array of scored incoming and outgoing links
// Result shape: $links['in'], $links['out'], $links['popular'] -- each
// a list of array(pagename, score) pairs sorted by descending score.
// 'in'/'out' scores are incoming-link counts of the linked page;
// 'popular' scores are hit counts from the 'hitcount' DBM.
383 function GetWikiPageLinks($dbi, $pagename) {
385 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
386 if (is_array($linkinfo)) { // page exists?
387 $tolinks = $linkinfo['tolinks']; // outgoing links
388 $fromlinks = $linkinfo['fromlinks']; // incoming links
389 } else { // new page, but pages may already point to it
390 // create info for page
392 $fromlinks = array();
393 // look up pages that link to $pagename
// linear scan of the whole wikilinks DBM -- O(pages), acceptable at
// wiki scale
394 $pname = dbmfirstkey($dbi['wikilinks']);
396 $linkinfo = RetrievePage($dbi, $pname, 'wikilinks');
397 if ($linkinfo['tolinks'][$pagename]) // $pname links to $pagename?
398 $fromlinks[$pname] = 1;
399 $pname = dbmnextkey($dbi['wikilinks'], $pname);
403 // get and sort the outgoing links
405 reset($tolinks); // look up scores for tolinks
406 while(list($tolink, $dummy) = each($tolinks)) {
407 $toPage = RetrievePage($dbi, $tolink, 'wikilinks');
408 if (is_array($toPage)) // link to internal page?
// an outgoing link's score is how many pages link to its target
409 $outlinks[$tolink] = count($toPage['fromlinks']);
411 arsort($outlinks); // sort on score
412 $links['out'] = array();
413 reset($outlinks); // convert to right format
414 while(list($link, $score) = each($outlinks))
415 $links['out'][] = array($link, $score);
417 // get and sort the incoming links
419 reset($fromlinks); // look up scores for fromlinks
420 while(list($fromlink, $dummy) = each($fromlinks)) {
421 $fromPage = RetrievePage($dbi, $fromlink, 'wikilinks');
422 $inlinks[$fromlink] = count($fromPage['fromlinks']);
424 arsort($inlinks); // sort on score
425 $links['in'] = array();
426 reset($inlinks); // convert to right format
427 while(list($link, $score) = each($inlinks))
428 $links['in'][] = array($link, $score);
430 // sort all the incoming and outgoing links
431 $allLinks = $outlinks; // copy the outlinks
432 reset($inlinks); // add the inlinks
433 while(list($key, $value) = each($inlinks))
434 $allLinks[$key] = $value;
435 reset($allLinks); // lookup hits
436 while(list($key, $value) = each($allLinks))
437 $allLinks[$key] = (int) dbmfetch($dbi['hitcount'], $key);
438 arsort($allLinks); // sort on hits
439 $links['popular'] = array();
440 reset($allLinks); // convert to right format
441 while(list($link, $hits) = each($allLinks))
442 $links['popular'][] = array($link, $hits);
// NOTE(review): the "return $links;" is not visible in this excerpt.
448 // takes page name, list of links it contains
449 // the $linklist is an array where the keys are the page names
// Rebuild the link graph after a page edit: read every affected
// wikilinks record into $cache, remove $pagename from the fromlinks of
// pages it no longer links to, add it to the fromlinks of pages it now
// links to, then write the whole cache back in one pass.
450 function SetWikiPageLinks($dbi, $pagename, $linklist) {
454 // Phase 1: fetch the relevant pairs from 'wikilinks' into $cache
455 // ---------------------------------------------------------------
457 // first the info for $pagename
458 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
459 if (is_array($linkinfo)) // page exists?
460 $cache[$pagename] = $linkinfo;
462 // create info for page
// NOTE(review): the 'tolinks' => array() half of this initializer is
// not visible in this excerpt.
463 $cache[$pagename] = array( 'fromlinks' => array(),
466 // look up pages that link to $pagename
// linear scan of the whole wikilinks DBM (same cost caveat as in
// GetWikiPageLinks)
467 $pname = dbmfirstkey($dbi['wikilinks']);
469 $linkinfo = RetrievePage($dbi, $pname, 'wikilinks');
470 if ($linkinfo['tolinks'][$pagename])
471 $cache[$pagename]['fromlinks'][$pname] = 1;
472 $pname = dbmnextkey($dbi['wikilinks'], $pname);
476 // then the info for the pages that $pagename used to point to
477 $oldTolinks = $cache[$pagename]['tolinks'];
479 while (list($link, $dummy) = each($oldTolinks)) {
480 $linkinfo = RetrievePage($dbi, $link, 'wikilinks');
481 if (is_array($linkinfo))
482 $cache[$link] = $linkinfo;
485 // finally the info for the pages that $pagename will point to
487 while (list($link, $dummy) = each($linklist)) {
488 $linkinfo = RetrievePage($dbi, $link, 'wikilinks');
489 if (is_array($linkinfo))
490 $cache[$link] = $linkinfo;
493 // Phase 2: delete the old links
494 // ---------------------------------------------------------------
496 // delete the old tolinks for $pagename
497 // $cache[$pagename]['tolinks'] = array();
498 // (overwritten anyway in Phase 3)
500 // remove $pagename from the fromlinks of pages in $oldTolinks
503 while (list($oldTolink, $dummy) = each($oldTolinks)) {
504 if ($cache[$oldTolink]) { // links to existing page?
505 $oldFromlinks = $cache[$oldTolink]['fromlinks'];
506 $cache[$oldTolink]['fromlinks'] = array(); // erase fromlinks
507 reset($oldFromlinks); // comp. new fr.links
// rebuild fromlinks, omitting the page being re-linked
508 while (list($fromlink, $dummy) = each($oldFromlinks)) {
509 if ($fromlink != $pagename)
510 $cache[$oldTolink]['fromlinks'][$fromlink] = 1;
515 // Phase 3: add the new links
516 // ---------------------------------------------------------------
518 // set the new tolinks for $pagename
519 $cache[$pagename]['tolinks'] = $linklist;
521 // add $pagename to the fromlinks of pages in $linklist
523 while (list($link, $dummy) = each($linklist)) {
524 if ($cache[$link]) // existing page?
525 $cache[$link]['fromlinks'][$pagename] = 1;
528 // Phase 4: write $cache back to 'wikilinks'
529 // ---------------------------------------------------------------
// every record touched above is rewritten (serialized + padded) via
// InsertPage with the 'wikilinks' store
532 while (list($link,$fromAndTolinks) = each($cache))
533 InsertPage($dbi, $link, $fromAndTolinks, 'wikilinks');