// Hands this file's RCS "$Id$" keyword string to rcs_id(), which is defined
// outside this file (presumably it records the revision for display -- confirm).
3 rcs_id('$Id: dbmlib.php,v 1.7.2.2 2001-11-06 20:43:11 dairiki Exp $');
10 RetrievePage($dbi, $pagename, $pagestore)
11 InsertPage($dbi, $pagename, $pagehash, $pagestore='wiki')
12 SaveCopyToArchive($dbi, $pagename, $pagehash)
13 IsWikiPage($dbi, $pagename)
14 InitTitleSearch($dbi, $search)
15 TitleSearchNextMatch($dbi, &$pos)
16 InitFullSearch($dbi, $search)
17 FullSearchNextMatch($dbi, &$pos)
18 MakeBackLinkSearchRegexp($pagename)
19 InitBackLinkSearch($dbi, $pagename)
20 BackLinkSearchNextMatch($dbi, &$pos)
21 IncreaseHitCount($dbi, $pagename)
22 GetHitCount($dbi, $pagename)
23 InitMostPopular($dbi, $limit)
24 MostPopularNextMatch($dbi, &$res)
28 // open a database and return the handle
29 // loop until we get a handle; php has its own
30 // locking mechanism, thank god.
31 // Suppress ugly error message with @.
//
// Opens every DBM file named in the global $WikiDB hash (mode "c") and keeps
// each handle in $dbi under the same key it has in $WikiDB.
// The $dbname parameter is not referenced on any line visible in this view.
33 function OpenDataBase($dbname) {
34 global $WikiDB; // hash of all the DBM file names
// walk the (key => file name) pairs of $WikiDB
37 while (list($key, $file) = each($WikiDB)) {
// keep retrying dbmopen() for as long as it yields a value below 1
38 while (($dbi[$key] = @dbmopen($file, "c")) < 1) {
// NOTE(review): no line visible here initialises or resets $numattempts, so the
// attempt budget looks shared across all files rather than per file -- confirm.
40 if ($numattempts > MAX_DBM_ATTEMPTS) {
41 ExitWiki("Cannot open database '$key' : '$file', giving up.");
// Walks the handle hash $dbi as (name => DBM handle) pairs.  The per-handle
// action is on lines not visible in this view (presumably it closes each
// handle -- confirm).
50 function CloseDataBase($dbi) {
52 while (list($dbmfile, $dbihandle) = each($dbi)) {
59 // take a serialized hash, return same padded out to
60 // the next largest number bytes divisible by 500. This
61 // is to save disk space in the long run, since DBM files
// (the remainder of the sentence above is not visible in this view)
//
// $data: the serialized string to pad.  A string whose length is already a
// multiple of 500 keeps its length, because ceil() of an exact quotient is
// that quotient.
63 function PadSerializedData($data) {
64 // calculate the next largest number divisible by 500
65 $nextincr = 500 * ceil(strlen($data) / 500);
// "%-<N>s" left-justifies $data and pads it on the right with spaces up to N characters
67 $data = sprintf("%-${nextincr}s", $data);
71 // strip trailing whitespace from the serialized data
// (the function body is on lines not visible in this view; per the comment
// above it undoes the space padding applied before records are stored)
73 function UnPadSerializedData($data) {
79 // Return hash of page + attributes or default
//
//   $dbi        hash of DBM handles, indexed by store name
//   $pagename   key to fetch
//   $pagestore  which entry of $dbi to read from
//
// The value returned when nothing is fetched (the "default") is on lines not
// visible in this view; callers in this file test the result with is_array().
80 function RetrievePage($dbi, $pagename, $pagestore) {
// the assignment doubles as the test: any falsy fetch result skips this branch
81 if ($data = dbmfetch($dbi[$pagestore], $pagename)) {
82 // unserialize $data into a hash
83 $pagehash = unserialize(UnPadSerializedData($data));
// the page name is the DBM key; it is (re)set on the hash after loading
84 $pagehash['pagename'] = $pagename;
// Store a page record under $pagename, inserting it or overwriting an
// existing record.
//
//   $dbi        hash of open DBM handles, indexed by store name
//   $pagename   key the record is stored under
//   $pagehash   page data; serialized and padded before it is written
//   $pagestore  name of the store to write to (default 'wiki')
//
// Writes to the 'wiki' store also refresh the link table through
// SetWikiPageLinks().  ExitWiki() is called when both the insert and the
// fallback replace report a true (failure) result.
function InsertPage($dbi, $pagename, $pagehash, $pagestore='wiki') {

   // a bit of a hack: only writes to the 'wiki' store update the link table
   if ($pagestore == 'wiki') {
      SetWikiPageLinks($dbi, $pagename,
                       ExtractWikiPageLinks($pagehash['content']));
   }

   $record = PadSerializedData(serialize($pagehash));
   $store = $dbi[$pagestore];

   // a false result from dbminsert() means there is nothing more to do
   if (! dbminsert($store, $pagename, $record))
      return;

   // insert was refused: try to overwrite the existing record instead
   if (! dbmreplace($store, $pagename, $record))
      return;

   ExitWiki("Error inserting page '$pagename'");
}
// for archiving pages to a separate dbm
//
// Serializes and pads $pagehash and stores it under $pagename in the store
// named by the global $ArchivePageStore.
//
//   $dbi        hash of open DBM handles, indexed by store name
//   $pagename   key the archived copy is stored under
//   $pagehash   page data to archive
//
// The fallback replace now targets the same handle as the insert; previously
// it always went to $dbi['archive'], so an $ArchivePageStore with any other
// value would have had its existing record left untouched while a different
// store was overwritten.  Calls ExitWiki() when both insert and replace
// report a true (failure) result.
function SaveCopyToArchive($dbi, $pagename, $pagehash) {
   global $ArchivePageStore;

   $pagedata = PadSerializedData(serialize($pagehash));
   $archive = $dbi[$ArchivePageStore];

   if (dbminsert($archive, $pagename, $pagedata)) {
      // insert was refused: overwrite the record in the same store
      if (dbmreplace($archive, $pagename, $pagedata)) {
         ExitWiki("Error storing '$pagename' into archive");
      }
   }
}
// Report whether the 'wiki' store has a record keyed by $pagename.
// The result is whatever dbmexists() reports for that key.
function IsWikiPage($dbi, $pagename) {
   $found = dbmexists($dbi['wiki'], $pagename);
   return $found;
}
// Report whether the 'archive' store has a record keyed by $pagename.
// The result is whatever dbmexists() reports for that key.
function IsInArchive($dbi, $pagename) {
   $found = dbmexists($dbi['archive'], $pagename);
   return $found;
}
// Remove every trace of $pagename: its records in the 'wiki', 'archive' and
// 'hitcount' stores, its entry in the 'fromlinks' of each page listed in its
// own 'tolinks', and finally its own 'wikilinks' record.
//
//   $dbi       hash of open DBM handles, indexed by store name
//   $pagename  page to remove
//
// Return values of the dbmdelete() calls are not checked.
134 function RemovePage($dbi, $pagename) {
136 dbmdelete($dbi['wiki'], $pagename); // report error if this fails?
137 dbmdelete($dbi['archive'], $pagename); // no error if this fails
138 dbmdelete($dbi['hitcount'], $pagename); // no error if this fails
// link record of the page being removed (not an array if there is none)
140 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
142 // remove page from fromlinks of pages it had links to
143 if (is_array($linkinfo)) { // page exists?
144 $tolinks = $linkinfo['tolinks'];
// the keys of $tolinks are the names of the pages this page linked to
146 while (list($tolink, $dummy) = each($tolinks)) {
147 $tolinkinfo = RetrievePage($dbi, $tolink, 'wikilinks');
148 if (is_array($tolinkinfo)) { // page found?
// rebuild the target's fromlinks from scratch, leaving out $pagename
149 $oldFromlinks = $tolinkinfo['fromlinks'];
150 $tolinkinfo['fromlinks'] = array(); // erase fromlinks
151 reset($oldFromlinks);
152 while (list($fromlink, $dummy) = each($oldFromlinks)) {
153 if ($fromlink != $pagename) // not to be erased?
154 $tolinkinfo['fromlinks'][$fromlink] = 1; // put link back
155 } // put link info back in DBM file
// written with store 'wikilinks', so InsertPage() skips its link-table update
156 InsertPage($dbi, $tolink, $tolinkinfo, 'wikilinks');
160 // remove page itself
161 dbmdelete($dbi['wikilinks'], $pagename);
166 // setup for title-search
//
// Builds the search cursor $pos: 'search' holds the pattern and 'key' the
// first key of the 'wiki' store.  The return statement is on a line not
// visible in this view (presumably returns $pos; other functions in this file
// return this function's result as their own cursor).
167 function InitTitleSearch($dbi, $search) {
168 $pos['search'] = $search;
169 $pos['key'] = dbmfirstkey($dbi['wiki']);
175 // iterating through database
//
// Advances the cursor $pos over the keys of the 'wiki' store and tests page
// names against $pos['search'] with eregi() (case-insensitive POSIX regex).
// $pos is taken by reference so the advanced key is kept between calls.
// What is returned on a match / at the end is on lines not visible here.
176 function TitleSearchNextMatch($dbi, &$pos) {
// a falsy key ends the scan
177 while ($pos['key']) {
// step the cursor forward before testing
179 $pos['key'] = dbmnextkey($dbi['wiki'], $pos['key']);
// NOTE(review): $page is assigned on a line not visible in this view
// (presumably the key held before the cursor was advanced -- confirm).
181 if (eregi($pos['search'], $page)) {
// Prepare a full-text search.  The cursor is built by InitTitleSearch(),
// so both kinds of search start from the same cursor layout.
function InitFullSearch($dbi, $search) {
   $pos = InitTitleSearch($dbi, $search);
   return $pos;
}
195 //iterating through database
//
// Advances the cursor $pos over the 'wiki' store and tests each stored record
// against $pos['search'] with eregi().  For a matching record a $page hash
// with 'pagename' and 'content' is filled in; the return statements are on
// lines not visible in this view.
196 function FullSearchNextMatch($dbi, &$pos) {
// a falsy key ends the scan
197 while ($pos['key']) {
// step the cursor forward before fetching
199 $pos['key'] = dbmnextkey($dbi['wiki'], $pos['key']);
// NOTE(review): $key is assigned on a line not visible in this view
// (presumably the key held before the cursor was advanced -- confirm).
201 $pagedata = dbmfetch($dbi['wiki'], $key);
202 // test the serialized data
// the pattern is applied to the whole serialized record, so it can match any
// stored field (and the serialization syntax), not only the page content
203 if (eregi($pos['search'], $pagedata)) {
204 $page['pagename'] = $key;
// only matching records are unserialized
205 $pagedata = unserialize(UnPadSerializedData($pagedata));
206 $page['content'] = $pagedata['content'];
214 ////////////////////////
215 // new database features
217 // Compute PCRE suitable for searching for links to the given page.
//
//   $pagename  name of the page whose incoming links are searched for
//
// Returns a complete pattern string using '=' as the delimiter.  Two forms:
//  - $pagename matches $WikiNameRegexp: the quoted name, not preceded by a
//    letter, digit or '!' and not followed by a letter or digit;
//  - otherwise: the quoted name inside a single, isolated pair of brackets,
//    optionally preceded by "stuff|" (parts of that expression are on lines
//    not visible in this view).
218 function MakeBackLinkSearchRegexp($pagename) {
219 global $WikiNameRegexp;
221 // Note that in (at least some) PHP 3.x's, preg_quote only takes
222 // (at most) one argument. Also it doesn't quote '/'s.
223 // It does quote '='s, so we'll use that for the delimiter.
224 $quoted_pagename = preg_quote($pagename);
// $WikiNameRegexp is interpolated between '/' delimiters and anchored at both ends
225 if (preg_match("/^$WikiNameRegexp\$/", $pagename)) {
226 # FIXME: This may need modification for non-standard (non-english) $WikiNameRegexp.
227 return "=(?<![A-Za-z0-9!])$quoted_pagename(?![A-Za-z0-9])=";
230 // Note from author: Sorry. :-/
232 . '(?<!\[)\[(?!\[)' // Single, isolated '['
233 . '([^]|]*\|)?' // Optional stuff followed by '|'
234 . '\s*' // Optional space
235 . $quoted_pagename // Pagename
236 . '\s*\]=' ); // Optional space, followed by ']'
237 // FIXME: the above regexp is still not quite right.
238 // Consider the text: " [ [ test page ]". This is a link to a page
239 // named '[ test page'. The above regexp will recognize this
240 // as a link either to '[ test page' (good) or to 'test page' (wrong).
// Prepare a back-link search for $pagename: build the link-matching pattern
// and wrap it in a search cursor created by InitTitleSearch().
function InitBackLinkSearch($dbi, $pagename) {
   $pattern = MakeBackLinkSearchRegexp($pagename);
   return InitTitleSearch($dbi, $pattern);
}
249 // iterating through back-links
//
// Advances the cursor $pos over the 'wiki' store looking for records that
// match the PCRE in $pos['search'].  $pos is taken by reference so the
// advanced key is kept between calls.  The statements executed on a match or
// a miss are on lines not visible in this view.
250 function BackLinkSearchNextMatch($dbi, &$pos) {
// a falsy key ends the scan
251 while ($pos['key']) {
// step the cursor forward before fetching
253 $pos['key'] = dbmnextkey($dbi['wiki'], $pos['key']);
// NOTE(review): $page is assigned on a line not visible in this view
// (presumably the key held before the cursor was advanced -- confirm).
255 $rawdata = dbmfetch($dbi['wiki'], $page);
// first pass: test the raw serialized record, before paying for unserialize()
256 if ( ! preg_match($pos['search'], $rawdata))
// second pass: test line by line, since a match in the raw record may span
// or fall outside the actual content lines
259 $pagedata = unserialize(UnPadSerializedData($rawdata));
260 while (list($i, $line) = each($pagedata['content'])) {
261 if (preg_match($pos['search'], $line))
// Record one more hit for $pagename in the 'hitcount' store: an existing
// counter is fetched and written back with dbmreplace(), a missing one is
// created with dbminsert().  The statements that compute $count are on lines
// not visible in this view.  Results of the write calls are not checked.
268 function IncreaseHitCount($dbi, $pagename) {
270 if (dbmexists($dbi['hitcount'], $pagename)) {
271 // increase the hit count
272 // echo "$pagename there, incrementing...<br>\n";
273 $count = dbmfetch($dbi['hitcount'], $pagename);
275 dbmreplace($dbi['hitcount'], $pagename, $count);
277 // add it, set the hit count to one
279 dbminsert($dbi['hitcount'], $pagename, $count);
// Look up the stored hit counter for $pagename in the 'hitcount' store.
// This function only reads; the return statements (including the value used
// when no counter exists) are on lines not visible in this view.
284 function GetHitCount($dbi, $pagename) {
286 if (dbmexists($dbi['hitcount'], $pagename)) {
287 // fetch the stored hit count
288 $count = dbmfetch($dbi['hitcount'], $pagename);
// Collect the pages with the highest hit counts.
//
//   $dbi    hash of open DBM handles; only $dbi['hitcount'] is read
//   $limit  number of entries to keep while scanning
//
// Returns an array (pagename => hits) sorted from most to least hits with
// its internal pointer reset, ready to be walked with each().  An empty
// 'hitcount' store yields an empty array.
//
// Because sorting all the pages may be a lot of work we only keep the top
// $limit while scanning.  A page is only added if its score is higher than
// the lowest score in the list.  If the list is full then one of the pages
// with the lowest score is removed.
function InitMostPopular($dbi, $limit) {
   $res = array();

   $pagename = dbmfirstkey($dbi['hitcount']);
   if (strlen($pagename) == 0)
      return $res;                       // no hit counts recorded at all

   $score = (int) dbmfetch($dbi['hitcount'], $pagename);
   $res[$pagename] = $score;
   $lowest = $score;                     // lowest score currently in $res

   while ($pagename = dbmnextkey($dbi['hitcount'], $pagename)) {
      $score = (int) dbmfetch($dbi['hitcount'], $pagename);

      if (count($res) < $limit) {        // room left in $res?
         if ($score < $lowest)
            $lowest = $score;
         $res[$pagename] = $score;       // add page to $res
      } elseif ($score > $lowest) {
         // $res is full: rebuild it with the new page, leaving out exactly
         // one of the entries that holds the lowest score
         $oldres = $res;                 // save old result
         $res = array($pagename => $score);
         $removed = 0;                   // nothing removed yet
         $newlowest = $score;            // new lowest score

         reset($oldres);
         while (list($pname, $pscore) = each($oldres)) {
            // comparison, not assignment: only an entry that really has the
            // lowest score may be dropped
            if (!$removed and ($pscore == $lowest)) {
               $removed = 1;             // don't copy this entry
            } else {
               $res[$pname] = (int) $pscore;
               if ($pscore < $newlowest)
                  $newlowest = $pscore;
            }
         }
         $lowest = $newlowest;
      }
   }

   arsort($res);                         // sort, highest score first
   reset($res);

   return $res;
}
// Take the next (pagename => hits) pair from $res with each().  $res is taken
// by reference so its internal pointer advances from call to call.  What is
// returned once $res is exhausted is on lines not visible in this view.
344 function MostPopularNextMatch($dbi, &$res) {
346 // the return result is a two element array with 'hits'
347 // and 'pagename' as the keys
349 if (list($pagename, $hits) = each($res)) {
352 "pagename" => $pagename
// Collect every key of a DBM file into $namelist, one entry per index $ctr.
// The initialisation of $namelist / $ctr, the counter increment and the
// return statement are on lines not visible in this view.
//
// NOTE(review): $dbi is passed straight to dbmfirstkey()/dbmnextkey() here,
// whereas every other function in this file indexes it first (for example
// $dbi['wiki']).  Either callers pass a single handle to this function or
// this is a bug -- confirm against the callers.
361 function GetAllWikiPagenames($dbi) {
// the first key is stored at the starting index and also seeds the loop
365 $namelist[$ctr] = $key = dbmfirstkey($dbi);
// a falsy key from dbmnextkey() ends the scan
367 while ($key = dbmnextkey($dbi, $key)) {
369 $namelist[$ctr] = $key;
376 ////////////////////////////////////////////
377 // functionality for the wikilinks DBM file
379 // format of the 'wikilinks' DBM file :
381 // { tolinks => { pagename => 1 }, fromlinks => { pagename => 1 } }
383 // takes a page name, returns array of scored incoming and outgoing links
//
//   $dbi       hash of open DBM handles; reads 'wikilinks' and 'hitcount'
//   $pagename  page whose links are wanted
//
// Builds $links with three lists of array(pagename, number) pairs, each
// sorted in descending order of the number:
//   'out'      pages $pagename links to, scored by how many pages link to them
//   'in'       pages linking to $pagename, scored the same way
//   'popular'  union of both, scored by hit count from the 'hitcount' store
// The return statement is on a line not visible in this view.
384 function GetWikiPageLinks($dbi, $pagename) {
386 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
387 if (is_array($linkinfo)) { // page exists?
388 $tolinks = $linkinfo['tolinks']; // outgoing links
389 $fromlinks = $linkinfo['fromlinks']; // incoming links
390 } else { // new page, but pages may already point to it
391 // create info for page
393 $fromlinks = array();
394 // look up pages that link to $pagename
// full scan of the 'wikilinks' store; the loop header is on a line not visible here
395 $pname = dbmfirstkey($dbi['wikilinks']);
397 $linkinfo = RetrievePage($dbi, $pname, 'wikilinks');
398 if ($linkinfo['tolinks'][$pagename]) // $pname links to $pagename?
399 $fromlinks[$pname] = 1;
400 $pname = dbmnextkey($dbi['wikilinks'], $pname);
404 // get and sort the outgoing links
406 reset($tolinks); // look up scores for tolinks
407 while(list($tolink, $dummy) = each($tolinks)) {
408 $toPage = RetrievePage($dbi, $tolink, 'wikilinks');
409 if (is_array($toPage)) // link to internal page?
// score = number of pages that link to the target
410 $outlinks[$tolink] = count($toPage['fromlinks']);
412 arsort($outlinks); // sort on score
413 $links['out'] = array();
414 reset($outlinks); // convert to right format
415 while(list($link, $score) = each($outlinks))
416 $links['out'][] = array($link, $score);
418 // get and sort the incoming links
420 reset($fromlinks); // look up scores for fromlinks
421 while(list($fromlink, $dummy) = each($fromlinks)) {
422 $fromPage = RetrievePage($dbi, $fromlink, 'wikilinks');
// unlike the outgoing case there is no is_array() check on the fetched record here
423 $inlinks[$fromlink] = count($fromPage['fromlinks']);
425 arsort($inlinks); // sort on score
426 $links['in'] = array();
427 reset($inlinks); // convert to right format
428 while(list($link, $score) = each($inlinks))
429 $links['in'][] = array($link, $score);
431 // sort all the incoming and outgoing links
432 $allLinks = $outlinks; // copy the outlinks
433 reset($inlinks); // add the inlinks
434 while(list($key, $value) = each($inlinks))
435 $allLinks[$key] = $value;
// the link scores are only placeholders here: every value is replaced by the hit count
436 reset($allLinks); // lookup hits
437 while(list($key, $value) = each($allLinks))
438 $allLinks[$key] = (int) dbmfetch($dbi['hitcount'], $key);
439 arsort($allLinks); // sort on hits
440 $links['popular'] = array();
441 reset($allLinks); // convert to right format
442 while(list($link, $hits) = each($allLinks))
443 $links['popular'][] = array($link, $hits);
449 // takes page name, list of links it contains
450 // the $linklist is an array where the keys are the page names
//
//   $dbi       hash of open DBM handles; reads and writes 'wikilinks'
//   $pagename  page whose outgoing links are being replaced
//   $linklist  new outgoing links (page name => anything)
//
// Works on an in-memory copy ($cache) of every affected 'wikilinks' record
// and writes all of them back at the end, so each record is stored once.
451 function SetWikiPageLinks($dbi, $pagename, $linklist) {
455 // Phase 1: fetch the relevant pairs from 'wikilinks' into $cache
456 // ---------------------------------------------------------------
458 // first the info for $pagename
459 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
460 if (is_array($linkinfo)) // page exists?
461 $cache[$pagename] = $linkinfo;
// (the else branch header is on a line not visible in this view)
463 // create info for page
464 $cache[$pagename] = array( 'fromlinks' => array(),
467 // look up pages that link to $pagename
// full scan of the 'wikilinks' store; the loop header is on a line not visible here
468 $pname = dbmfirstkey($dbi['wikilinks']);
470 $linkinfo = RetrievePage($dbi, $pname, 'wikilinks');
471 if ($linkinfo['tolinks'][$pagename])
472 $cache[$pagename]['fromlinks'][$pname] = 1;
473 $pname = dbmnextkey($dbi['wikilinks'], $pname);
477 // then the info for the pages that $pagename used to point to
478 $oldTolinks = $cache[$pagename]['tolinks'];
480 while (list($link, $dummy) = each($oldTolinks)) {
481 $linkinfo = RetrievePage($dbi, $link, 'wikilinks');
// targets without a 'wikilinks' record are simply not cached
482 if (is_array($linkinfo))
483 $cache[$link] = $linkinfo;
486 // finally the info for the pages that $pagename will point to
488 while (list($link, $dummy) = each($linklist)) {
489 $linkinfo = RetrievePage($dbi, $link, 'wikilinks');
490 if (is_array($linkinfo))
491 $cache[$link] = $linkinfo;
494 // Phase 2: delete the old links
495 // ---------------------------------------------------------------
497 // delete the old tolinks for $pagename
498 // $cache[$pagename]['tolinks'] = array();
499 // (overwritten anyway in Phase 3)
501 // remove $pagename from the fromlinks of pages in $oldTolinks
504 while (list($oldTolink, $dummy) = each($oldTolinks)) {
505 if ($cache[$oldTolink]) { // links to existing page?
// rebuild the target's fromlinks from scratch, leaving out $pagename
506 $oldFromlinks = $cache[$oldTolink]['fromlinks'];
507 $cache[$oldTolink]['fromlinks'] = array(); // erase fromlinks
508 reset($oldFromlinks); // comp. new fr.links
509 while (list($fromlink, $dummy) = each($oldFromlinks)) {
510 if ($fromlink != $pagename)
511 $cache[$oldTolink]['fromlinks'][$fromlink] = 1;
516 // Phase 3: add the new links
517 // ---------------------------------------------------------------
519 // set the new tolinks for $pagename
520 $cache[$pagename]['tolinks'] = $linklist;
522 // add $pagename to the fromlinks of pages in $linklist
524 while (list($link, $dummy) = each($linklist)) {
// only targets cached in Phase 1 (those with a 'wikilinks' record) are updated
525 if ($cache[$link]) // existing page?
526 $cache[$link]['fromlinks'][$pagename] = 1;
529 // Phase 4: write $cache back to 'wikilinks'
530 // ---------------------------------------------------------------
// written with store 'wikilinks', so InsertPage() skips its own link-table
// update and does not call back into this function
533 while (list($link,$fromAndTolinks) = each($cache))
534 InsertPage($dbi, $link, $fromAndTolinks, 'wikilinks');