3 rcs_id('$Id: dbalib.php,v 1.2.2.5.2.2 2005-01-07 13:59:58 rurban Exp $');
10 PadSerializedData($data)
11 UnPadSerializedData($data)
12 RetrievePage($dbi, $pagename, $pagestore)
13 InsertPage($dbi, $pagename, $pagehash)
14 SaveCopyToArchive($dbi, $pagename, $pagehash)
15 IsWikiPage($dbi, $pagename)
16 IsInArchive($dbi, $pagename)
17 InitTitleSearch($dbi, $search)
18 TitleSearchNextMatch($dbi, &$pos)
19 InitFullSearch($dbi, $search)
20 FullSearchNextMatch($dbi, &$pos)
21 MakeBackLinkSearchRegexp($pagename)
22 InitBackLinkSearch($dbi, $pagename)
23 BackLinkSearchNextMatch($dbi, &$pos)
24 IncreaseHitCount($dbi, $pagename)
25 GetHitCount($dbi, $pagename)
26 InitMostPopular($dbi, $limit)
27 MostPopularNextMatch($dbi, &$res)
28 GetAllWikiPagenames($dbi)
32 // open a database and return the handle
33 // loop until we get a handle; php has its own
34 // locking mechanism, thank god.
35 // Suppress ugly error message with @.
// Open every DBA file named in the global $WikiDB hash and return a hash
// of open handles keyed by store name ('wiki', 'archive', 'hitcount', ...).
// Retries each open with a random back-off until MAX_DBM_ATTEMPTS is
// exceeded, then aborts via ExitWiki().
// NOTE(review): interior lines (brace closures, $mode/$timeout init,
// sleep/retry bookkeeping) are missing from this view of the file.
37 function OpenDataBase($dbname) {
38 global $WikiDB; // hash of all the DBM file names
// Normalize PHP_VERSION to a 3-digit string ("4.3.2" -> "432") for the
// string comparison below.
42 $php_version = substr( str_pad( preg_replace('/\D/','', PHP_VERSION), 3, '0'), 0, 3);
43 if ($php_version > "430") {
44 //PHP 4.3.x Windows lock bug workaround:
45 // http://bugs.php.net/bug.php?id=23975
46 if (substr(PHP_OS,0,3) == 'WIN') {
47 $mode .= "-"; // suppress locking, or
48 } elseif (DBM_FILE_TYPE != 'gdbm') { // gdbm does it internally
49 $mode .= "d"; // else use internal locking
// Open each store; '@' hides dba_open() warnings while polling for the
// lock (intentional here — failure is retried, then reported below).
53 while (list($key, $file) = each($WikiDB)) {
54 while (($dbi[$key] = @dba_open($file, $mode, DBM_FILE_TYPE)) < 1) {
55 if (file_exists($file))
// Random 0.5..1.5 second pause so concurrent requests don't retry in
// lock-step against the same DBM lock.
57 $secs = 0.5 + ((double)rand(1,32767)/32767);
60 if ($timeout > MAX_DBM_ATTEMPTS) {
61 ExitWiki("Cannot open database '$key' : '$file', giving up.");
// Close every DBA handle in the $dbi hash returned by OpenDataBase().
69 function CloseDataBase($dbi) {
71 while (list($dbafile, $dbihandle) = each($dbi)) {
72 dba_close($dbihandle);
78 // take a serialized hash, return same padded out to
79 // the next largest number bytes divisible by 500. This
80 // is to save disk space in the long run, since DBM files
82 function PadSerializedData($data) {
83 // calculate the next largest number divisible by 500
84 $nextincr = 500 * ceil(strlen($data) / 500);
// Left-justify and space-pad $data to exactly $nextincr bytes.
// NOTE(review): the ${var} string interpolation form is deprecated as of
// PHP 8.2 — would need "%-{$nextincr}s" on modern PHP.
86 $data = sprintf("%-${nextincr}s", $data);
90 // strip trailing whitespace from the serialized data
// Inverse of PadSerializedData(). Body is not visible in this view of the
// file — presumably a chop()/rtrim() plus return; verify against the
// original source.
92 function UnPadSerializedData($data) {
98 // Return hash of page + attributes or default
// Fetch $pagename from the given store ('wiki', 'archive', 'wikilinks', ...)
// and unserialize it into a hash; the page's own name is stamped into the
// result. The missing else-branch (returning a default) is not visible here.
99 function RetrievePage($dbi, $pagename, $pagestore) {
100 if ($data = dba_fetch($pagename, $dbi[$pagestore])) {
101 // unserialize $data into a hash
// NOTE(review): unserialize() on DBM-stored data is safe only as long as
// the DBM files are written solely by this code (no untrusted input).
102 $pagehash = unserialize(UnPadSerializedData($data));
103 $pagehash['pagename'] = $pagename;
111 // Either insert or replace a key/value (a page)
// Serializes $pagehash (padded) into $dbi[$pagestore] under $pagename.
// When writing to the main 'wiki' store, the page's outgoing links are
// extracted from its content and the 'wikilinks' store is updated first.
112 function InsertPage($dbi, $pagename, $pagehash, $pagestore='wiki') {
114 if ($pagestore == 'wiki') { // a bit of a hack
115 $linklist = ExtractWikiPageLinks($pagehash['content']);
116 SetWikiPageLinks($dbi, $pagename, $linklist);
119 $pagedata = PadSerializedData(serialize($pagehash));
// Try insert first (fails if the key exists), then fall back to replace.
121 if (!dba_insert($pagename, $pagedata, $dbi[$pagestore])) {
122 if (!dba_replace($pagename, $pagedata, $dbi[$pagestore])) {
123 ExitWiki("Error inserting page '$pagename'");
129 // for archiving pages to a separate dbm
// Store a padded, serialized copy of $pagehash in the archive store.
130 function SaveCopyToArchive($dbi, $pagename, $pagehash) {
131 global $ArchivePageStore;
133 $pagedata = PadSerializedData(serialize($pagehash));
// NOTE(review): inconsistent store lookup — the insert uses
// $dbi[$ArchivePageStore] but the replace fallback hard-codes
// $dbi['archive']. These only agree if $ArchivePageStore == 'archive'.
135 if (!dba_insert($pagename, $pagedata, $dbi[$ArchivePageStore])) {
136 if (!dba_replace($pagename, $pagedata, $dbi['archive'])) {
137 ExitWiki("Error storing '$pagename' into archive");
// True if $pagename exists in the main 'wiki' store.
143 function IsWikiPage($dbi, $pagename) {
144 return dba_exists($pagename, $dbi['wiki']);
// True if $pagename exists in the 'archive' store.
148 function IsInArchive($dbi, $pagename) {
149 return dba_exists($pagename, $dbi['archive']);
// Delete $pagename from all stores (wiki, archive, hitcount, wikilinks)
// and scrub it out of the 'fromlinks' lists of every page it linked to,
// keeping the link graph consistent.
152 function RemovePage($dbi, $pagename) {
154 dba_delete($pagename, $dbi['wiki']); // report error if this fails?
155 dba_delete($pagename, $dbi['archive']); // no error if this fails
156 dba_delete($pagename, $dbi['hitcount']); // no error if this fails
158 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
160 // remove page from fromlinks of pages it had links to
161 if (is_array($linkinfo)) { // page exists?
162 $tolinks = $linkinfo['tolinks'];
164 while (list($tolink, $dummy) = each($tolinks)) {
165 $tolinkinfo = RetrievePage($dbi, $tolink, 'wikilinks');
166 if (is_array($tolinkinfo)) { // page found?
167 $oldFromlinks = $tolinkinfo['fromlinks'];
// Rebuild fromlinks from scratch, copying back every entry except the
// page being removed.
168 $tolinkinfo['fromlinks'] = array(); // erase fromlinks
169 reset($oldFromlinks);
170 while (list($fromlink, $dummy) = each($oldFromlinks)) {
171 if ($fromlink != $pagename) // not to be erased?
172 $tolinkinfo['fromlinks'][$fromlink] = 1; // put link back
173 } // put link info back in DBM file
174 InsertPage($dbi, $tolink, $tolinkinfo, 'wikilinks');
178 // remove page itself
179 dba_delete($pagename, $dbi['wikilinks']);
184 // setup for title-search
185 function InitTitleSearch($dbi, $search) {
186 $pos['search'] = '=' . preg_quote($search) . '=i';
187 $pos['key'] = dba_firstkey($dbi['wiki']);
193 // iterating through database
// Advance $pos (by reference) to the next page whose NAME matches the
// search regexp. NOTE(review): the line assigning $page (presumably
// $page = $pos['key']) and the return statements are missing from this view.
194 function TitleSearchNextMatch($dbi, &$pos) {
195 while ($pos['key']) {
197 $pos['key'] = dba_nextkey($dbi['wiki']);
199 if (preg_match($pos['search'], $page)) {
207 // setup for full-text search
208 function InitFullSearch($dbi, $search) {
209 return InitTitleSearch($dbi, $search);
213 //iterating through database
214 function FullSearchNextMatch($dbi, &$pos) {
215 while ($pos['key']) {
217 $pos['key'] = dba_nextkey($dbi['wiki']);
219 $pagedata = dba_fetch($key, $dbi['wiki']);
220 // test the serialized data
221 if (preg_match($pos['search'], $pagedata)) {
222 $page['pagename'] = $key;
223 $pagedata = unserialize(UnPadSerializedData($pagedata));
224 $page['content'] = $pagedata['content'];
232 ////////////////////////
233 // new database features
235 // Compute PCRE suitable for searching for links to the given page.
// WikiName pages match as bare words (negative look-around keeps e.g.
// "NotMyPage" from matching "MyPage"); other pages match only inside
// [bracket] link syntax.
236 function MakeBackLinkSearchRegexp($pagename) {
237 global $WikiNameRegexp;
239 // Note that in (at least some) PHP 3.x's, preg_quote only takes
240 // (at most) one argument. Also it doesn't quote '/'s.
241 // It does quote '='s, so we'll use that for the delimiter.
242 $quoted_pagename = preg_quote($pagename);
243 if (preg_match("/^$WikiNameRegexp\$/", $pagename)) {
244 // FIXME: This may need modification for non-standard (non-english) $WikiNameRegexp.
245 return "/(?<![A-Za-z0-9!])$quoted_pagename(?![A-Za-z0-9])/";
// Bracket-link case. NOTE(review): the 'return' / string-opening line
// preceding these concatenated fragments is missing from this view.
248 // Note from author: Sorry. :-/
250 . '(?<!\[)\[(?!\[)' // Single, isolated '['
251 . '([^]|]*\|)?' // Optional stuff followed by '|'
252 . '\s*' // Optional space
253 . $quoted_pagename // Pagename
254 . '\s*\]/' ); // Optional space, followed by ']'
255 // FIXME: the above regexp is still not quite right.
256 // Consider the text: " [ [ test page ]". This is a link to a page
257 // named '[ test page'. The above regexp will recognize this
258 // as a link either to '[ test page' (good) or to 'test page' (wrong).
262 // setup for back-link search
263 function InitBackLinkSearch($dbi, $pagename) {
264 $pos['search'] = MakeBackLinkSearchRegexp($pagename);
265 $pos['key'] = dba_firstkey($dbi['wiki']);
270 // iterating through back-links
271 function BackLinkSearchNextMatch($dbi, &$pos) {
272 while ($pos['key']) {
274 $pos['key'] = dba_nextkey($dbi['wiki']);
276 $rawdata = dba_fetch($page, $dbi['wiki']);
277 if ( ! preg_match($pos['search'], $rawdata))
280 $pagedata = unserialize(UnPadSerializedData($rawdata));
281 while (list($i, $line) = each($pagedata['content'])) {
282 if (preg_match($pos['search'], $line))
// Increment $pagename's hit counter, creating it (at 1, presumably — the
// $count assignment lines are missing from this view) if absent.
// Not atomic: a concurrent request between fetch and replace can lose a hit.
289 function IncreaseHitCount($dbi, $pagename) {
291 if (dba_exists($pagename, $dbi['hitcount'])) {
292 // increase the hit count
293 // echo "$pagename there, incrementing...<br>\n";
294 $count = dba_fetch($pagename, $dbi['hitcount']);
296 dba_replace($pagename, $count, $dbi['hitcount']);
298 // add it, set the hit count to one
299 // echo "adding $pagename to hitcount...<br>\n";
301 dba_insert($pagename, $count, $dbi['hitcount']);
// Read $pagename's hit counter (the return statements are missing from
// this view; presumably $count, else a default for unknown pages).
306 function GetHitCount($dbi, $pagename) {
308 if (dba_exists($pagename, $dbi['hitcount'])) {
309 // fetch the stored hit count (read-only; nothing is incremented here)
310 $count = dba_fetch($pagename, $dbi['hitcount']);
// Build a pagename => hits map of the top-$limit pages, sorted by hits
// descending, by a single pass over the 'hitcount' store.
318 function InitMostPopular($dbi, $limit) {
319 // iterate through the whole dba file for hit counts
320 // sort the results highest to lowest, and return
323 // Because sorting all the pages may be a lot of work
324 // we only get the top $limit. A page is only added if its score is
325 // higher than the lowest score in the list. If the list is full then
326 // one of the pages with the lowest scores is removed.
// Seed the result with the first key. NOTE(review): the initial
// $lowest assignment is missing from this view.
328 $pagename = dba_firstkey($dbi['hitcount']);
329 $score = dba_fetch($pagename, $dbi['hitcount']);
330 $res = array($pagename => (int) $score);
333 while ($pagename = dba_nextkey($dbi['hitcount'])) {
334 $score = dba_fetch($pagename, $dbi['hitcount']);
335 if (count($res) < $limit) { // room left in $res?
336 if ($score < $lowest)
338 $res[$pagename] = (int) $score; // add page to $res
339 } elseif ($score > $lowest) {
// List is full and this page beats the current minimum: rebuild $res,
// dropping one lowest-scored entry and recomputing the minimum.
340 $oldres = $res; // save old result
342 $removed = 0; // nothing removed yet
343 $newlowest = $score; // new lowest score
344 $res[$pagename] = (int) $score; // add page to $res
346 while(list($pname, $pscore) = each($oldres)) {
// FIXME(review): '=' below is assignment, not '==' comparison. It is
// truthy whenever $lowest != 0, so the FIRST entry iterated gets dropped
// (not necessarily a lowest-scored one), and it also clobbers $pscore
// with $lowest, corrupting the copy and the $newlowest tracking below.
// Should read: if (!$removed and ($pscore == $lowest))
347 if (!$removed and ($pscore = $lowest))
348 $removed = 1; // don't copy this entry
350 $res[$pname] = (int) $pscore;
351 if ($pscore < $newlowest)
352 $newlowest = $pscore;
355 $lowest = $newlowest;
359 arsort($res); // sort
// Pop the next entry off the $res map built by InitMostPopular, using
// the array's internal pointer via each(). NOTE(review): the return
// statements and the full array literal are missing from this view.
365 function MostPopularNextMatch($dbi, &$res) {
367 // the return result is a two element array with 'hits'
368 // and 'pagename' as the keys
370 if (count($res) == 0)
373 if (list($pagename, $hits) = each($res)) {
374 //echo "most popular next match called<br>\n";
375 //echo "got $pagename, $hits back<br>\n";
378 "pagename" => $pagename
380 // $dbm_mostpopular_cntr++;
// Return an array of every key (page name) in the 'wiki' store.
// NOTE(review): the $ctr initialization/increment and the return are
// missing from this view.
387 function GetAllWikiPagenames($dbi) {
391 $namelist[$ctr] = $key = dba_firstkey($dbi['wiki']);
393 while ($key = dba_nextkey($dbi['wiki'])) {
395 $namelist[$ctr] = $key;
401 ////////////////////////////////////////////
402 // functionality for the wikilinks DBA file
404 // format of the 'wikilinks' DBA file :
406 // { tolinks => { pagename => 1 }, fromlinks => { pagename => 1 } }
408 // takes a page name, returns array of scored incoming and outgoing links
// Result hash has three lists: $links['out'] (outgoing, scored by the
// target's incoming-link count), $links['in'] (incoming, same scoring),
// and $links['popular'] (union of both, scored by hit count). Each list
// entry is an array($pagename, $score), sorted by score descending.
409 function GetWikiPageLinks($dbi, $pagename) {
411 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
412 if (is_array($linkinfo)) { // page exists?
413 $tolinks = $linkinfo['tolinks']; // outgoing links
414 $fromlinks = $linkinfo['fromlinks']; // incoming links
415 } else { // new page, but pages may already point to it
416 // create info for page
418 $fromlinks = array();
419 // look up pages that link to $pagename
// Full scan of the wikilinks store — O(pages) fallback for pages that
// have no record yet.
420 $pname = dba_firstkey($dbi['wikilinks']);
421 while ($pname != false) {
422 $linkinfo = RetrievePage($dbi, $pname, 'wikilinks');
423 if ($linkinfo['tolinks'][$pagename]) // $pname links to $pagename?
424 $fromlinks[$pname] = 1;
425 $pname = dba_nextkey($dbi['wikilinks']);
429 // get and sort the outgoing links
431 reset($tolinks); // look up scores for tolinks
432 while(list($tolink, $dummy) = each($tolinks)) {
433 $toPage = RetrievePage($dbi, $tolink, 'wikilinks');
434 if (is_array($toPage)) // link to internal page?
// Score of an outgoing link = number of pages linking to its target.
435 $outlinks[$tolink] = count($toPage['fromlinks']);
437 arsort($outlinks); // sort on score
438 $links['out'] = array();
439 reset($outlinks); // convert to right format
440 while(list($link, $score) = each($outlinks))
441 $links['out'][] = array($link, $score);
443 // get and sort the incoming links
445 reset($fromlinks); // look up scores for fromlinks
446 while(list($fromlink, $dummy) = each($fromlinks)) {
447 $fromPage = RetrievePage($dbi, $fromlink, 'wikilinks');
448 $inlinks[$fromlink] = count($fromPage['fromlinks']);
450 arsort($inlinks); // sort on score
451 $links['in'] = array();
452 reset($inlinks); // convert to right format
453 while(list($link, $score) = each($inlinks))
454 $links['in'][] = array($link, $score);
456 // sort all the incoming and outgoing links
457 $allLinks = $outlinks; // copy the outlinks
458 reset($inlinks); // add the inlinks
459 while(list($key, $value) = each($inlinks))
460 $allLinks[$key] = $value;
// 'popular' is re-scored by hit count, not by link count.
461 reset($allLinks); // lookup hits
462 while(list($key, $value) = each($allLinks))
463 $allLinks[$key] = (int) dba_fetch($key, $dbi['hitcount']);
464 arsort($allLinks); // sort on hits
465 $links['popular'] = array();
466 reset($allLinks); // convert to right format
467 while(list($link, $hits) = each($allLinks))
468 $links['popular'][] = array($link, $hits);
473 // takes page name, list of links it contains
474 // the $linklist is an array where the keys are the page names
// Replace $pagename's outgoing links with $linklist and keep the inverse
// 'fromlinks' indexes of all affected pages consistent. Works in four
// phases on an in-memory $cache of wikilinks records, then writes the
// whole cache back. Not atomic across requests.
475 function SetWikiPageLinks($dbi, $pagename, $linklist) {
479 // Phase 1: fetch the relevant pairs from 'wikilinks' into $cache
480 // ---------------------------------------------------------------
482 // first the info for $pagename
483 $linkinfo = RetrievePage($dbi, $pagename, 'wikilinks');
484 if (is_array($linkinfo)) // page exists?
485 $cache[$pagename] = $linkinfo;
487 // create info for page
488 $cache[$pagename] = array( 'fromlinks' => array(),
491 // look up pages that link to $pagename
// Full scan to reconstruct fromlinks for a page with no record yet
// (same O(pages) fallback as in GetWikiPageLinks).
492 $pname = dba_firstkey($dbi['wikilinks']);
494 $linkinfo = RetrievePage($dbi, $pname, 'wikilinks');
495 if ($linkinfo['tolinks'][$pagename])
496 $cache[$pagename]['fromlinks'][$pname] = 1;
497 $pname = dba_nextkey($dbi['wikilinks']);
501 // then the info for the pages that $pagename used to point to
502 $oldTolinks = $cache[$pagename]['tolinks'];
504 while (list($link, $dummy) = each($oldTolinks)) {
505 $linkinfo = RetrievePage($dbi, $link, 'wikilinks');
506 if (is_array($linkinfo))
507 $cache[$link] = $linkinfo;
510 // finally the info for the pages that $pagename will point to
512 while (list($link, $dummy) = each($linklist)) {
513 $linkinfo = RetrievePage($dbi, $link, 'wikilinks');
514 if (is_array($linkinfo))
515 $cache[$link] = $linkinfo;
518 // Phase 2: delete the old links
519 // ---------------------------------------------------------------
521 // delete the old tolinks for $pagename
522 // $cache[$pagename]['tolinks'] = array();
523 // (overwritten anyway in Phase 3)
525 // remove $pagename from the fromlinks of pages in $oldTolinks
528 while (list($oldTolink, $dummy) = each($oldTolinks)) {
529 if ($cache[$oldTolink]) { // links to existing page?
// Rebuild the target's fromlinks, copying every entry except $pagename.
530 $oldFromlinks = $cache[$oldTolink]['fromlinks'];
531 $cache[$oldTolink]['fromlinks'] = array(); // erase fromlinks
532 reset($oldFromlinks); // comp. new fr.links
533 while (list($fromlink, $dummy) = each($oldFromlinks)) {
534 if ($fromlink != $pagename)
535 $cache[$oldTolink]['fromlinks'][$fromlink] = 1;
540 // Phase 3: add the new links
541 // ---------------------------------------------------------------
543 // set the new tolinks for $pagename
544 $cache[$pagename]['tolinks'] = $linklist;
546 // add $pagename to the fromlinks of pages in $linklist
548 while (list($link, $dummy) = each($linklist)) {
549 if ($cache[$link]) // existing page?
550 $cache[$link]['fromlinks'][$pagename] = 1;
553 // Phase 4: write $cache back to 'wikilinks'
554 // ---------------------------------------------------------------
// InsertPage with pagestore='wikilinks' skips the link-extraction hack,
// so this cannot recurse.
557 while (list($link,$fromAndTolinks) = each($cache))
558 InsertPage($dbi, $link, $fromAndTolinks, 'wikilinks');