4 require_once('lib/WikiDB/backend.php');
6 // FIXME:padding of data? Is it needed? dba_optimize() seems to do a good
7 // job at packing 'gdbm' (and 'db2') databases.
13 * Index: 'p' + pagename
14 * Values: latestversion . ':' . flags . ':' . serialized hash of page meta data
15 * Currently flags = 1 if latest version has empty content.
18 * Index: 'v' + version:pagename
19 * Value: serialized hash of revision meta data, including:
20 * + quasi-meta-data %content
23 * index: 'o' + pagename
24 * value: serialized list of pages (names) which pagename links to.
25 * index: 'i' + pagename
26 * value: serialized list of pages which link to pagename
29 * Don't keep tables locked the whole time.
32 * - Yes - RecentChanges support. Lists of most recent edits (major, minor, either).
33 * 't' + mtime => 'a|i' + version+':'+pagename ('a': major, 'i': minor)
34 * Cost: Currently we have to get_all_pages and sort it by mtime.
35 * With a separate t table we have to update this table on every version change.
36 * - No - list of pagenames for get_all_pages (very cheap: iterate page table)
37 * - Maybe - mostpopular list? 'h' + pagename => hits
39 * Separate hit table, so we don't have to update the whole page entry
40 * each time we get a hit. Maybe not so important though.
43 require_once('lib/DbaPartition.php');
45 class WikiDB_backend_dbaBase
46 extends WikiDB_backend
48 function WikiDB_backend_dbaBase (&$dba) {
50 // TODO: page and version tables should be in their own files, probably.
51 // We'll pack them all in one for now (testing).
52 // 2004-07-09 10:07:30 rurban: It's fast enough this way.
53 $this->_pagedb = new DbaPartition($dba, 'p');
54 $this->_versiondb = new DbaPartition($dba, 'v');
55 $linkdbpart = new DbaPartition($dba, 'l');
56 $this->_linkdb = new WikiDB_backend_dbaBase_linktable($linkdbpart);
57 $this->_dbdb = new DbaPartition($dba, 'd');
/**
 * List the column names this backend reports as sortable.
 *
 * @return array The column names 'pagename' and 'mtime'.
 */
function sortable_columns() {
    $columns = array();
    $columns[] = 'pagename';
    $columns[] = 'mtime';
    // 'author_id' and 'author' are deliberately left out of the list.
    return $columns;
}
69 $this->_db->optimize();
76 function rebuild($args=false) {
77 if (!empty($args['all'])) {
80 // rebuild backlink table
81 $this->_linkdb->rebuild();
85 function check($args=false) {
86 // cleanup v?Pagename UNKNOWN
88 $pagedb = &$this->_pagedb;
89 for ($page = $pagedb->firstkey(); $page !== false; $page = $pagedb->nextkey()) {
91 $errs[] = "empty page $page";
92 trigger_error("empty page $page deleted", E_USER_WARNING);
93 $this->purge_page($page);
96 if (!($data = $pagedb->get($page))) continue;
97 list($version,$flags,) = explode(':', $data, 3);
98 $vdata = $this->_versiondb->get($version.":".$page);
100 continue; // linkrelations
101 if (!is_string($vdata)
102 or $vdata == 'UNKNOWN
\0'
103 or !is_array(unserialize($vdata)))
105 $errs[] = "empty revision $version for $page";
106 trigger_error("empty revision $version for $page deleted", E_USER_WARNING);
107 $this->delete_versiondata($page, $version);
110 // check links per default
111 return array_merge($errs, $this->_linkdb->check());
114 function get_pagedata($pagename) {
115 $result = $this->_pagedb->get($pagename);
118 list(,,$packed) = explode(':', $result, 3);
119 $data = unserialize($packed);
123 function update_pagedata($pagename, $newdata) {
124 $result = $this->_pagedb->get($pagename);
126 list($latestversion,$flags,$data) = explode(':', $result, 3);
127 $data = unserialize($data);
130 $latestversion = $flags = 0;
134 foreach ($newdata as $key => $val) {
140 $this->_pagedb->set($pagename,
141 (int)$latestversion . ':'
/**
 * Return the number of the latest version of a page.
 *
 * The page record is stored as "latestversion:flags:serialized-data", so
 * casting the raw record to int yields its leading version number. A
 * record that does not start with a number casts to 0.
 *
 * @param string $pagename Name of the page.
 * @return int Latest version number.
 */
function get_latest_version($pagename) {
    $record = $this->_pagedb->get($pagename);
    return (int) $record;
}
150 function get_previous_version($pagename, $version) {
151 $versdb = &$this->_versiondb;
153 while (--$version > 0) {
154 if ($versdb->exists($version . ":$pagename"))
160 //check $want_content
161 function get_versiondata($pagename, $version, $want_content=false) {
162 $data = $this->_versiondb->get((int)$version . ":$pagename");
163 if (empty($data) or $data == 'UNKNOWN
\0') return false;
165 $vdata = unserialize($data);
166 if (DEBUG and empty($vdata)) { // requires ->check
167 trigger_error("Delete empty revision: $pagename: ".$data, E_USER_WARNING);
168 $this->delete_versiondata($pagename, (int)$version);
171 $vdata['%content'] = !empty($vdata['%content']);
177 * See ADODB for a better delete_page(), which can be undone and is seen in RecentChanges.
180 //function delete_page($pagename) { $this->purge_page($pagename); }
183 * Completely delete page from the database.
function purge_page($pagename) {
    // Drop every revision slot, counting down from the latest version to 1.
    for ($v = $this->get_latest_version($pagename); $v > 0; $v--) {
        $this->_versiondb->set($v . ":$pagename", false);
    }

    // Remove the page record itself.
    $this->_pagedb->set($pagename, false);

    // Finally clear the page's link entries.
    $this->set_links($pagename, false);
}
198 function rename_page($pagename, $to) {
199 $result = $this->_pagedb->get($pagename);
201 list($version, $flags, $data) = explode(':', $result, 3);
202 $data = unserialize($data);
207 $links = $this->_linkdb->get_links($pagename, false, false);
208 $this->_pagedb->delete($pagename);
209 $data['pagename'] = $to;
210 $this->_pagedb->set($to,
214 // move over the latest version only
215 $pvdata = $this->get_versiondata($pagename, $version, true);
216 $this->set_versiondata($to, $version, $pvdata);
218 // update links and backlinks
219 $this->_linkdb->set_links($to, $links);
220 // better: update all back-/inlinks for all outlinks.
226 * Delete an old revision of a page.
228 function delete_versiondata($pagename, $version) {
229 $versdb = &$this->_versiondb;
231 $latest = $this->get_latest_version($pagename);
233 assert($version > 0);
234 assert($version <= $latest);
236 $versdb->set((int)$version . ":$pagename", false);
238 if ($version == $latest) {
239 $previous = $this->get_previous_version($pagename, $version);
241 $pvdata = $this->get_versiondata($pagename, $previous);
242 $is_empty = empty($pvdata['%content']);
246 $this->_update_latest_version($pagename, $previous, $is_empty);
251 * Create a new revision of a page.
function set_versiondata($pagename, $version, $data) {
    assert(is_array($data) and !empty($data)); // mtime

    // Store the serialized revision under "<version>:<pagename>".
    $revision_key = (int)$version . ":$pagename";
    $this->_versiondb->set($revision_key, serialize($data));

    // Only a revision newer than the recorded latest one moves the
    // page's latest-version pointer; the flag passed along is true
    // when the revision has no content.
    $latest = $this->get_latest_version($pagename);
    if ($version > $latest) {
        $content_is_empty = empty($data['%content']);
        $this->_update_latest_version($pagename, $version, $content_is_empty);
    }
}
261 function _update_latest_version($pagename, $latest, $flags) {
262 $pagedb = &$this->_pagedb;
264 $pdata = $pagedb->get($pagename);
266 list(,,$pagedata) = explode(':',$pdata,3);
268 $pagedata = serialize(array());
270 $pagedb->set($pagename, (int)$latest . ':' . (int)$flags . ":$pagedata");
273 function numPages($include_empty=false, $exclude='') {
274 $pagedb = &$this->_pagedb;
276 for ($page = $pagedb->firstkey(); $page!== false; $page = $pagedb->nextkey()) {
278 assert(!empty($page));
281 if ($exclude and in_array($page, $exclude)) continue;
282 if (!$include_empty) {
283 if (!($data = $pagedb->get($page))) continue;
284 list($latestversion,$flags,) = explode(':', $data, 3);
286 if ($latestversion == 0 || $flags != 0)
287 continue; // current content is empty
294 function get_all_pages($include_empty=false, $sortby='', $limit='', $exclude='') {
295 $pagedb = &$this->_pagedb;
297 if ($limit) // extract from,count from limit
298 list($from,$count) = $this->limit($limit);
299 for ($page = $pagedb->firstkey(); $page!== false; $page = $pagedb->nextkey()) {
301 assert(!empty($page));
304 if ($exclude and in_array($page, $exclude)) continue;
305 if ($limit and $from) {
307 if ($i < $from) continue;
309 if ($limit and count($pages) > $count) break;
310 if (!$include_empty) {
311 if (!($data = $pagedb->get($page))) continue;
312 list($latestversion,$flags,) = explode(':', $data, 3);
314 if ($latestversion == 0 || $flags != 0)
315 continue; // current content is empty
319 return new WikiDB_backend_dbaBase_pageiter($this, $pages,
320 array('sortby'=>$sortby/*,
321 'limit' =>$limit*/));
/**
 * Replace the stored links of a page.
 *
 * Delegates to the link table's set_links() method.
 *
 * @param string $pagename Name of the page whose links are set.
 * @param mixed  $links    The new links, passed through to the link table unchanged.
 */
function set_links($pagename, $links) {
    $linktable = &$this->_linkdb;
    $linktable->set_links($pagename, $links);
}
328 function get_links($pagename, $reversed=true, $include_empty=false,
329 $sortby='', $limit='', $exclude='',
330 $want_relations=false)
332 // optimization: if no relation at all is found, mark it in the iterator.
333 $links = $this->_linkdb->get_links($pagename, $reversed, $want_relations);
335 return new WikiDB_backend_dbaBase_pageiter
337 array('sortby'=>$sortby,
340 'want_relations'=>$want_relations,
341 'found_relations' => $want_relations ? $this->_linkdb->found_relations : 0
348 * @return array of all linkrelations
349 * Faster than the dumb WikiDB method.
351 function list_relations($also_attributes=false, $only_attributes=false, $sorted=true) {
352 $linkdb = &$this->_linkdb;
353 $relations = array();
354 for ($link = $linkdb->_db->firstkey(); $link!== false; $link = $linkdb->_db->nextkey()) {
355 if ($link[0] != 'o') continue;
356 $links = $linkdb->_get_links('o', substr($link,1));
357 foreach ($links as $link) { // linkto => page, linkrelation => page
359 and $link['relation']
360 and !in_array($link['relation'], $relations))
362 $is_attribute = empty($link['linkto']); // a relation has both
364 if ($only_attributes or $also_attributes)
365 $relations[] = $link['relation'];
366 } elseif (!$only_attributes) {
367 $relations[] = $link['relation'];
380 * WikiDB_backend_dumb_LinkSearchIter searches over all pages and then all its links.
381 * Since there are fewer links than pages, and we easily get the pagename from the link key,
382 * we iterate here directly over the linkdb and check the pagematch there.
384 * @param $pages object A TextSearchQuery object for the pagename filter.
385 * @param $query object A SearchQuery object (Text or Numeric) for the linkvalues,
386 * linkto, linkfrom (=backlink), relation or attribute values.
387 * @param $linktype string One of the 4 linktypes "linkto", "linkfrom" (=backlink), "relation" or "attribute".
388 * Maybe also "relation+attribute" for the advanced search.
389 * @param $relation object A TextSearchQuery for the linkname or false.
390 * @param $options array Currently ignored. hash of sortby, limit, exclude.
391 * @return object A WikiDB_backend_iterator.
392 * @see WikiDB::linkSearch
394 function link_search( $pages, $query, $linktype, $relation=false, $options=array() ) {
395 $linkdb = &$this->_linkdb;
398 $want_relations = false;
399 if ($linktype == 'relation') {
400 $want_relations = true;
401 $field = 'linkrelation';
403 if ($linktype == 'attribute') {
404 $want_relations = true;
405 $field = 'attribute';
407 if ($linktype == 'linkfrom') {
411 for ($link = $linkdb->_db->firstkey(); $link!== false; $link = $linkdb->_db->nextkey()) {
412 $type = $reverse ? 'i' : 'o';
413 if ($link[0] != $type) continue;
414 $pagename = substr($link, 1);
415 if (!$pages->match($pagename)) continue;
416 if ($linktype == 'attribute') {
417 $page = $GLOBALS['request']->_dbi->getPage($pagename);
418 $attribs = $page->get('attributes');
420 /* Optimization on expressive searches:
421 for queries with multiple attributes.
422 Just take the defined placeholders from the query(ies)
423 if there are more attributes than query variables.
425 if ($query->getType() != 'text'
427 and ((count($vars = $query->getVars()) > 1)
428 or (count($attribs) > count($vars))))
430 // names must strictly match. no * allowed
431 if (!$query->can_match($attribs)) continue;
432 if (!($result = $query->match($attribs))) continue;
433 foreach ($result as $r) {
434 $r['pagename'] = $pagename;
438 // textsearch or simple value. no strict bind by name needed
439 foreach ($attribs as $attribute => $value) {
440 if ($relation and !$relation->match($attribute)) continue;
441 if (!$query->match($value)) continue;
442 $links[] = array('pagename' => $pagename,
443 'linkname' => $attribute,
444 'linkvalue' => $value);
450 // TODO: honor limits. this can get large.
451 if ($want_relations) {
452 // MAP linkrelation : pagename => thispagename : linkname : linkvalue
453 $_links = $linkdb->_get_links('o', $pagename);
454 foreach ($_links as $link) { // linkto => page, linkrelation => page
455 if (!isset($link['relation']) or !$link['relation']) continue;
456 if ($relation and !$relation->match($link['relation'])) continue;
457 if (!$query->match($link['linkto'])) continue;
458 $links[] = array('pagename' => $pagename,
459 'linkname' => $link['relation'],
460 'linkvalue' => $link['linkto']);
463 $_links = $linkdb->_get_links($reverse ? 'i' : 'o', $pagename);
464 foreach ($_links as $link) { // linkto => page
466 $link = $link['linkto'];
467 if (!$query->match($link)) continue;
468 $links[] = array('pagename' => $pagename,
470 'linkvalue' => $link);
475 $options['want_relations'] = true; // Iter hack to force return of the whole hash
476 return new WikiDB_backend_dbaBase_pageiter($this, $links, $options);
480 * Handle multi-searches for many relations and attributes in one expression.
481 * Bind all required attributes and relations per page together and pass it to one query.
482 * (is_a::city and population < 20000) and (*::city and area > 1000000)
483 * (is_a::city or linkto::CategoryCountry) and population < 20000 and area > 1000000
484 * Note that the 'linkto' and 'linkfrom' links are relations, containing an array.
486 * @param $pages object A TextSearchQuery object for the pagename filter.
487 * @param $query object A SemanticSearchQuery object for the links.
488 * @param $options array Currently ignored. hash of sortby, limit, exclude for the pagelist.
489 * @return object A WikiDB_backend_iterator.
490 * @see WikiDB::linkSearch
function relation_search( $pages, $query, $options=array() ) {
    $linkdb = &$this->_linkdb;
    // Accumulates the hashes returned by $query->match() for each page.
    $links = array();

    // We need to detect which attributes and relation names we should look for. NYI
    $want_attributes = $query->hasAttributes();
    // Fixed: this flag used to be stored under a different name
    // ($want_relation) than the one tested below ($want_relations), so the
    // relation branch could never run.
    $want_relations = $query->hasRelations();
    $linknames = $query->getLinkNames();
    // create a hash for faster checks
    $linkcheck = array();
    foreach ($linknames as $l) $linkcheck[$l] = 1;
    // Loop-invariant lookups, hoisted out of the key scan.
    $want_from = isset($linkcheck['linkfrom']);
    $want_to = isset($linkcheck['linkto']);

    for ($key = $linkdb->_db->firstkey(); $key !== false; $key = $linkdb->_db->nextkey()) {
        // Fixed: the key type was chosen through an undefined $reverse
        // variable, which always evaluated to the forward-link type 'o'.
        // That type is now spelled out explicitly.
        if ($key[0] != 'o') continue;
        $pagename = substr($key, 1);
        if (!$pages->match($pagename)) continue;

        $pagelinks = array();
        if ($want_attributes) {
            $page = $GLOBALS['request']->_dbi->getPage($pagename);
            $attribs = $page->get('attributes');
            $pagelinks = $attribs;
        }
        if ($want_relations) {
            // all links contain arrays of pagenames, just the attributes
            // are guaranteed to be singular
            if ($want_from) {
                $pagelinks['linkfrom'] = $linkdb->_get_links('i', $pagename);
            }
            $outlinks = $linkdb->_get_links('o', $pagename);
            // A separate variable name keeps the key-scan variable intact.
            foreach ($outlinks as $out) { // linkto => page, relation => page
                // all named links the query asks for
                if ((isset($out['relation'])) and $out['relation']
                    and isset($linkcheck[$out['relation']]))
                    $pagelinks[$out['relation']][] = $out['linkto'];
                if ($want_to)
                    $pagelinks['linkto'][] = is_array($out) ? $out['linkto'] : $out;
            }
        }
        if ($result = $query->match($pagelinks)) {
            $links = array_merge($links, $result);
        }
    }
    $options['want_relations'] = true; // Iter hack to force return of the whole hash
    return new WikiDB_backend_dbaBase_pageiter($this, $links, $options);
}
/**
 * usort() callback: ascending, case-insensitive ordering of page names.
 *
 * @param string $a First page name.
 * @param string $b Second page name.
 * @return int Negative, zero or positive, as returned by strcasecmp($a, $b).
 */
function WikiDB_backend_dbaBase_sortby_pagename_ASC ($a, $b) {
    $order = strcasecmp($a, $b);
    return $order;
}
/**
 * usort() callback: descending, case-insensitive ordering of page names.
 *
 * @param string $a First page name.
 * @param string $b Second page name.
 * @return int Negative, zero or positive, as returned by strcasecmp($b, $a).
 */
function WikiDB_backend_dbaBase_sortby_pagename_DESC ($a, $b) {
    $order = strcasecmp($b, $a);
    return $order;
}
/**
 * usort() callback: order two page names by the 'mtime' field, ascending.
 *
 * @param string $a First page name.
 * @param string $b Second page name.
 * @return int Result of WikiDB_backend_dbaBase_sortby_num() for ($a, $b).
 */
function WikiDB_backend_dbaBase_sortby_mtime_ASC ($a, $b) {
    $field = 'mtime';
    return WikiDB_backend_dbaBase_sortby_num($a, $b, $field);
}
/**
 * usort() callback: order two page names by the 'mtime' field, descending.
 *
 * Descending order is obtained by swapping the two operands.
 *
 * @param string $a First page name.
 * @param string $b Second page name.
 * @return int Result of WikiDB_backend_dbaBase_sortby_num() for ($b, $a).
 */
function WikiDB_backend_dbaBase_sortby_mtime_DESC ($a, $b) {
    $field = 'mtime';
    return WikiDB_backend_dbaBase_sortby_num($b, $a, $field);
}
/**
 * usort() callback: order two page names by the 'hits' field, ascending.
 *
 * @param string $a First page name.
 * @param string $b Second page name.
 * @return int Result of WikiDB_backend_dbaBase_sortby_num() for ($a, $b).
 */
function WikiDB_backend_dbaBase_sortby_hits_ASC ($a, $b) {
    $field = 'hits';
    return WikiDB_backend_dbaBase_sortby_num($a, $b, $field);
}
/**
 * usort() callback: order two page names by the 'hits' field, descending.
 *
 * Descending order is obtained by swapping the two operands.
 *
 * @param string $a First page name.
 * @param string $b Second page name.
 * @return int Result of WikiDB_backend_dbaBase_sortby_num() for ($b, $a).
 */
function WikiDB_backend_dbaBase_sortby_hits_DESC ($a, $b) {
    $field = 'hits';
    return WikiDB_backend_dbaBase_sortby_num($b, $a, $field);
}
560 function WikiDB_backend_dbaBase_sortby_num($aname, $bname, $field) {
562 $dbi = $request->getDbh();
563 // fields are stored in versiondata
564 $av = $dbi->_backend->get_latest_version($aname);
565 $bv = $dbi->_backend->get_latest_version($bname);
566 $a = $dbi->_backend->get_versiondata($aname, $av, false);
568 $b = $dbi->_backend->get_versiondata($bname, $bv, false);
569 if (!$b or !isset($b[$field])) return 0;
570 if (empty($a[$field])) return -1;
571 if ((!isset($a[$field]) and !isset($b[$field])) or ($a[$field] === $b[$field])) {
574 return ($a[$field] < $b[$field]) ? -1 : 1;
578 class WikiDB_backend_dbaBase_pageiter
579 extends WikiDB_backend_iterator
581 // fixed for linkrelations
582 function WikiDB_backend_dbaBase_pageiter(&$backend, &$pages, $options=false) {
583 $this->_backend = $backend;
584 $this->_options = $options;
586 if (!empty($options['sortby'])) {
587 $sortby = WikiDB_backend::sortby($options['sortby'], 'db', array('pagename','mtime'));
588 if ($sortby and !strstr($sortby, "hits ")) { // check for which column to sortby
589 usort($pages, 'WikiDB_backend_dbaBase_sortby_'.str_replace(' ','_',$sortby));
592 if (!empty($options['limit'])) {
593 list($offset,$limit) = WikiDB_backend::limit($options['limit']);
594 $pages = array_slice($pages, $offset, $limit);
596 $this->_pages = $pages;
598 $this->_pages = array();
601 // fixed for relations
603 if ( ! ($page = array_shift($this->_pages)) )
605 if (!empty($this->_options['want_relations'])) {
606 // $linkrelation = $page['linkrelation'];
607 $pagename = $page['pagename'];
608 if (!empty($this->_options['exclude']) and in_array($pagename, $this->_options['exclude']))
609 return $this->next();
612 if (!empty($this->_options['exclude']) and in_array($page, $this->_options['exclude']))
613 return $this->next();
614 return array('pagename' => $page);
618 reset($this->_pages);
621 $this->_pages = array();
625 class WikiDB_backend_dbaBase_linktable
627 function WikiDB_backend_dbaBase_linktable(&$dba) {
631 //TODO: try storing link lists as hashes rather than arrays.
632 // backlink deletion would be faster.
633 function get_links($page, $reversed=true, $want_relations=false) {
634 if ($want_relations) {
635 $this->found_relations = 0;
636 $links = $this->_get_links($reversed ? 'i' : 'o', $page);
637 $linksonly = array();
638 foreach ($links as $link) { // linkto => page, linkrelation => page
639 if (is_array($link) and isset($link['relation'])) {
640 if ($link['relation'])
641 $this->found_relations++;
642 $linksonly[] = array('pagename' => $link['linkto'],
643 'linkrelation' => $link['relation']);
644 } else { // empty relations are stripped
645 $linksonly[] = array('pagename' => $link['linkto']);
650 $links = $this->_get_links($reversed ? 'i' : 'o', $page);
651 $linksonly = array();
652 foreach ($links as $link) {
653 if (is_array($link)) {
654 $linksonly[] = $link['linkto'];
656 $linksonly[] = $link;
662 // fixed: relations ready
663 function set_links($page, $links) {
665 $oldlinks = $this->get_links($page, false, false);
667 if (!is_array($links)) {
668 assert(empty($links));
671 $this->_set_links('o', $page, $links);
673 /* Now for the backlink update we squash the linkto hashes into a simple array */
675 foreach ($links as $hash) {
676 if (!empty($hash['linkto']) and !in_array($hash['linkto'], $newlinks))
677 // for attributes it's empty
678 $newlinks[] = $hash['linkto'];
679 elseif (is_string($hash) and !in_array($hash, $newlinks))
682 //$newlinks = array_unique($newlinks);
688 $new = current($newlinks);
689 $old = current($oldlinks);
690 while ($new !== false || $old !== false) {
691 if ($old === false || ($new !== false && $new < $old)) {
692 // $new is a new link (not in $oldlinks).
693 $this->_add_backlink($new, $page);
694 $new = next($newlinks);
696 elseif ($new === false || $old < $new) {
697 // $old is an obsolete link (not in $newlinks).
698 $this->_delete_backlink($old, $page);
699 $old = next($oldlinks);
702 // Unchanged link (in both $newlist and $oldlinks).
703 assert($new == $old);
704 $new = next($newlinks);
705 $old = next($oldlinks);
711 * Rebuild the back-link index.
713 * This should never be needed, but if the database gets hosed for some reason,
714 * this should put it back into a consistent state.
716 * We assume the forward links in our table are correct, and recalculate
717 * all the backlinks appropriately.
719 function rebuild () {
722 // Delete the backlink tables, make a list of lo.page keys.
724 for ($key = $db->firstkey(); $key; $key = $db->nextkey()) {
727 elseif ($key[0] == 'o')
730 trigger_error("Bad key in linktable: '$key'", E_USER_WARNING);
734 foreach ($okeys as $key) {
735 $page = substr($key,1);
736 $links = $this->_get_links('o', $page);
738 $this->set_links($page, $links);
745 // FIXME: check for sortedness and uniqueness in links lists.
747 for ($key = $db->firstkey(); $key; $key = $db->nextkey()) {
748 if (strlen($key) < 1 || ($key[0] != 'i' && $key[0] != 'o')) {
749 $errs[] = "Bad key '$key' in table";
752 $page = substr($key, 1);
753 if ($key[0] == 'o') {
755 foreach($this->_get_links('o', $page) as $link) {
756 $link = $link['linkto'];
757 if (!$this->_has_link('i', $link, $page))
758 $errs[] = "backlink entry missing for link '$page'->'$link'";
762 assert($key[0] == 'i');
764 foreach($this->_get_links('i', $page) as $link) {
765 if (!$this->_has_link('o', $link, $page))
766 $errs[] = "link entry missing for backlink '$page'<-'$link'";
770 //if ($errs) $this->rebuild();
771 return isset($errs) ? $errs : false;
774 /* TODO: Add another lrRelationName key for relations.
775 * lrRelationName: frompage => topage
778 function _add_relation($page, $linkedfrom) {
779 $relations = $this->_get_links('r', $page);
780 $backlinks[] = $linkedfrom;
782 $this->_set_links('i', $page, $backlinks);
785 function _add_backlink($page, $linkedfrom) {
786 $backlinks = $this->_get_links('i', $page);
787 $backlinks[] = $linkedfrom;
789 $this->_set_links('i', $page, $backlinks);
/**
 * Remove $linkedfrom from the backlink list ('i' entry) of $page.
 *
 * Every entry that loosely equals $linkedfrom is dropped; the remaining
 * entries keep their original array keys when the list is written back.
 *
 * @param string $page       Page whose backlink list is updated.
 * @param string $linkedfrom Name of the page that no longer links to $page.
 */
function _delete_backlink($page, $linkedfrom) {
    $remaining = $this->_get_links('i', $page);
    foreach (array_keys($remaining) as $idx) {
        if ($remaining[$idx] == $linkedfrom) {
            unset($remaining[$idx]);
        }
    }
    $this->_set_links('i', $page, $remaining);
}
801 function _has_link($which, $page, $link) {
802 $links = $this->_get_links($which, $page);
803 // since links are always sorted, break if >
804 // TODO: binary search
805 foreach($links as $l) {
806 if ($l['linkto'] == $link)
808 if ($l['linkto'] > $link)
/**
 * Fetch the stored link list for a page.
 *
 * @param string $which Key prefix selecting the list; callers in this
 *                      class pass 'o', 'i' or 'r'.
 * @param string $page  Page name appended to the prefix to form the key.
 * @return array The unserialized list, or an empty array when nothing is
 *               stored or the stored value does not decode to an array.
 */
function _get_links($which, $page) {
    $data = $this->_db->get($which . $page);
    if (!$data) {
        return array();
    }
    // unserialize() yields false for a corrupt record. Callers iterate over
    // the result with foreach, so always hand back an array.
    $links = unserialize($data);
    return is_array($links) ? $links : array();
}
819 function _set_links($which, $page, &$links) {
820 $key = $which . $page;
822 $this->_db->set($key, serialize($links));
824 $this->_db->set($key, false);
828 // (c-file-style: "gnu")
833 // c-hanging-comment-ender-p: nil
834 // indent-tabs-mode: nil