2 rcs_id('$Id: dbaBase.php,v 1.31 2007-08-25 18:16:23 rurban Exp $');
4 require_once('lib/WikiDB/backend.php');
6 // FIXME:padding of data? Is it needed? dba_optimize() seems to do a good
7 // job at packing 'gdbm' (and 'db2') databases.
13 * Index: 'p' + pagename
14 * Values: latestversion . ':' . flags . ':' serialized hash of page meta data
15 * Currently flags = 1 if latest version has empty content.
18 * Index: 'v' + version:pagename
19 * Value: serialized hash of revision meta data, including:
20 * + quasi-meta-data %content
23 * index: 'o' + pagename
24 * value: serialized list of pages (names) which pagename links to.
25 * index: 'i' + pagename
26 * value: serialized list of pages which link to pagename
29 * Don't keep tables locked the whole time.
32 * - Yes - RecentChanges support. Lists of most recent edits (major, minor, either).
33 * 't' + mtime => 'a|i' + version+':'+pagename ('a': major, 'i': minor)
34 * Cost: Currently we have to get_all_pages and sort it by mtime.
35 * With a seperate t table we have to update this table on every version change.
36 * - No - list of pagenames for get_all_pages (very cheap: iterate page table)
37 * - Maybe - mostpopular list? 'h' + pagename => hits
39 * Separate hit table, so we don't have to update the whole page entry
40 * each time we get a hit. Maybe not so important though.
43 require_once('lib/DbaPartition.php');
45 class WikiDB_backend_dbaBase
46 extends WikiDB_backend
48 function WikiDB_backend_dbaBase (&$dba) {
50 // TODO: page and version tables should be in their own files, probably.
51 // We'll pack them all in one for now (testing).
52 // 2004-07-09 10:07:30 rurban: It's fast enough this way.
53 $this->_pagedb = new DbaPartition($dba, 'p');
54 $this->_versiondb = new DbaPartition($dba, 'v');
55 $linkdbpart = new DbaPartition($dba, 'l');
56 $this->_linkdb = new WikiDB_backend_dbaBase_linktable($linkdbpart);
57 $this->_dbdb = new DbaPartition($dba, 'd');
60 function sortable_columns() {
61 return array('pagename','mtime'/*,'author_id','author'*/);
69 $this->_db->optimize();
76 function rebuild($args=false) {
77 if (!empty($args['all']))
79 // rebuild backlink table
80 $this->_linkdb->rebuild();
84 function check($args=false) {
85 return $this->_linkdb->check();
88 function get_pagedata($pagename) {
89 $result = $this->_pagedb->get($pagename);
92 list(,,$packed) = explode(':', $result, 3);
93 $data = unserialize($packed);
97 function update_pagedata($pagename, $newdata) {
98 $result = $this->_pagedb->get($pagename);
100 list($latestversion,$flags,$data) = explode(':', $result, 3);
101 $data = unserialize($data);
104 $latestversion = $flags = 0;
108 foreach ($newdata as $key => $val) {
114 $this->_pagedb->set($pagename,
115 (int)$latestversion . ':'
120 function get_latest_version($pagename) {
121 return (int) $this->_pagedb->get($pagename);
124 function get_previous_version($pagename, $version) {
125 $versdb = &$this->_versiondb;
127 while (--$version > 0) {
128 if ($versdb->exists($version . ":$pagename"))
134 //check $want_content
135 function get_versiondata($pagename, $version, $want_content=false) {
136 $data = $this->_versiondb->get((int)$version . ":$pagename");
137 if (empty($data)) return false;
139 $data = unserialize($data);
141 $data['%content'] = !empty($data['%content']);
147 * See ADODB for a better delete_page(), which can be undone and is seen in RecentChanges.
150 //function delete_page($pagename) { $this->purge_page($pagename); }
153 * Completely delete page from the database.
155 function purge_page($pagename) {
156 $pagedb = &$this->_pagedb;
157 $versdb = &$this->_versiondb;
159 $version = $this->get_latest_version($pagename);
160 while ($version > 0) {
161 $versdb->set($version-- . ":$pagename", false);
163 $pagedb->set($pagename, false);
165 $this->set_links($pagename, false);
168 function rename_page($pagename, $to) {
169 $result = $this->_pagedb->get($pagename);
171 list($version, $flags, $data) = explode(':', $result, 3);
172 $data = unserialize($data);
177 $links = $this->_linkdb->get_links($pagename, false, false);
178 $this->_pagedb->delete($pagename);
179 $data['pagename'] = $to;
180 $this->_pagedb->set($to,
184 // move over the latest version only
185 $pvdata = $this->get_versiondata($pagename, $version, true);
186 $this->set_versiondata($to, $version, $pvdata);
188 // update links and backlinks
189 $this->_linkdb->set_links($to, $links);
190 // better: update all back-/inlinks for all outlinks.
196 * Delete an old revision of a page.
198 function delete_versiondata($pagename, $version) {
199 $versdb = &$this->_versiondb;
201 $latest = $this->get_latest_version($pagename);
203 assert($version > 0);
204 assert($version <= $latest);
206 $versdb->set((int)$version . ":$pagename", false);
208 if ($version == $latest) {
209 $previous = $this->get_previous_version($version);
211 $pvdata = $this->get_versiondata($pagename, $previous);
212 $is_empty = empty($pvdata['%content']);
216 $this->_update_latest_version($pagename, $previous, $is_empty);
221 * Create a new revision of a page.
223 function set_versiondata($pagename, $version, $data) {
224 $versdb = &$this->_versiondb;
226 $versdb->set((int)$version . ":$pagename", serialize($data));
227 if ($version > $this->get_latest_version($pagename))
228 $this->_update_latest_version($pagename, $version, empty($data['%content']));
231 function _update_latest_version($pagename, $latest, $flags) {
232 $pagedb = &$this->_pagedb;
234 $pdata = $pagedb->get($pagename);
236 list(,,$pagedata) = explode(':',$pdata,3);
238 $pagedata = serialize(array());
240 $pagedb->set($pagename, (int)$latest . ':' . (int)$flags . ":$pagedata");
243 function numPages($include_empty=false, $exclude='') {
244 $pagedb = &$this->_pagedb;
246 for ($page = $pagedb->firstkey(); $page!== false; $page = $pagedb->nextkey()) {
248 assert(!empty($page));
251 if ($exclude and in_array($page, $exclude)) continue;
252 if (!$include_empty) {
253 if (!($data = $pagedb->get($page))) continue;
254 list($latestversion,$flags,) = explode(':', $data, 3);
256 if ($latestversion == 0 || $flags != 0)
257 continue; // current content is empty
264 function get_all_pages($include_empty=false, $sortby='', $limit='', $exclude='') {
265 $pagedb = &$this->_pagedb;
267 if ($limit) // extract from,count from limit
268 list($from,$count) = $this->limit($limit);
269 for ($page = $pagedb->firstkey(); $page!== false; $page = $pagedb->nextkey()) {
271 assert(!empty($page));
274 if ($exclude and in_array($page, $exclude)) continue;
275 if ($limit and $from) {
277 if ($i < $from) continue;
279 if ($limit and count($pages) > $count) break;
280 if (!$include_empty) {
281 if (!($data = $pagedb->get($page))) continue;
282 list($latestversion,$flags,) = explode(':', $data, 3);
284 if ($latestversion == 0 || $flags != 0)
285 continue; // current content is empty
289 return new WikiDB_backend_dbaBase_pageiter($this, $pages,
290 array('sortby'=>$sortby/*,
291 'limit' =>$limit*/));
294 function set_links($pagename, $links) {
295 $this->_linkdb->set_links($pagename, $links);
298 function get_links($pagename, $reversed=true, $include_empty=false,
299 $sortby='', $limit='', $exclude='',
300 $want_relations=false)
302 // optimization: if no relation at all is found, mark it in the iterator.
303 $links = $this->_linkdb->get_links($pagename, $reversed, $want_relations);
305 return new WikiDB_backend_dbaBase_pageiter
307 array('sortby'=>$sortby,
310 'want_relations'=>$want_relations,
311 'found_relations' => $want_relations ? $this->_linkdb->found_relations : 0
318 * @return array of all linkrelations
319 * Faster than the dumb WikiDB method.
321 function list_relations($also_attributes=false, $only_attributes=false, $sorted=true) {
322 $linkdb = &$this->_linkdb;
323 $relations = array();
324 for ($link = $linkdb->_db->firstkey(); $link!== false; $link = $linkdb->_db->nextkey()) {
325 if ($link[0] != 'o') continue;
326 $links = $linkdb->_get_links('o', substr($link,1));
327 foreach ($links as $link) { // linkto => page, linkrelation => page
329 and $link['relation']
330 and !in_array($link['relation'], $relations))
332 $is_attribute = empty($link['linkto']); // a relation has both
334 if ($only_attributes or $also_attributes)
335 $relations[] = $link['relation'];
336 } elseif (!$only_attributes) {
337 $relations[] = $link['relation'];
350 * WikiDB_backend_dumb_LinkSearchIter searches over all pages and then all its links.
351 * Since there are less links than pages, and we easily get the pagename from the link key,
352 * we iterate here directly over the linkdb and check the pagematch there.
354 * @param $pages object A TextSearchQuery object for the pagename filter.
355 * @param $query object A SearchQuery object (Text or Numeric) for the linkvalues,
356 * linkto, linkfrom (=backlink), relation or attribute values.
357 * @param $linktype string One of the 4 linktypes "linkto", "linkfrom" (=backlink), "relation" or "attribute".
358 * Maybe also "relation+attribute" for the advanced search.
359 * @param $relation object A TextSearchQuery for the linkname or false.
360 * @param $options array Currently ignored. hash of sortby, limit, exclude.
361 * @return object A WikiDB_backend_iterator.
362 * @see WikiDB::linkSearch
364 function link_search( $pages, $query, $linktype, $relation=false, $options=array() ) {
365 $linkdb = &$this->_linkdb;
368 $want_relations = false;
369 if ($linktype == 'relation') {
370 $want_relations = true;
371 $field = 'linkrelation';
373 if ($linktype == 'attribute') {
374 $want_relations = true;
375 $field = 'attribute';
377 if ($linktype == 'linkfrom') {
381 for ($link = $linkdb->_db->firstkey(); $link!== false; $link = $linkdb->_db->nextkey()) {
382 $type = $reverse ? 'i' : 'o';
383 if ($link[0] != $type) continue;
384 $pagename = substr($link, 1);
385 if (!$pages->match($pagename)) continue;
386 if ($linktype == 'attribute') {
387 $page = $GLOBALS['request']->_dbi->getPage($pagename);
388 $attribs = $page->get('attributes');
390 /* Optimization on expressive searches:
391 for queries with multiple attributes.
392 Just take the defined placeholders from the query(ies)
393 if there are more attributes than query variables.
395 if ($query->getType() != 'text'
397 and ((count($vars = $query->getVars()) > 1)
398 or (count($attribs) > count($vars))))
400 // names must strictly match. no * allowed
401 if (!$query->can_match($attribs)) continue;
402 if (!($result = $query->match($attribs))) continue;
403 foreach ($result as $r) {
404 $r['pagename'] = $pagename;
408 // textsearch or simple value. no strict bind by name needed
409 foreach ($attribs as $attribute => $value) {
410 if ($relation and !$relation->match($attribute)) continue;
411 if (!$query->match($value)) continue;
412 $links[] = array('pagename' => $pagename,
413 'linkname' => $attribute,
414 'linkvalue' => $value);
420 // TODO: honor limits. this can get large.
421 if ($want_relations) {
422 // MAP linkrelation : pagename => thispagename : linkname : linkvalue
423 $_links = $linkdb->_get_links('o', $pagename);
424 foreach ($_links as $link) { // linkto => page, linkrelation => page
425 if (!isset($link['relation']) or !$link['relation']) continue;
426 if ($relation and !$relation->match($link['relation'])) continue;
427 if (!$query->match($link['linkto'])) continue;
428 $links[] = array('pagename' => $pagename,
429 'linkname' => $link['relation'],
430 'linkvalue' => $link['linkto']);
433 $_links = $linkdb->_get_links($reverse ? 'i' : 'o', $pagename);
434 foreach ($_links as $link) { // linkto => page
436 $link = $link['linkto'];
437 if (!$query->match($link)) continue;
438 $links[] = array('pagename' => $pagename,
440 'linkvalue' => $link);
445 $options['want_relations'] = true; // Iter hack to force return of the whole hash
446 return new WikiDB_backend_dbaBase_pageiter($this, $links, $options);
450 * Handle multi-searches for many relations and attributes in one expression.
451 * Bind all required attributes and relations per page together and pass it to one query.
452 * (is_a::city and population < 20000) and (*::city and area > 1000000)
453 * (is_a::city or linkto::CategoryCountry) and population < 20000 and area > 1000000
454 * Note that the 'linkto' and 'linkfrom' links are relations, containing an array.
456 * @param $pages object A TextSearchQuery object for the pagename filter.
457 * @param $query object A SemanticSearchQuery object for the links.
458 * @param $options array Currently ignored. hash of sortby, limit, exclude for the pagelist.
459 * @return object A WikiDB_backend_iterator.
460 * @see WikiDB::linkSearch
462 function relation_search( $pages, $query, $options=array() ) {
463 $linkdb = &$this->_linkdb;
465 // We need to detect which attributes and relation names we should look for. NYI
466 $want_attributes = $query->hasAttributes();
467 $want_relation = $query->hasRelations();
468 $linknames = $query->getLinkNames();
469 // create a hash for faster checks
470 $linkcheck = array();
471 foreach ($linknames as $l) $linkcheck[$l] = 1;
473 for ($link = $linkdb->_db->firstkey(); $link!== false; $link = $linkdb->_db->nextkey()) {
474 $type = $reverse ? 'i' : 'o';
475 if ($link[0] != $type) continue;
476 $pagename = substr($link, 1);
477 if (!$pages->match($pagename)) continue;
478 $pagelinks = array();
479 if ($want_attributes) {
480 $page = $GLOBALS['request']->_dbi->getPage($pagename);
481 $attribs = $page->get('attributes');
482 $pagelinks = $attribs;
484 if ($want_relations) {
485 // all links contain arrays of pagenames, just the attributes
486 // are guaranteed to be singular
487 if (isset($linkcheck['linkfrom'])) {
488 $pagelinks['linkfrom'] = $linkdb->_get_links('i', $pagename);
490 $outlinks = $linkdb->_get_links('o', $pagename);
491 $want_to = isset($linkcheck['linkto']);
492 foreach ($outlinks as $link) { // linkto => page, relation => page
494 if ((isset($link['relation'])) and $link['relation']
495 and isset($linkcheck[$link['relation']]))
496 $pagelinks[$link['relation']][] = $link['linkto'];
498 $pagelinks['linkto'][] = is_array($link) ? $link['linkto'] : $link;
501 if ($result = $query->match($pagelinks)) {
502 $links = array_merge($links, $result);
505 $options['want_relations'] = true; // Iter hack to force return of the whole hash
506 return new WikiDB_backend_dbaBase_pageiter($this, $links, $options);
510 function WikiDB_backend_dbaBase_sortby_pagename_ASC ($a, $b) {
511 return strcasecmp($a, $b);
513 function WikiDB_backend_dbaBase_sortby_pagename_DESC ($a, $b) {
514 return strcasecmp($b, $a);
516 function WikiDB_backend_dbaBase_sortby_mtime_ASC ($a, $b) {
517 return WikiDB_backend_dbaBase_sortby_num($a, $b, 'mtime');
519 function WikiDB_backend_dbaBase_sortby_mtime_DESC ($a, $b) {
520 return WikiDB_backend_dbaBase_sortby_num($b, $a, 'mtime');
523 function WikiDB_backend_dbaBase_sortby_hits_ASC ($a, $b) {
524 return WikiDB_backend_dbaBase_sortby_num($a, $b, 'hits');
526 function WikiDB_backend_dbaBase_sortby_hits_DESC ($a, $b) {
527 return WikiDB_backend_dbaBase_sortby_num($b, $a, 'hits');
530 function WikiDB_backend_dbaBase_sortby_num($aname, $bname, $field) {
532 $dbi = $request->getDbh();
533 // fields are stored in versiondata
534 $av = $dbi->_backend->get_latest_version($aname);
535 $bv = $dbi->_backend->get_latest_version($bname);
536 $a = $dbi->_backend->get_versiondata($aname, $av, false);
538 $b = $dbi->_backend->get_versiondata($bname, $bv, false);
539 if (!$b or !isset($b[$field])) return 0;
540 if (empty($a[$field])) return -1;
541 if ((!isset($a[$field]) and !isset($b[$field])) or ($a[$field] === $b[$field])) {
544 return ($a[$field] < $b[$field]) ? -1 : 1;
548 class WikiDB_backend_dbaBase_pageiter
549 extends WikiDB_backend_iterator
551 // fixed for linkrelations
552 function WikiDB_backend_dbaBase_pageiter(&$backend, &$pages, $options=false) {
553 $this->_backend = $backend;
554 $this->_options = $options;
556 if (!empty($options['sortby'])) {
557 $sortby = WikiDB_backend::sortby($options['sortby'], 'db', array('pagename','mtime'));
558 if ($sortby and !strstr($sortby, "hits ")) { // check for which column to sortby
559 usort($pages, 'WikiDB_backend_dbaBase_sortby_'.str_replace(' ','_',$sortby));
562 if (!empty($options['limit'])) {
563 list($offset,$limit) = WikiDB_backend::limit($options['limit']);
564 $pages = array_slice($pages, $offset, $limit);
566 $this->_pages = $pages;
568 $this->_pages = array();
571 // fixed for relations
573 if ( ! ($page = array_shift($this->_pages)) )
575 if (!empty($this->_options['want_relations'])) {
576 // $linkrelation = $page['linkrelation'];
577 $pagename = $page['pagename'];
578 if (!empty($this->_options['exclude']) and in_array($pagename, $this->_options['exclude']))
579 return $this->next();
582 if (!empty($this->_options['exclude']) and in_array($page, $this->_options['exclude']))
583 return $this->next();
584 return array('pagename' => $page);
588 $this->_pages = array();
592 class WikiDB_backend_dbaBase_linktable
594 function WikiDB_backend_dbaBase_linktable(&$dba) {
598 //TODO: try storing link lists as hashes rather than arrays.
599 // backlink deletion would be faster.
600 function get_links($page, $reversed=true, $want_relations=false) {
601 if ($want_relations) {
602 $this->found_relations = 0;
603 $links = $this->_get_links($reversed ? 'i' : 'o', $page);
604 $linksonly = array();
605 foreach ($links as $link) { // linkto => page, linkrelation => page
606 if (is_array($link) and isset($link['relation'])) {
607 if ($link['relation'])
608 $this->found_relations++;
609 $linksonly[] = array('pagename' => $link['linkto'],
610 'linkrelation' => $link['relation']);
611 } else { // empty relations are stripped
612 $linksonly[] = array('pagename' => $link['linkto']);
617 $links = $this->_get_links($reversed ? 'i' : 'o', $page);
618 $linksonly = array();
619 foreach ($links as $link) {
620 if (is_array($link)) {
621 $linksonly[] = $link['linkto'];
623 $linksonly[] = $link;
629 // fixed: relations ready
630 function set_links($page, $links) {
632 $oldlinks = $this->get_links($page, false, false);
634 if (!is_array($links)) {
635 assert(empty($links));
638 $this->_set_links('o', $page, $links);
640 /* Now for the backlink update we squash the linkto hashes into a simple array */
642 foreach ($links as $hash) {
643 if (!empty($hash['linkto']) and !in_array($hash['linkto'], $newlinks))
644 // for attributes it's empty
645 $newlinks[] = $hash['linkto'];
646 elseif (is_string($hash) and !in_array($hash, $newlinks))
649 //$newlinks = array_unique($newlinks);
655 $new = current($newlinks);
656 $old = current($oldlinks);
657 while ($new !== false || $old !== false) {
658 if ($old === false || ($new !== false && $new < $old)) {
659 // $new is a new link (not in $oldlinks).
660 $this->_add_backlink($new, $page);
661 $new = next($newlinks);
663 elseif ($new === false || $old < $new) {
664 // $old is a obsolete link (not in $newlinks).
665 $this->_delete_backlink($old, $page);
666 $old = next($oldlinks);
669 // Unchanged link (in both $newlist and $oldlinks).
670 assert($new == $old);
671 $new = next($newlinks);
672 $old = next($oldlinks);
678 * Rebuild the back-link index.
680 * This should never be needed, but if the database gets hosed for some reason,
681 * this should put it back into a consistent state.
683 * We assume the forward links in the our table are correct, and recalculate
684 * all the backlinks appropriately.
686 function rebuild () {
689 // Delete the backlink tables, make a list of lo.page keys.
691 for ($key = $db->firstkey(); $key; $key = $db->nextkey()) {
694 elseif ($key[0] == 'o')
697 trigger_error("Bad key in linktable: '$key'", E_USER_WARNING);
701 foreach ($okeys as $key) {
702 $page = substr($key,1);
703 $links = $this->_get_links('o', $page);
705 $this->set_links($page, $links);
712 // FIXME: check for sortedness and uniqueness in links lists.
714 for ($key = $db->firstkey(); $key; $key = $db->nextkey()) {
715 if (strlen($key) < 1 || ($key[0] != 'i' && $key[0] != 'o')) {
716 $errs[] = "Bad key '$key' in table";
719 $page = substr($key, 1);
720 if ($key[0] == 'o') {
722 foreach($this->_get_links('o', $page) as $link) {
723 $link = $link['linkto'];
724 if (!$this->_has_link('i', $link, $page))
725 $errs[] = "backlink entry missing for link '$page'->'$link'";
729 assert($key[0] == 'i');
731 foreach($this->_get_links('i', $page) as $link) {
732 if (!$this->_has_link('o', $link, $page))
733 $errs[] = "link entry missing for backlink '$page'<-'$link'";
737 //if ($errs) $this->rebuild();
738 return isset($errs) ? $errs : false;
741 /* TODO: Add another lrRelationName key for relations.
742 * lrRelationName: frompage => topage
745 function _add_relation($page, $linkedfrom) {
746 $relations = $this->_get_links('r', $page);
747 $backlinks[] = $linkedfrom;
749 $this->_set_links('i', $page, $backlinks);
752 function _add_backlink($page, $linkedfrom) {
753 $backlinks = $this->_get_links('i', $page);
754 $backlinks[] = $linkedfrom;
756 $this->_set_links('i', $page, $backlinks);
759 function _delete_backlink($page, $linkedfrom) {
760 $backlinks = $this->_get_links('i', $page);
761 foreach ($backlinks as $key => $backlink) {
762 if ($backlink == $linkedfrom)
763 unset($backlinks[$key]);
765 $this->_set_links('i', $page, $backlinks);
768 function _has_link($which, $page, $link) {
769 $links = $this->_get_links($which, $page);
770 // since links are always sorted, break if >
771 // TODO: binary search
772 foreach($links as $l) {
773 if ($l['linkto'] == $link)
775 if ($l['linkto'] > $link)
781 function _get_links($which, $page) {
782 $data = $this->_db->get($which . $page);
783 return $data ? unserialize($data) : array();
786 function _set_links($which, $page, &$links) {
787 $key = $which . $page;
789 $this->_db->set($key, serialize($links));
791 $this->_db->set($key, false);
795 // $Log: not supported by cvs2svn $
796 // Revision 1.30 2007/07/15 17:39:25 rurban
797 // update links at rename. optimize _has_link from O(n) to O(n/2) - binary search O(log n) in work
799 // Revision 1.29 2007/05/24 18:39:10 rurban
800 // limits for get_all_pages, improved WantedPages
802 // Revision 1.28 2007/01/03 21:26:01 rurban
803 // Fix dba searching for relations. Optimize link_search for strict attribute search. Add relation_search()
805 // Revision 1.27 2007/01/02 13:19:33 rurban
806 // faster list_relations method. new native link_search method. additions to rebuild() (still very slow), fix iterator options (for want_relations and exclude). Clarify API: sortby,limit and exclude are strings
808 // Revision 1.26 2006/12/22 00:27:37 rurban
812 // (c-file-style: "gnu")
817 // c-hanging-comment-ender-p: nil
818 // indent-tabs-mode: nil