2 rcs_id('$Id: backend.php,v 1.33 2007-06-07 21:35:04 rurban Exp $');
9 //:deleted (*) (Set if latest content is empty.)
17 %content (?should this be here?)
18 _supplanted : Time version ceased to be the current version
20 mtime (*) : Time of version edit.
23 author : nominal author
24 author_id : authenticated author
33 (types are scalars: strings, ints, bools)
37 * A WikiDB_backend handles the storage and retrieval of data for a WikiDB.
39 * A WikiDB_backend handles the storage and retrieval of data for a WikiDB.
40 * It does not have to be this way, of course, but the standard WikiDB uses
41 * a WikiDB_backend. (Other WikiDBs could be written which use some other
42 * method to access their underlying data store.)
44 * The interface outlined here seems to work well with both RDBM based
45 * and flat DBM/hash based methods of data storage.
47 * Though it contains some default implementation of certain methods,
48 * this is an abstract base class. It is expected that most efficient
49 * backends will override nearly all the methods in this class.
57 * Get page meta-data from database.
59 * @param $pagename string Page name.
61 * Returns a hash containing the page meta-data.
62 * Returns an empty array if there is no meta-data for the requested page.
63 * Keys which might be present in the hash are:
65 * <dt> locked <dd> If the page is locked.
66 * <dt> hits <dd> The page hit count.
67 * <dt> created <dd> Unix time of page creation. (FIXME: Deprecated: I
68 * don't think we need this...)
71 function get_pagedata($pagename) {
72 trigger_error("virtual", E_USER_ERROR);
76 * Update the page meta-data.
80 * Only meta-data whose keys are present in $newdata is affected.
84 * $backend->update_pagedata($pagename, array('locked' => 1));
86 * will set the value of 'locked' to 1 for the specified page, but it
87 * will not affect the value of 'hits' (or whatever other meta-data
88 * may have been stored for the page.)
90 * To delete a particular piece of meta-data, set its value to false.
92 * $backend->update_pagedata($pagename, array('locked' => false));
95 * @param $pagename string Page name.
96 * @param $newdata hash New meta-data.
98 function update_pagedata($pagename, $newdata) {
99 trigger_error("virtual", E_USER_ERROR);
104 * Get the current version number for a page.
106 * @param $pagename string Page name.
107 * @return int The latest version number for the page. Returns zero if
108 * no versions of a page exist.
110 function get_latest_version($pagename) {
111 trigger_error("virtual", E_USER_ERROR);
115 * Get preceding version number.
117 * @param $pagename string Page name.
118 * @param $version int Find version before this one.
119 * @return int The version number of the version in the database which
120 * immediately precedes $version.
122 function get_previous_version($pagename, $version) {
123 trigger_error("virtual", E_USER_ERROR);
127 * Get revision meta-data and content.
129 * @param $pagename string Page name.
130 * @param $version integer Which version to get.
131 * @param $want_content boolean
132 * Indicates the caller really wants the page content. If this
133 * flag is not set, the backend is free to skip fetching of the
134 * page content (as that may be expensive). If the backend omits
135 * the content, the backend might still want to set the value of
136 * '%content' to the empty string if it knows there's no content.
138 * @return hash The version data, or false if specified version does not
141 * Some keys which might be present in the $versiondata hash are:
144 * <dd> This is a pseudo-meta-data element (since it's actually
145 * the page data, get it?) containing the page content.
146 * If the content was not fetched, this key may not be present.
148 * For description of other version meta-data see WikiDB_PageRevision::get().
149 * @see WikiDB_PageRevision::get
151 function get_versiondata($pagename, $version, $want_content = false) {
152 trigger_error("virtual", E_USER_ERROR);
156 * Delete page from the database with backup possibility.
157 * This should remove all links (from the named page) from
160 * @param $pagename string Page name.
161 * i.e save_page('') and DELETE nonempty id
162 * Can be undone and is seen in RecentChanges.
164 function delete_page($pagename) {
166 $user =& $GLOBALS['request']->_user;
167 $vdata = array('author' => $user->getId(),
168 'author_id' => $user->getAuthenticatedId(),
171 $this->lock(); // critical section:
172 $version = $this->get_latest_version($pagename);
173 $this->set_versiondata($pagename, $version+1, $vdata);
174 $this->set_links($pagename, false); // links are purged.
175 // SQL needs to invalidate the non_empty id
176 if (! WIKIDB_NOCACHE_MARKUP) {
177 // need the hits, perms and LOCKED, otherwise you can reset the perm
178 // by action=remove and re-create it with default perms
179 $pagedata = $this->get_pagedata($pagename);
180 unset($pagedata['_cached_html']);
181 $this->update_pagedata($pagename, $pagedata);
187 * Delete page (and all its revisions) from the database.
190 function purge_page($pagename) {
191 trigger_error("virtual", E_USER_ERROR);
195 * Delete an old revision of a page.
197 * Note that one is never allowed to delete the most recent version,
198 * but that this requirement is enforced by WikiDB not by the backend.
200 * In fact, to be safe, backends should probably allow the deletion of
201 * the most recent version.
203 * @param $pagename string Page name.
204 * @param $version integer Version to delete.
206 function delete_versiondata($pagename, $version) {
207 trigger_error("virtual", E_USER_ERROR);
211 * Create a new page revision.
213 * If the given ($pagename,$version) is already in the database,
214 * this method completely overwrites any stored data for that version.
216 * @param $pagename string Page name.
217 * @param $version int Version number of the new revision.
218 * @param $data hash New revision metadata.
220 * @see get_versiondata
222 function set_versiondata($pagename, $version, $data) {
223 trigger_error("virtual", E_USER_ERROR);
227 * Update page version meta-data.
229 * If the given ($pagename,$version) is already in the database,
230 * this method only changes those meta-data values whose keys are
231 * explicitly listed in $newdata.
233 * @param $pagename string Page name.
234 * @param $version int Version number of the revision to update.
235 * @param $newdata hash New revision metadata.
236 * @see set_versiondata, get_versiondata
238 function update_versiondata($pagename, $version, $newdata) {
239 $data = $this->get_versiondata($pagename, $version, true);
244 foreach ($newdata as $key => $val) {
250 $this->set_versiondata($pagename, $version, $data);
254 * Set links for page.
256 * @param $pagename string Page name.
258 * @param $links array List of page(names) which page links to.
260 function set_links($pagename, $links) {
261 trigger_error("virtual", E_USER_ERROR);
265 * Find pages which link to or are linked from a page.
267 * @param $pagename string Page name.
268 * @param $reversed boolean True to get backlinks.
270 * FIXME: array or iterator?
271 * @return object A WikiDB_backend_iterator.
273 function get_links($pagename, $reversed, $include_empty=false,
274 $sortby='', $limit='', $exclude='') {
275 //FIXME: implement simple (but slow) link finder.
276 die("FIXME get_links");
280 * Get all revisions of a page.
282 * @param $pagename string The page name.
283 * @return object A WikiDB_backend_iterator.
285 function get_all_revisions($pagename) {
286 include_once('lib/WikiDB/backend/dumb/AllRevisionsIter.php');
287 return new WikiDB_backend_dumb_AllRevisionsIter($this, $pagename);
291 * Get all pages in the database.
293 * Pages should be returned in alphabetical order if that is
298 * @param $include_defaulted boolean
299 * If set, even pages with no content will be returned
300 * --- but still only if they have at least one revision (not
301 * counting the default revision 0) entered in the database.
303 * Normally pages whose current revision has empty content
304 * are not returned as these pages are considered to be
307 * @return object A WikiDB_backend_iterator.
309 function get_all_pages($include_defaulted, $orderby=false, $limit='', $exclude='') {
310 trigger_error("virtual", E_USER_ERROR);
314 * Title or full text search.
316 * Pages should be returned in alphabetical order if that is
321 * @param $search object A TextSearchQuery object describing the parsed query string,
322 * with efficient methods for SQL and PCRE match.
324 * @param $fulltext boolean If true, a full text search is performed,
325 * otherwise a title search is performed.
327 * @return object A WikiDB_backend_iterator.
329 * @see WikiDB::titleSearch
331 function text_search($search, $fulltext=false, $sortby='',
332 $limit='', $exclude='')
334 // This method implements a simple linear search
335 // through all the pages in the database.
337 // It is expected that most backends will overload
338 // this method with something more efficient.
339 include_once('lib/WikiDB/backend/dumb/TextSearchIter.php');
341 $pages = $this->get_all_pages(false, $sortby, false, $exclude);
342 return new WikiDB_backend_dumb_TextSearchIter($this, $pages, $search, $fulltext,
343 array('limit' => $limit,
344 'exclude' => $exclude));
351 * @param $pages object A TextSearchQuery object.
352 * @param $linkvalue object A TextSearchQuery object for the linkvalues
353 * (linkto, relation or backlinks or attribute values).
354 * @param $linktype string One of the 4 linktypes.
355 * @param $relation object A TextSearchQuery object or false.
356 * @param $options array Currently ignored. hash of sortby, limit, exclude.
357 * @return object A WikiDB_backend_iterator.
358 * @see WikiDB::linkSearch
360 function link_search( $pages, $linkvalue, $linktype, $relation=false, $options=array() ) {
361 include_once('lib/WikiDB/backend/dumb/LinkSearchIter.php');
362 $pageiter = $this->text_search($pages);
363 return new WikiDB_backend_dumb_LinkSearchIter($this, $pageiter, $linkvalue, $linktype, $relation, $options);
367 * Find pages with highest hit counts.
369 * Find the pages with the highest hit counts. The pages should
370 * be returned in reverse order by hit count.
373 * @param integer $limit No more than this many pages
374 * @return object A WikiDB_backend_iterator.
376 function most_popular($limit, $sortby='-hits') {
377 // This method fetches all pages, then
378 // sorts them by hit count.
379 // (Not very efficient.)
381 // It is expected that most backends will overload
382 // this method with something more efficient.
383 include_once('lib/WikiDB/backend/dumb/MostPopularIter.php');
384 $pages = $this->get_all_pages(false, $sortby, false);
385 return new WikiDB_backend_dumb_MostPopularIter($this, $pages, $limit);
389 * Find recent changes.
392 * @param $params hash See WikiDB::mostRecent for a description
393 * of parameters which can be included in this hash.
394 * @return object A WikiDB_backend_iterator.
395 * @see WikiDB::mostRecent
397 function most_recent($params) {
398 // This method is very inefficient and searches through
399 // all pages for the most recent changes.
401 // It is expected that most backends will overload
402 // this method with something more efficient.
403 include_once('lib/WikiDB/backend/dumb/MostRecentIter.php');
404 $pages = $this->get_all_pages(true, '-mtime');
405 return new WikiDB_backend_dumb_MostRecentIter($this, $pages, $params);
408 function wanted_pages($exclude_from='', $exclude='', $sortby='', $limit='') {
409 include_once('lib/WikiDB/backend/dumb/WantedPagesIter.php');
410 $allpages = $this->get_all_pages(true,false,false,$exclude_from);
411 return new WikiDB_backend_dumb_WantedPagesIter($this, $allpages, $exclude, $sortby, $limit);
415 * Lock backend database.
417 * Calls may be nested.
419 * @param $write_lock boolean Unless this is set to false, a write lock
420 * is acquired, otherwise a read lock. If the backend doesn't support
421 * read locking, then it should make a write lock no matter which type
422 * of lock was requested.
424 * All backends <em>should</em> support write locking.
426 function lock($write_lock = true) {
430 * Unlock backend database.
432 * @param $force boolean Normally, the database is not unlocked until
433 * unlock() is called as many times as lock() has been. If $force is
434 * set to true, the database is unconditionally unlocked.
436 function unlock($force = false) {
447 * Synchronize with filesystem.
449 * This should flush all unwritten data to the filesystem.
455 * Optimize the database.
457 function optimize() {
461 * Check database integrity.
463 * This should check the validity of the internal structure of the database.
464 * Errors should be reported via:
466 * trigger_error("Message goes here.", E_USER_WARNING);
469 * @return boolean True iff database is in a consistent state.
475 * Put the database into a consistent state
476 * by reparsing and restoring all pages.
478 * This should put the database into a consistent state.
479 * (I.e. rebuild indexes, etc...)
481 * @return boolean True iff successful.
485 $dbh = $request->getDbh();
486 $iter = $dbh->getAllPages(false);
487 while ($page = $iter->next()) {
488 $current = $page->getCurrentRevision(true);
489 $pagename = $page->getName();
490 $meta = $current->_data;
491 $version = $current->getVersion();
492 $content =& $meta['%content'];
493 $formatted = new TransformedText($page, $content, $current->getMetaData());
494 $type = $formatted->getType();
495 $meta['pagetype'] = $type->getName();
496 $links = $formatted->getWikiPageLinks(); // linkto => relation
497 $this->lock(array('version','page','recent','link','nonempty'));
498 $this->set_versiondata($pagename, $version, $meta);
499 $this->set_links($pagename, $links);
500 $this->unlock(array('version','page','recent','link','nonempty'));
504 function _parse_searchwords($search) {
505 $search = strtolower(trim($search));
507 return array(array(),array());
509 $words = preg_split('/\s+/', $search);
511 foreach ($words as $key => $word) {
512 if ($word[0] == '-' && $word != '-') {
513 $word = substr($word, 1);
514 $exclude[] = preg_quote($word);
518 return array($words, $exclude);
522 * Split the given limit parameter into offset,limit. (offset is optional. default: 0)
523 * Duplicate the PageList function here to avoid loading the whole PageList.php
525 * list($offset,$count) = $this->limit($args['limit']);
/**
 * Split a limit specification into an (offset, count) pair.
 *
 * Accepts either "count" or "offset,count". When no comma is present the
 * offset defaults to 0 and the given value is passed through unchanged
 * as the count.
 *
 * Usage:
 *   list($offset, $count) = $this->limit($args['limit']);
 *
 * @param $limit string Limit specification, e.g. "50" or "20,50".
 * @return array The comma-separated pieces (as strings) when a comma is
 *               present, otherwise array(0, $limit).
 */
function limit($limit) {
    // explode() replaces split(), which was deprecated in PHP 5.3 and
    // removed in PHP 7. The separator is a literal comma, so the regular
    // expression engine split() used was never needed here.
    if (strstr($limit, ','))
        return explode(',', $limit);
    else
        return array(0, $limit);
}
535 * Handle sortby requests for the DB iterator and table header links.
536 * Prefix the column with + or - like "+pagename","-mtime", ...
537 * supported actions: 'flip_order' "mtime" => "+mtime" => "-mtime" ...
538 * 'db' "-pagename" => "pagename DESC"
539 * In PageList all columns are sortable. (patch by DanFr)
540 * Here with the backend only some, the rest is delayed to PageList.
541 * (some kind of DumbIter)
542 * Duplicate the PageList function here to avoid loading the whole
543 * PageList.php, and it forces the backend specific sortable_columns()
545 function sortby ($column, $action, $sortable_columns=false) {
546 if (empty($column)) return '';
547 //support multiple comma-delimited sortby args: "+hits,+pagename"
548 if (strstr($column, ',')) {
550 foreach (explode(',', $column) as $col) {
552 $result[] = WikiDB_backend::sortby($col, $action);
554 $result[] = $this->sortby($col, $action);
556 return join(",",$result);
558 if (substr($column,0,1) == '+') {
559 $order = '+'; $column = substr($column,1);
560 } elseif (substr($column,0,1) == '-') {
561 $order = '-'; $column = substr($column,1);
563 // default order: +pagename, -mtime, -hits
565 if (in_array($column,array('mtime','hits')))
569 if ($action == 'flip_order') {
570 return ($order == '+' ? '-' : '+') . $column;
571 } elseif ($action == 'init') {
572 $this->_sortby[$column] = $order;
573 return $order . $column;
574 } elseif ($action == 'check') {
575 return (!empty($this->_sortby[$column]) or
576 ($GLOBALS['request']->getArg('sortby') and
577 strstr($GLOBALS['request']->getArg('sortby'),$column)));
578 } elseif ($action == 'db') {
579 // native sort possible?
580 if (!empty($this) and !$sortable_columns)
581 $sortable_columns = $this->sortable_columns();
582 if (in_array($column, $sortable_columns))
583 // asc or desc: +pagename, -pagename
584 return $column . ($order == '+' ? ' ASC' : ' DESC');
591 function sortable_columns() {
592 return array('pagename'/*,'mtime','author_id','author'*/);
595 // adds surrounding quotes
/**
 * Wrap a value in single quotes.
 *
 * Only the surrounding quotes are added; the value itself is not escaped.
 *
 * @param $s string Value to quote.
 * @return string The value enclosed in single quotes.
 */
function quote ($s) {
    return "'{$s}'";
}
597 // no surrounding quotes because we know it's a string
598 function qstr ($s) { return $s; }
601 return in_array(DATABASE_TYPE, array('SQL','ADODB','PDO'));
604 function write_accesslog(&$entry) {
607 $log_tbl = $entry->_accesslog->logtable;
608 // duration problem: sprintf "%f" might use comma e.g. "100,201" in european locales
609 $dbh->query("INSERT INTO $log_tbl"
610 . " (time_stamp,remote_host,remote_user,request_method,request_line,request_uri,"
611 . "request_args,request_time,status,bytes_sent,referer,agent,request_duration)"
612 . " VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
614 // Problem: date formats are backend specific. Either use unixtime as %d (long),
615 // or the native timestamp format.
619 $entry->request_method,
622 $entry->request_args,
623 $entry->_ncsa_time($entry->time),
633 * Iterator returned by backend methods which (possibly) return
636 * FIXME: This might be two separate classes: page_iter and version_iter.
637 * For the versions we have WikiDB_backend_dumb_AllRevisionsIter.
639 class WikiDB_backend_iterator
642 * Get the next record in the iterator set.
644 * This returns a hash. The hash may contain the following keys:
646 * <dt> pagename <dd> (string) the page name or linked page name on link iterators
647 * <dt> version <dd> (int) the version number
648 * <dt> pagedata <dd> (hash) page meta-data (as returned from backend::get_pagedata().)
649 * <dt> versiondata <dd> (hash) revision meta-data (as returned from backend::get_versiondata().)
650 * <dt> linkrelation <dd> (string) the page naming the relation (e.g. isa:=page <=> isa)
652 * If this is a page iterator, it must contain the 'pagename' entry --- the others
655 * If this is a version iterator, the 'pagename', 'version', <strong>and</strong> 'versiondata'
656 * entries are mandatory. ('pagedata' is optional.)
658 * If this is a link iterator, the 'pagename' is mandatory, 'linkrelation' is optional.
661 trigger_error("virtual", E_USER_ERROR);
665 if (!empty($this->_pages))
666 return count($this->_pages);
672 if (!empty($this->_pages)) {
673 reset($this->_pages);
674 return $this->_pages;
677 while ($page = $this->next())
684 * Release resources held by this iterator.
691 * search baseclass, pcre-specific
693 class WikiDB_backend_search
695 function WikiDB_backend_search($search, &$dbh) {
697 $this->_case_exact = $search->_case_exact;
698 $this->_stoplist =& $search->_stoplist;
699 $this->stoplisted = array();
701 function _quote($word) {
702 return preg_quote($word, "/");
704 //TODO: use word anchors
/**
 * Build a PCRE fragment that matches the whole subject exactly.
 *
 * @param $word string Literal search word.
 * @return string The quoted word anchored with "^" and "$".
 */
function EXACT($word) {
    $quoted = $this->_quote($word);
    return '^' . $quoted . '$';
}
/**
 * Build a PCRE fragment that matches subjects beginning with the word.
 *
 * @param $word string Literal search word.
 * @return string The quoted word prefixed with the "^" anchor.
 */
function STARTS_WITH($word) {
    $quoted = $this->_quote($word);
    return '^' . $quoted;
}
/**
 * Build a PCRE fragment that matches subjects ending with the word.
 *
 * @param $word string Literal search word.
 * @return string The quoted word followed by the "$" anchor.
 */
function ENDS_WITH($word) {
    $quoted = $this->_quote($word);
    return $quoted . '$';
}
/**
 * Build a PCRE fragment that matches the word anywhere in the subject.
 *
 * @param $word string Literal search word.
 * @return string The quoted word, without anchors.
 */
function WORD($word) {
    $quoted = $this->_quote($word);
    return $quoted;
}
/**
 * Pass a user-supplied regular expression through untouched.
 *
 * Unlike the other pattern builders, the word is not quoted.
 *
 * @param $word string Regular expression source.
 * @return string The same expression, unchanged.
 */
function REGEX($word) {
    $pattern = $word;
    return $pattern;
}
711 function _pagename_match_clause($node) {
713 $word = $this->$method($node->word);
714 return "preg_match(\"/\".$word.\"/\"".($this->_case_exact ? "i":"").")";
716 /* Eliminate stoplist words.
717 * Keep a list of Stoplisted words to inform the poor user.
719 function isStoplisted ($node) {
720 // check only on WORD or EXACT fulltext search
721 if ($node->op != 'WORD' and $node->op != 'EXACT')
723 if (preg_match("/^".$this->_stoplist."$/i", $node->word)) {
724 array_push($this->stoplisted, $node->word);
729 function getStoplisted($word) {
730 return $this->stoplisted;
735 * search baseclass, sql-specific
737 class WikiDB_backend_search_sql extends WikiDB_backend_search
739 function _pagename_match_clause($node) {
740 // word already quoted by TextSearchQuery_node_word::_sql_quote()
741 $word = $node->sql();
742 if ($word == '%') // ALL shortcut
745 return ($this->_case_exact
746 ? "pagename LIKE '$word'"
747 : "LOWER(pagename) LIKE '$word'");
749 function _fulltext_match_clause($node) {
750 // force word-style %word% for fulltext search
751 $word = '%' . $node->_sql_quote($node->word) . '%';
752 // eliminate stoplist words
753 if ($this->isStoplisted($node))
754 return "1=1"; // and (pagename or 1) => and 1
756 return $this->_pagename_match_clause($node)
757 // probably convert this MATCH AGAINST or SUBSTR/POSITION without wildcards
758 . ($this->_case_exact ? " OR content LIKE '$word'"
759 : " OR LOWER(content) LIKE '$word'");
763 // $Log: not supported by cvs2svn $
764 // Revision 1.32 2007/02/17 14:14:41 rurban
765 // enforce accesslog types
767 // Revision 1.31 2007/01/28 22:49:55 rurban
768 // use backend specific SQL write_accesslog
770 // Revision 1.30 2007/01/02 13:20:26 rurban
771 // added link_search. Clarify API: sortby,limit and exclude are strings.
779 // c-hanging-comment-ender-p: nil
780 // indent-tabs-mode: nil