2 rcs_id('$Id: FullTextSearch.php,v 1.27 2007-01-04 16:46:40 rurban Exp $');
4 Copyright 1999,2000,2001,2002,2004,2005 $ThePhpWikiProgrammingTeam
6 This file is part of PhpWiki.
8 PhpWiki is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 PhpWiki is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with PhpWiki; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 require_once('lib/TextSearchQuery.php');
24 require_once("lib/PageList.php");
27 * Case insensitive fulltext search
28 * Options: case_exact, regex, hilight
31 * TODO: Hooks to search in external documents: ExternalTextSearch
32 * Only uploaded: textfiles, PDF, HTML, DOC, XLS, ... or
33 * External apps: xapian-omega seems to be the best, over lucene.net,
34 * swish, namazu, ...
36 * See http://sf.net/tracker/index.php?aid=927395&group_id=6121&atid=106121
37 * Workaround to let the deadlocks occur somewhat later:
38 * increased the memory limit of PHP4 from 8 MB to 32 MB
39 * php.ini: memory_limit = 32 MB
// Plugin class providing the wiki's full text search.
// NOTE(review): the remainder of the class header and the declaration of the
// method that owns the return statement below are not visible in this view.
41 class WikiPlugin_FullTextSearch
// The plugin name is passed through _() (gettext) before being returned.
45 return _("FullTextSearch");
// Returns the one-line description of this plugin, passed through _() (gettext)
// so that it can be translated.
48 function getDescription() {
49 return _("Search the content of all pages in this wiki.");
// Returns the plugin revision taken from the CVS Revision keyword string.
// Note that the pattern is a character class, not a literal match: it deletes
// every single occurrence of the characters R, e, v, i, s, o, n, ':', ' ' and
// '$' from the subject. For the string used here that leaves "1.27".
52 function getVersion() {
53 return preg_replace("/[Revision: $]/", '',
54 "\$Revision: 1.27 $");
// Returns the default arguments of this plugin.
// Visible in this view: the entries supplied by PageList::supportedArgs() plus
// 'case_exact', 'exclude' and 'quiet', all defaulting to false.
// NOTE(review): the call that combines these pieces and any further defaults
// (run() also uses s, regex, hilight, sortby and limit) are not visible here.
57 function getDefaultArguments() {
60 PageList::supportedArgs(), // paging and more.
63 'case_exact' => false,
66 'exclude' => false, // comma-separated list of globs
68 'quiet' => false)); // be less verbose
// Runs the full text search and builds the result markup.
//
// $dbi      - database object; its fullSearch() method performs the search.
// $argstr   - raw plugin argument string, parsed through $this->getArgs().
// $request  - request object (taken by reference), forwarded to getArgs().
// $basepage - not used in the lines visible here.
//
// NOTE(review): $s, $case_exact, $regex, $sortby, $limit, $exclude, $hilight,
// $quiet and $count are not assigned in the lines visible in this view;
// presumably they are populated from $args and initialised by code that is
// outside this view - confirm.
71 function run($dbi, $argstr, &$request, $basepage) {
73 $args = $this->getArgs($argstr, $request);
// A missing or empty search string 's' is handled separately (the branch body
// is not visible in this view).
74 if (empty($args['s']))
// Build the query object from the search string and the case_exact/regex flags
// and hand it to the database together with sorting, limit and exclusions.
78 $query = new TextSearchQuery($s, $case_exact, $regex);
79 $pages = $dbi->fullSearch($query, $sortby, $limit, $exclude);
// The highlight pattern is only requested from the query when $hilight is
// truthy; otherwise $hilight_re is false.
81 $hilight_re = $hilight ? $query->getHighlightRegexp() : false;
// Quiet mode: the matching pages are collected in a PageList.
84 if ($quiet) { // see how easy it is with PageList...
85 $list = new PageList(false,$exclude,$args);
// "and" binds looser than "=", so $page receives the result of next() and the
// limit test is evaluated afterwards.
86 while ($page = $pages->next() and (!$limit or ($count < $limit))) {
87 $list->addPage( $page );
92 // Todo: we should better define a new PageListDL class for dl/dt/dd lists
93 // But the new column types must have a callback then. (showhits)
94 // See e.g. WikiAdminSearchReplace for custom pagelist columns
// A falsy or non-integer $limit gets special treatment (the branch body is not
// visible in this view).
96 if (!$limit or !is_int($limit))
98 // expand all page wildcards to a list of pages which should be ignored
99 if ($exclude) $exclude = explodePageList($exclude);
// Same precedence note as above: $page is assigned before the limit test.
100 while ($page = $pages->next() and (!$limit or ($count < $limit))) {
101 $name = $page->getName();
// NOTE(review): in_array() is called without its strict flag, so page names
// are compared loosely against the exclude list.
102 if ($exclude and in_array($name,$exclude)) continue;
// One dt entry linking to the page, followed by whatever showhits() returns.
104 $list->pushContent(HTML::dt(WikiLink($name)));
106 $list->pushContent($this->showhits($page, $hilight_re));
109 if ($limit and $count >= $limit) //todo: pager link to list of next matches
110 $list->pushContent(HTML::dd(fmt("only %d pages displayed",$limit)));
// An empty list gets a placeholder entry instead.
111 if (!$list->getContent())
112 $list->pushContent(HTML::dd(_("<no matches>")));
// When the result object carries stoplisted words, a paragraph naming them is
// placed in front of the list.
114 if (!empty($pages->stoplisted))
115 $list = HTML(HTML::p(fmt(_("Ignored stoplist words '%s'"),
116 join(', ', $pages->stoplisted))),
// NOTE(review): this format string and the "only %d pages displayed" one are
// passed to fmt() directly, while the stoplist message wraps its format in
// _() - confirm whether fmt() translates on its own.
120 return HTML(HTML::p(fmt("Full text search results for '%s'", $s)),
// Builds the context entries for one matching page: the content of the page's
// current revision is filtered with preg_grep() using $hilight_re
// (case-insensitively), and every matching line is run through
// highlight_line() and wrapped in a dd/small element of class 'search-context'.
//
// $page       - page object providing getCurrentRevision().
// $hilight_re - regular expression source, interpolated between "/" delimiters.
//
// NOTE(review): preg_grep() expects an array, so getContent() presumably
// returns the page text as an array of lines - confirm.
// NOTE(review): $hilight_re is interpolated without quoting; an unescaped "/"
// in it would end the pattern early - confirm getHighlightRegexp() escapes it.
// NOTE(review): run() sets $hilight_re to false when highlighting is off; with
// false the pattern becomes "//i", which matches every line - confirm the
// caller guards against that.
124 function showhits($page, $hilight_re) {
125 $current = $page->getCurrentRevision();
126 $matches = preg_grep("/$hilight_re/i", $current->getContent());
128 foreach ($matches as $line) {
129 $line = $this->highlight_line($line, $hilight_re);
130 $html[] = HTML::dd(HTML::small(array('class' => 'search-context'),
// Splits one line of text into alternating plain and highlighted pieces.
// Each pass matches the shortest prefix followed by one occurrence of
// $hilight_re (case-insensitively), appends the prefix as plain text and the
// occurrence wrapped in a strong element of class 'search-term' to $html, and
// removes the consumed part from the front of $line. What is left of $line is
// appended last.
//
// $line       - the text line to process.
// $hilight_re - regular expression source, interpolated between "/" delimiters.
//
// NOTE(review): if the pattern can match with zero length at the start of
// $line, strlen($m[0]) is 0 and $line is never shortened, so the loop would
// not terminate - confirm $hilight_re cannot match the empty string.
// NOTE(review): the end of the loop and the return statement are not visible
// in this view.
136 function highlight_line ($line, $hilight_re) {
137 while (preg_match("/^(.*?)($hilight_re)/i", $line, $m)) {
138 $line = substr($line, strlen($m[0]));
139 $html[] = $m[1]; // prematch
140 $html[] = HTML::strong(array('class' => 'search-term'), $m[2]); // match
142 $html[] = $line; // postmatch
147 // $Log: not supported by cvs2svn $
148 // Revision 1.26 2005/11/14 22:33:04 rurban
149 // print ignored stoplist words
151 // Revision 1.25 2005/09/11 14:55:05 rurban
152 // implement fulltext stoplist
154 // Revision 1.24 2004/11/26 18:39:02 rurban
155 // new regex search parser and SQL backends (90% complete, glob and pcre backends missing)
157 // Revision 1.23 2004/11/23 15:17:19 rurban
158 // better support for case_exact search (not caseexact for consistency),
159 // plugin args simplification:
160 // handle and explode exclude and pages argument in WikiPlugin::getArgs
161 // and exclude in advance (at the sql level if possible)
162 // handle sortby and limit from request override in WikiPlugin::getArgs
163 // ListSubpages: renamed pages to maxpages
165 // Revision 1.22 2004/05/28 11:01:58 rurban
166 // support to disable highlighting
167 // example: s=ReiniUrban&hilight=0&noheader=1
169 // Revision 1.21 2004/04/18 01:11:52 rurban
170 // more numeric pagename fixes.
171 // fixed action=upload with merge conflict warnings.
172 // charset changed from constant to global (dynamic utf-8 switching)
174 // Revision 1.20 2004/02/28 21:14:08 rurban
175 // generally more PHPDOC docs
176 // see http://xarch.tu-graz.ac.at/home/rurban/phpwiki/xref/
177 // fixed WikiUserNew pref handling: empty theme not stored, save only
178 // changed prefs, sql prefs improved, fixed password update,
179 // removed REPLACE sql (dangerous)
180 // moved gettext init after the locale was guessed
181 // + some minor changes
183 // Revision 1.19 2004/02/26 04:27:39 rurban
184 // wrong limit notification
186 // Revision 1.18 2004/02/26 04:24:03 rurban
187 // simplify quiet handling by using PageList
189 // Revision 1.17 2004/02/26 04:03:39 rurban
190 // added quiet, limit and exclude to FullTextSearch,
191 // fixed explodePageList with previously unloaded PageList
193 // Revision 1.16 2004/02/17 12:11:36 rurban
194 // added missing 4th basepage arg at plugin->run() to almost all plugins. This caused no harm so far, because it was silently dropped on normal usage. However on plugin internal ->run invocations it failed. (InterWikiSearch, IncludeSiteMap, ...)
196 // Revision 1.15 2003/01/18 21:41:01 carstenklapp
198 // Reformatting & tabs to spaces;
199 // Added copyleft, getVersion, getDescription, rcs_id.
206 // c-hanging-comment-ender-p: nil
207 // indent-tabs-mode: nil