4 * Copyright 1999-2002,2004,2005,2007,2009 $ThePhpWikiProgrammingTeam
6 * This file is part of PhpWiki.
8 * PhpWiki is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * PhpWiki is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 require_once 'lib/TextSearchQuery.php';
24 require_once 'lib/PageList.php';
27 * Case insensitive fulltext search
28 * Options: case_exact, regex, hilight
32 * Hooks to search in external documents: ExternalTextSearch
33 * Only uploaded: textfiles, PDF, HTML, DOC, XLS, ... or
34 * External apps: xapian-omega seems to be better than lucene,
35 * lucene.net, swish, namazu, ...
37 * See http://sf.net/tracker/index.php?aid=927395&group_id=6121&atid=106121
38 * Workaround to let the deadlocks occur somewhat later:
39 * Increase the memory limit of PHP from 8 MB to 32 MB
40 * php.ini: memory_limit = 32M
42 class WikiPlugin_FullTextSearch
// Returns the description of this plugin: a fixed English sentence passed
// through _() (presumably the gettext-style translation helper -- confirm).
45 function getDescription()
47 return _("Search the content of all pages in this wiki.");
// Builds the default argument set of the plugin.
// The visible entries combine PageList::supportedArgs() with search-specific
// defaults and then remove 'pagename' from the result.
// NOTE(review): the embedded line numbers skip 51, 53, 55-56, 58, 60 and 64-65,
// so some entries and the return statement are not shown in this listing.
50 function getDefaultArguments()
52 // All PageList::supportedArgs, except 'pagename'
54 PageList::supportedArgs(), // paging and more.
// Handed to TextSearchQuery by run(); off by default.
57 'case_exact' => false,
// NOTE(review): the leading '-' presumably requests descending order -- confirm
// against the PageList sort handling.
59 'sortby' => '-hi_content',
61 'exclude' => false, // comma-separated list of glob patterns
62 'quiet' => true)); // be less verbose
// 'pagename' arrives with the PageList arguments but is dropped for this plugin.
63 unset($args['pagename']);
69 * @param string $argstr
70 * @param WikiRequest $request
71 * @param string $basepage
72 * @return $this|bool|HtmlElement|PageList|XmlContent
// Plugin entry point: resolves the arguments, runs a full-text query through
// $dbi->fullSearch() and renders the hits, either via PageList (when 'quiet'
// is set) or as a manually assembled list with matching lines per page.
// NOTE(review): this listing omits several source lines (the embedded numbers
// skip), among them the assignment of $s, the initialisation of $count and
// $list for the manual branch, and the ends of several statements. Comments
// below describe only what is visible.
74 function run($dbi, $argstr, &$request, $basepage)
// Effective arguments, resolved from the argument string and the request.
77 $args = $this->getArgs($argstr, $request);
79 $hilight = $args['hilight'];
80 $case_exact = $args['case_exact'];
81 $regex = $args['regex'];
82 $sortby = $args['sortby'];
// $noheader is read here but not used on any line visible in this listing.
83 $noheader = $args['noheader'];
84 $exclude = $args['exclude'];
85 $quiet = $args['quiet'];
86 $limit = $args['limit'];
// Early exit with a warning paragraph. The guarding condition is on a line not
// shown here; the message implies it tests for an empty search term.
89 return HTML::p(array('class' => 'warning'),
90 _("You must enter a search term."));
// Build the query and let the database backend perform the search.
93 $query = new TextSearchQuery($s, $case_exact, $regex);
94 $pages = $dbi->fullSearch($query, $sortby, $limit, $exclude);
// false when highlighting is disabled, otherwise the regexp supplied by the query.
95 $hilight_re = $hilight ? $query->getHighlightRegexp() : false;
// Quiet mode: hand every hit to a PageList rendered as a definition list with
// a single content column that receives the search term and highlight regexp.
98 if ($quiet) { // see how easy it is with PageList...
100 $args['listtype'] = 'dl';
101 $args['types'] = array(new _PageList_Column_content
102 ('rev:hi_content', _("Content"), "left", $s, $hilight_re));
103 $list = new PageList(false, $exclude, $args);
104 $list->setCaption(fmt("Full text search results for “%s”", $s));
105 while ($page = $pages->next()) {
106 $list->addPage($page);
111 // Todo: we should better define a new PageListDL class for dl/dt/dd lists
112 // But the new column types must have a callback then. (showhits)
113 // See e.g. WikiAdminSearchReplace for custom pagelist columns
// The statement controlled by this condition is not shown in this listing.
// NOTE(review): is_int() is false for numeric strings, so a limit that arrives
// as a string takes this branch -- confirm that this is intended.
115 if (!$limit or !is_int($limit))
117 // expand all page wildcards to a list of pages which should be ignored
119 $exclude = explodePageList($exclude);
// "and" binds more loosely than "=", so $page receives the page object itself;
// iteration stops when the iterator is exhausted or the limit is reached.
121 while ($page = $pages->next() and (!$limit or ($count < $limit))) {
122 $name = $page->getName();
// Skip excluded pages (in addition to passing $exclude to fullSearch above).
123 if ($exclude and in_array($name, $exclude)) continue;
// One dt with a link to the page, followed by the matching lines from showhits().
125 $list->pushContent(HTML::dt(WikiLink($page)));
127 $list->pushContent($this->showhits($page, $hilight_re));
// Tell the reader when the output was cut off at the limit.
130 if ($limit and $count >= $limit) //todo: pager link to list of next matches
131 $list->pushContent(HTML::dd(fmt("only %d pages displayed", $limit)));
// An empty list gets an explicit "No matches" entry.
132 if (!$list->getContent())
133 $list->pushContent(HTML::dd(_("No matches")));
// If the search result reports stoplisted words, put a paragraph naming them in
// front; the remaining arguments of this HTML() call are not shown here.
135 if (!empty($pages->stoplisted))
136 $list = HTML(HTML::p(fmt(_("Ignored stoplist words “%s”"),
137 join(', ', $pages->stoplisted))),
// Result starts with a heading paragraph; the remaining arguments of this
// HTML() call are not shown in this listing.
141 return HTML(HTML::p(fmt("Full text search results for “%s”", $s)),
146 * @param WikiDB_Page $page
147 * @param string $hilight_re
// Collects the lines of the page's current revision that match $hilight_re
// (case-insensitively), highlights the matches in each line and wraps every
// line in a dd/small element with class 'search-context'.
// NOTE(review): the end of this function (rest of the HTML::small() call, loop
// end and return) is not shown in this listing.
150 function showhits($page, $hilight_re)
152 $current = $page->getCurrentRevision();
// preg_grep() takes an array, so getContent() is expected to return the page
// text as an array of lines.
// NOTE(review): $hilight_re is interpolated between "/" delimiters without
// escaping; confirm that getHighlightRegexp() never yields an unescaped "/".
153 $matches = preg_grep("/$hilight_re/i", $current->getContent());
155 foreach ($matches as $line) {
// Replace the raw line by its list of plain and highlighted fragments.
156 $line = $this->highlight_line($line, $hilight_re);
157 $html[] = HTML::dd(HTML::small(array('class' => 'search-context'),
/**
 * Split a line into plain-text fragments and highlighted search-term fragments.
 *
 * Repeatedly takes the leftmost case-insensitive match of $hilight_re, emits
 * the text in front of it unchanged and the matched text wrapped by
 * HTML::strong() with class 'search-term', then continues with the remainder
 * of the line.
 *
 * @param string $line       Text to scan.
 * @param string $hilight_re Regular expression body without delimiters or
 *                           flags; it is interpolated between "/" delimiters.
 * @return array Pre-match strings and HTML::strong elements in document
 *               order, always ending with the unmatched rest of the line.
 */
function highlight_line($line, $hilight_re)
{
    $html = array();
    while (preg_match("/^(.*?)($hilight_re)/i", $line, $m)) {
        if ($m[0] === '') {
            // A zero-length match at the start of the line (empty pattern,
            // "x*", ...) consumes nothing; without this guard the loop would
            // never terminate and $html would grow until memory is exhausted.
            break;
        }
        $line = substr($line, strlen($m[0])); // continue after the match
        $html[] = $m[1]; // prematch
        $html[] = HTML::strong(array('class' => 'search-term'), $m[2]); // match
    }
    $html[] = $line; // postmatch
    return $html;
}
176 * List of Links and link to ListLinks
// PageList column subclass.
// NOTE(review): nothing in the visible code instantiates this class (run()
// builds a _PageList_Column_content instead), and its members deal with link
// counts rather than highlighting -- it may be a leftover copied from another
// plugin; confirm before relying on it. Some lines of the class are not shown
// in this listing.
178 class _PageList_Column_hilight extends _PageList_Column
// Keeps a reference to $params[3] as the parent object and forwards the first
// three parameters to the parent's _PageList_Column() initialiser.
// NOTE(review): the method name does not match the class name, so PHP will not
// treat it as an old-style constructor; it only runs if called explicitly.
182 function _PageList_Column_WantedPages_links(&$params)
184 $this->parentobj =& $params[3];
185 $this->_PageList_Column($params[0], $params[1], $params[2]);
// Cell value: counts the entries stored for this page name in the parent
// object's _wpagelist and returns the LinkURL() result for the page's
// BackLinks action, labelled "(N Links)". $revision_handle is not used on the
// visible lines.
188 function _getValue(&$page, $revision_handle)
190 $pagename = $page->getName();
191 $count = count($this->parentobj->_wpagelist[$pagename]);
192 return LinkURL(WikiURL($page, array('action' => 'BackLinks'), false),
193 fmt("(%d Links)", $count));
201 // c-hanging-comment-ender-p: nil
202 // indent-tabs-mode: nil