4 * Copyright 2007 Reini Urban
5 * Copyright 2009 Marc-Etienne Vargenau, Alcatel-Lucent
7 * This file is part of PhpWiki.
9 * PhpWiki is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * PhpWiki is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 require_once 'lib/PageList.php';
25 require_once 'lib/TextSearchQuery.php';
26 require_once 'lib/Units.php';
27 require_once 'lib/SemanticWeb.php';
30 * Search for relations/attributes and their values.
31 * page - relation::object. e.g. list all cities: is_a::city => relation=is_a&s=city
32 * We search for a relation and, if the search is also valid for attributes,
33 * for an attribute as well, and OR-combine the results.
35 * An attribute has just a value, which is a number and is certainly not a pagename,
36 * and its value goes through some unit unification (not yet).
37 * We can also do numerical comparison and unit lifting with attributes.
38 * population > 1000000
39 * population > 1 million
42 * - The backends can already do simple AND/OR combinations of multiple
43 * relations and attributes to search for; only the UI cannot yet. TODO: implement the AND/OR buttons.
44 * population < 1 million AND area > 50 km2
45 * - Due to attribute internals a relation search with matching attribute names will also
46 * find those attribute names, but not the values. You must explicitly search for attributes then.
48 * The Advanced query can do a freeform query expression with multiple comparison and nesting.
49 * "is_a::city and population > 1.000.000 and population < 10.000.000"
50 * "(is_a::city or is_a::country) and population < 10.000.000"
52 * @author: Reini Urban
// Plugin class for searching relations and attributes and their values.
54 class WikiPlugin_SemanticSearch
// Returns the plugin name passed through _() for translation.
// NOTE(review): the header of the enclosing method is not visible in this
// listing - presumably getName(); confirm.
59 return _("SemanticSearch");
// Return the plugin's one-line description, passed through _() for translation.
62 function getDescription()
64 return _("Search relations and attributes.");
// Return the plugin's default arguments: the arguments reported by
// PageList::supportedArgs() together with the search-specific keys below.
// NOTE(review): the expression that combines the two lists is not visible in
// this listing - presumably an array_merge(); confirm. run() also reads
// $args['case_exact'] and $args['exclude'], whose defaults are not visible here.
67 function getDefaultArguments()
71 PageList::supportedArgs(), // paging and more.
73 's' => "*", // linkvalue query string
74 'page' => "*", // which pages (glob allowed), default: all
75 'relation' => '', // linkname. which relations. default all
76 'attribute' => '', // linkname. which attributes. default all
77 'attr_op' => ':=', // a funny written way for equality for pure aesthetic pleasure
78 // "All attributes which have this value set"
81 'regex' => 'auto', // is different here.
82 // no word splitting, if no regex op is present, defaults to exact match
83 'noform' => false, // don't show form with results.
84 'noheader' => false, // no caption
85 'info' => false // valid: pagename,relation,linkto,attribute,value and all other pagelist columns
// Build the two search forms: one for relation searches ($form1) and one for
// attribute searches ($form2).
//
// $dbi     - asked via listRelations() for the names offered by the
//            autocompletion lists.
// $request - supplies the form action URL (getPostURL()) and the current
//            request arguments, which are handed to HiddenInputs().
// $args    - plugin arguments; the 'page', 'relation', 'attribute', 'attr_op'
//            and 's' entries pre-fill the input fields.
//
// Returns HTML($form1, $form2).
// Side effect: sets $this->_norelations_warning to 1 when both name lists are
// empty. Reads $this->_supported_operators, which run() assigns before it
// calls this method.
89 function showForm(&$dbi, &$request, $args)
92 $action = $request->getPostURL();
// NOTE(review): the third HiddenInputs() argument is presumably the list of
// argument names to leave out, since the form supplies them itself - confirm.
93 $hiddenfield = HiddenInputs($request->getArgs(), '',
94 array('action', 'page', 's', 'semsearch',
95 'relation', 'attribute'));
96 $pagefilter = HTML::input(array('name' => 'page',
97 'value' => $args['page'],
98 'title' => _("Search only in these pages. With autocompletion."),
99 'class' => 'dropdown',
100 'acdropdown' => 'true',
101 'autocomplete_complete' => 'true',
102 'autocomplete_matchsubstring' => 'false',
103 'autocomplete_list' => 'xmlrpc:wiki.titleSearch ^[S] 4'
// Names returned by listRelations() become the JavaScript array
// semsearch_relations used by the relation input's autocompletion.
// NOTE(review): the names are joined into the array literal without any
// escaping, so a name containing a single quote would break the script; an
// empty list yields new Array(''), i.e. one empty entry.
105 $allrelations = $dbi->listRelations(false, false, true);
106 $svalues = empty($allrelations) ? "" : join("','", $allrelations);
107 $reldef = JavaScript("var semsearch_relations = new Array('" . $svalues . "')");
108 $relation = HTML::input(array('name' => 'relation',
109 'value' => $args['relation'],
110 'title' => _("Filter by this relation. With autocompletion."),
111 'class' => 'dropdown',
112 'style' => 'width:10em',
113 'acdropdown' => 'true',
114 'autocomplete_assoc' => 'false',
115 'autocomplete_complete' => 'true',
116 'autocomplete_matchsubstring' => 'true',
117 'autocomplete_list' => 'array:semsearch_relations'
119 $queryrel = HTML::input(array('name' => 's',
120 'value' => $args['s'],
121 'title' => _("Filter by this link. These are pagenames. With autocompletion."),
122 'class' => 'dropdown',
123 'acdropdown' => 'true',
124 'autocomplete_complete' => 'true',
125 'autocomplete_matchsubstring' => 'true',
126 'autocomplete_list' => 'xmlrpc:wiki.titleSearch ^[S] 4'
128 $relsubmit = Button('submit:semsearch[relations]', _("Relations"), false);
129 // just testing some dhtml... not yet done
130 $enhancements = HTML();
131 $nbsp = HTML::raw(' ');
// The AND/OR buttons link to the current request URI followed by '#' and call
// the JavaScript function addquery() on click.
132 $this_uri = $_SERVER['REQUEST_URI'] . '#';
133 $andbutton = new Button(_("AND"), $this_uri, 'wikiaction',
135 'onclick' => "addquery('rel', 'and')",
136 'title' => _("Add an AND query")));
137 $orbutton = new Button(_("OR"), $this_uri, 'wikiaction',
139 'onclick' => "addquery('rel', 'or')",
140 'title' => _("Add an OR query")));
142 $enhancements = HTML::span($andbutton, $nbsp, $orbutton);
143 $instructions = _("Search in pages for a relation with that value (a pagename).");
// Relation form: an extra hidden input sets 'attribute' to the empty string.
144 $form1 = HTML::form(array('action' => $action,
146 'accept-charset' => $GLOBALS['charset']),
148 $hiddenfield, HiddenInputs(array('attribute' => '')),
149 $instructions, HTML::br(),
151 (array('border' => 0, 'cellspacing' => 2),
152 HTML::colgroup(array('span' => 6)),
155 HTML::th('Pagefilter'),
156 HTML::th('Relation'),
163 HTML::td($pagefilter, _(": ")),
165 HTML::td(HTML::strong(HTML::tt(' :: '))),
167 HTML::td($nbsp, $relsubmit, $nbsp, $enhancements)))));
// Second listRelations() call, this time with the second argument set to true,
// feeds the attribute autocompletion list.
// NOTE(review): presumably that flag selects attributes instead of relations - confirm.
169 $allattrs = $dbi->listRelations(false, true, true);
170 if (empty($allrelations) and empty($allattrs)) // be nice to the dummy.
171 $this->_norelations_warning = 1;
// Same unescaped join as for the relation names above; the supported operators
// are exported as the JavaScript array semsearch_op.
172 $svalues = empty($allattrs) ? "" : join("','", $allattrs);
173 $attdef = JavaScript("var semsearch_attributes = new Array('" . $svalues . "')\n"
174 . "var semsearch_op = new Array('"
175 . join("','", $this->_supported_operators)
177 // TODO: We want some more tricks: Autofill the base unit of the selected
178 // attribute into the s area.
179 $attribute = HTML::input(array('name' => 'attribute',
180 'value' => $args['attribute'],
181 'title' => _("Filter by this attribute name. With autocompletion."),
182 'class' => 'dropdown',
183 'style' => 'width:10em',
184 'acdropdown' => 'true',
185 'autocomplete_complete' => 'true',
186 'autocomplete_matchsubstring' => 'true',
187 'autocomplete_assoc' => 'false',
188 'autocomplete_list' => 'array:semsearch_attributes'
189 /* 'autocomplete_onselect' => 'check_unit' */
191 $attr_op = HTML::input(array('name' => 'attr_op',
192 'value' => $args['attr_op'],
193 'title' => _("Comparison operator. With autocompletion."),
194 'class' => 'dropdown',
195 'style' => 'width:2em',
196 'acdropdown' => 'true',
197 'autocomplete_complete' => 'true',
198 'autocomplete_matchsubstring' => 'true',
199 'autocomplete_assoc' => 'false',
200 'autocomplete_list' => 'array:semsearch_op'
// The value input's autocompletion list is a plugin call string built from
// $args['page'], which is concatenated in as-is.
202 $queryatt = HTML::input(array('name' => 's',
203 'value' => $args['s'],
204 'title' => _("Filter by this numeric attribute value. With autocompletion."), //?
205 'class' => 'dropdown',
206 'acdropdown' => 'false',
207 'autocomplete_complete' => 'true',
208 'autocomplete_matchsubstring' => 'false',
209 'autocomplete_assoc' => 'false',
210 'autocomplete_list' => 'plugin:SemanticSearch page=' . $args['page'] . ' attribute=^[S] attr_op==~'
// $andbutton, $orbutton and $enhancements are rebuilt for the attribute form
// with 'attr' instead of 'rel' as the addquery() target.
212 $andbutton = new Button(_("AND"), $this_uri, 'wikiaction',
214 'onclick' => "addquery('attr', 'and')",
215 'title' => _("Add an AND query")));
216 $orbutton = new Button(_("OR"), $this_uri, 'wikiaction',
218 'onclick' => "addquery('attr', 'or')",
219 'title' => _("Add an OR query")));
221 $enhancements = HTML::span($andbutton, $nbsp, $orbutton);
222 $attsubmit = Button('submit:semsearch[attributes]', _("Attributes"), false);
223 $instructions = HTML::span(_("Search in pages for an attribute with that numeric value."), "\n");
225 $instructions->pushContent
226 (HTML(" ", new Button(_("Advanced..."), _("SemanticSearchAdvanced"))));
// Attribute form: an extra hidden input sets 'relation' to the empty string.
227 $form2 = HTML::form(array('action' => $action,
229 'accept-charset' => $GLOBALS['charset']),
231 $hiddenfield, HiddenInputs(array('relation' => '')),
232 $instructions, HTML::br(),
234 (array('border' => 0, 'cellspacing' => 2),
235 HTML::colgroup(array('span' => 6)),
238 HTML::th('Pagefilter'),
239 HTML::th('Attribute'),
246 HTML::td($pagefilter, _(": ")),
247 HTML::td($attribute),
250 HTML::td($nbsp, $attsubmit, $nbsp, $enhancements)))));
252 return HTML($form1, $form2);
// Build a TextSearchQuery for $string.
//
// When $regex is 'auto', $string is not '*' and $string contains none of the
// characters . + * ? ^ $ ", a 'pcre' query object is created and its
// _stoplist, _regex and _tree fields are overridden by hand (the "performance
// hack" below). The last visible statement hands all three arguments to
// TextSearchQuery unchanged.
// NOTE(review): the lines that choose between the exact node and the word node,
// and the return of $q, are not visible in this listing - presumably the choice
// depends on $case_exact; confirm.
255 function regex_query($string, $case_exact, $regex)
257 if ($string != '*' and $regex == 'auto') {
// strcspn() returns the length of the leading run free of the listed
// characters; equality with strlen() means none of them occurs in $string.
258 if (strcspn($string, ".+*?^$\"") == strlen($string)) {
259 // performance hack: construct an exact query w/o parsing. pcre is fastest.
260 $q = new TextSearchQuery($string, $case_exact, 'pcre');
261 // and now override the fields
262 unset ($q->_stoplist);
263 $q->_regex = TSQ_REGEX_NONE;
265 $q->_tree = new TextSearchQuery_node_exact($string); // hardcode this string
267 $q->_tree = new TextSearchQuery_node_word($string);
269 //$string = "\"" . $string ."\"";
270 //$regex = 'none'; // EXACT or WORD match
273 return new TextSearchQuery($string, $case_exact, $regex);
// Plugin entry point: parse the arguments, build the search form, run the
// relation and/or attribute link search and collect the hits in a PageList
// whose caption holds the form and a result headline.
//
// Visible return paths: the bare form for a non-POST request, and the form
// plus an error message for an illegal operator or for a non-text operator
// applied to a non-numeric value.
// NOTE(review): the final return is not visible in this listing - presumably
// the PageList; confirm.
// NOTE(review): $relation, $attribute, $page, $s, $attr_op, $info, $exclude and
// $noform are read below without a visible assignment - presumably an
// extract($args) on a line missing from this listing; confirm.
276 function run($dbi, $argstr, &$request, $basepage)
// Operator tables; showForm() reads _supported_operators, so they are assigned
// before the form is built.
280 $this->_supported_operators = array(':=', '<', '<=', '>', '>=', '!=', '==', '=~');
281 $this->_text_operators = array(':=', '==', '=~', '!=');
282 $args = $this->getArgs($argstr, $request);
283 if (empty($args['page']))
285 if (!isset($args['s'])) // it might be (integer) 0
287 $posted = $request->getArg("semsearch");
288 $form = $this->showForm($dbi, $request, $args);
// showForm() sets _norelations_warning when no relation or attribute names exist.
289 if (isset($this->_norelations_warning))
291 (HTML::div(array('class' => 'warning'),
292 _("Warning:"), HTML::br(),
293 _("No relations nor attributes in the whole wikidb defined!")
295 , fmt("See %s", WikiLink(_("Help:SemanticRelations")))));
297 // for convenience and harmony we allow GET requests also.
298 if (!$request->isPost()) {
299 if ($relation or $attribute) // check for good GET request
302 return $form; // nobody called us, so just display our supadupa form
304 $pagequery = $this->regex_query($page, $args['case_exact'], $args['regex']);
305 // we might want to check for semsearch['relations'] and semsearch['attributes'] also
306 if (empty($relation) and empty($attribute)) {
307 // so we just clicked without selecting any relation.
308 // hmm. check which button we clicked, before we do the massive alltogether search.
309 if (isset($posted['relations']) and $posted['relations'])
311 elseif (isset($posted['attributes']) and $posted['attributes']) {
313 // here we have to check for invalid text operators. ignore it then
314 if (!in_array($attr_op, $this->_text_operators))
318 $searchtype = "Text";
// --- Relation search ---
319 if (!empty($relation)) {
320 $querydesc = $relation . "::" . $s;
321 $linkquery = $this->regex_query($s, $args['case_exact'], $args['regex']);
322 $relquery = $this->regex_query($relation, $args['case_exact'], $args['regex']);
323 $links = $dbi->linkSearch($pagequery, $linkquery, 'relation', $relquery);
324 $pagelist = new PageList($info, $exclude, $args);
// Each hit is stored twice: as a page in the list and as a raw link record in
// _links, which the custom columns index by row.
325 $pagelist->_links = array();
326 while ($link = $links->next()) {
327 $pagelist->addPage($link['pagename']);
328 $pagelist->_links[] = $link;
330 // default (=empty info) wants all three. but we want to be able to override this.
331 // $pagelist->_columns_seen is the exploded info
// NOTE(review): this test reads $info while the next one reads $args['info'];
// the attribute branch below uses $args['info'] throughout - confirm both
// always hold the same value.
332 if (!$info or ($info and isset($pagelist->_columns_seen['relation'])))
333 $pagelist->addColumnObject
334 (new _PageList_Column_SemanticSearch_relation('relation', _("Relation"), $pagelist));
335 if (!$args['info'] or ($args['info'] and isset($pagelist->_columns_seen['linkto'])))
336 $pagelist->addColumnObject
337 (new _PageList_Column_SemanticSearch_link('linkto', _("Link"), $pagelist));
339 // can we merge two different pagelist?
// --- Attribute search ---
340 if (!empty($attribute)) {
341 $relquery = $this->regex_query($attribute, $args['case_exact'], $args['regex']);
342 if (!in_array($attr_op, $this->_supported_operators)) {
343 return HTML($form, $this->error(fmt("Illegal operator: %s",
344 HTML::tt($attr_op))));
// All commas are removed from the value before the numeric test; a value that
// is still not numeric is passed through Units::basevalue().
346 $s_base = preg_replace("/,/", "", $s);
347 $units = new Units();
348 if (!is_numeric($s_base)) {
349 $s_base = $units->basevalue($s_base);
350 $is_numeric = is_numeric($s_base);
354 // check which type to search with:
355 // at first check if forced text matcher
356 if ($attr_op == '=~') {
357 if ($s == '*') $s = '.*'; // help the poor user. we need pcre syntax.
358 $linkquery = new TextSearchQuery("$s", $args['case_exact'], 'pcre');
359 $querydesc = "$attribute $attr_op $s";
360 } elseif ($is_numeric) { // do comparison with numbers
361 /* We want to search for multiple attributes also. linkSearch can do this.
362 * But we have to construct the query somehow. (that's why we try the AND OR dhtml)
363 * population < 1 million AND area > 50 km2
364 * Here we check only for one attribute per page.
365 * See SemanticSearchAdvanced for the full expression.
367 // it might not be the best idea to use '*' as variable to expand. hmm.
// '*' is swapped for '_star_' only while the query object is built and is
// restored right afterwards.
368 if ($attribute == '*') $attribute = '_star_';
369 $searchtype = "Numeric";
370 $query = $attribute . " " . $attr_op . " " . $s_base;
371 $linkquery = new SemanticAttributeSearchQuery($query, $attribute,
372 $units->baseunit($s));
373 if ($attribute == '_star_') $attribute = '*';
374 $querydesc = $attribute . " " . $attr_op . " " . $s;
376 // no number or unit: check other text matchers or '*' MATCH_ALL
377 } elseif (in_array($attr_op, $this->_text_operators)) {
// NOTE(review): '=~' is already handled by the first branch of this
// if/elseif chain, so the inner '=~' test below looks unreachable - confirm.
378 if ($attr_op == '=~') {
379 if ($s == '*') $s = '.*'; // help the poor user. we need pcre syntax.
380 $linkquery = new TextSearchQuery("$s", $args['case_exact'], 'pcre');
382 $linkquery = $this->regex_query($s, $args['case_exact'], $args['regex']);
383 $querydesc = "$attribute $attr_op $s";
385 // should we fail or skip when the user clicks on Relations?
386 } elseif (isset($posted['relations']) and $posted['relations']) {
387 $linkquery = false; // skip
389 $querydesc = $attribute . " " . $attr_op . " " . $s;
390 return HTML($form, $this->error(fmt("Only text operators can be used with strings: %s",
391 HTML::tt($querydesc))));
395 $links = $dbi->linkSearch($pagequery, $linkquery, 'attribute', $relquery);
// A new PageList is created only when no relation search ran above.
396 if (empty($relation)) {
397 $pagelist = new PageList($args['info'], $args['exclude'], $args);
398 $pagelist->_links = array();
400 while ($link = $links->next()) {
401 $pagelist->addPage($link['pagename']);
402 $pagelist->_links[] = $link;
404 // default (=empty info) wants all three. but we want to override this.
405 if (!$args['info'] or
406 ($args['info'] and isset($pagelist->_columns_seen['attribute']))
408 $pagelist->addColumnObject
409 (new _PageList_Column_SemanticSearch_relation('attribute',
410 _("Attribute"), $pagelist));
411 if (!$args['info'] or
412 ($args['info'] and isset($pagelist->_columns_seen['value']))
414 $pagelist->addColumnObject
415 (new _PageList_Column_SemanticSearch_link('value',
416 _("Value"), $pagelist));
// Neither search ran: fall back to an empty PageList.
419 if (!isset($pagelist)) {
420 $querydesc = _("<empty>");
421 $pagelist = new PageList();
424 // We put the form into the caption just to be able to return one pagelist object,
425 // and to still have the convenience form at the top. we could workaround this by
426 // putting the form as WikiFormRich into the actionpage. but this doesn't look as
427 // nice as this here.
428 $pagelist->setCaption
429 ( // on mozilla the form doesn't fit into the caption very well.
430 HTML($noform ? '' : HTML($form, HTML::hr()),
431 fmt("Semantic %s Search Result for \"%s\" in pages \"%s\"",
432 $searchtype, $querydesc, $page)));
// PageList column that shows the 'linkname' of the link record belonging to
// the current row, rendered with WikiLink(..., 'if_known').
// The records are read from the owning PageList's _links array.
// NOTE(review): the constructor uses the PHP 4 convention of a method named
// like its class. PHP 8 no longer treats such a method as a constructor, so it
// needs renaming to __construct() on a port - confirm the targeted PHP version.
438 class _PageList_Column_SemanticSearch_relation
439 extends _PageList_Column
// Store the column's field name and heading, and keep a reference to the
// PageList whose _links array _getValue() reads.
441 function _PageList_Column_SemanticSearch_relation($field, $heading, &$pagelist)
443 $this->_field = $field;
444 $this->_heading = $heading;
445 $this->_need_rev = false;
446 $this->_iscustom = true;
447 $this->_pagelist =& $pagelist;
// Return a WikiLink for the 'linkname' of the current row's link record.
// NOTE(review): $text is assigned but not used in the visible lines.
450 function _getValue(&$page, $revision_handle)
452 if (is_object($page)) $text = $page->getName();
454 $link = $this->_pagelist->_links[$this->current_row];
455 return WikiLink($link['linkname'], 'if_known');
// PageList column that shows the 'linkvalue' of the link record belonging to
// the current row. It inherits its constructor from the relation column class.
459 class _PageList_Column_SemanticSearch_link
460 extends _PageList_Column_SemanticSearch_relation
// Return the 'linkvalue' of the current row's link record: as a WikiLink
// unless this column's field is 'value', in which case the raw value is
// returned.
// NOTE(review): $text is assigned but not used in the visible lines.
462 function _getValue(&$page, $revision_handle)
464 if (is_object($page)) $text = $page->getName();
466 $link = $this->_pagelist->_links[$this->current_row];
467 if ($this->_field != 'value')
468 return WikiLink($link['linkvalue'], 'if_known');
470 return $link['linkvalue'];
478 // c-hanging-comment-ender-p: nil
479 // indent-tabs-mode: nil