4 Copyright 2007 Reini Urban
6 This file is part of PhpWiki.
8 PhpWiki is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 PhpWiki is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with PhpWiki; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 require_once('lib/PageList.php');
24 require_once('lib/TextSearchQuery.php');
25 require_once('lib/Units.php');
26 require_once("lib/SemanticWeb.php");
29 * Search for relations/attributes and their values.
30 * page - relation::object. E.g. list all cities: is_a::city => relation=is_a&s=city
31 * We search for a relation and, if the search is also valid for attributes,
32 * for an attribute as well, and OR-combine the results.
34 * An attribute has just a value, which is a number and certainly not a pagename,
35 * and its value goes through some units unification. (not yet)
36 * We can also do numerical comparison and unit lifting with attributes.
37 * population > 1000000
38 * population > 1 million
41 * - The backends can already do simple AND/OR combination of multiple
42 * relations and attributes to search for. Only the UI cannot yet. TODO: implement the AND/OR buttons.
43 * population < 1 million AND area > 50 km2
44 * - Due to attribute internals a relation search with matching attribute names will also
45 * find those attribute names, but not the values. You must explicitly search for attributes then.
47 * The Advanced query can do a freeform query expression with multiple comparison and nesting.
48 * "is_a::city and population > 1.000.000 and population < 10.000.000"
49 * "(is_a::city or is_a::country) and population < 10.000.000"
51 * @author: Reini Urban
53 class WikiPlugin_SemanticSearch
57 return _("SemanticSearch");
59 function getDescription() {
60 return _("Search relations and attributes");
62 function getVersion() {
63 return preg_replace("/[Revision: $]/", '',
66 function getDefaultArguments() {
69 PageList::supportedArgs(), // paging and more.
71 's' => "*", // linkvalue query string
72 'page' => "*", // which pages (glob allowed), default: all
73 'relation' => '', // linkname. which relations. default all
74 'attribute' => '', // linkname. which attributes. default all
75 'attr_op' => ':=', // a funny written way for equality for pure aesthetic pleasure
76 // "All attributes which have this value set"
79 'regex' => 'auto',// is different here.
80 // no word splitting, if no regex op is present, defaults to exact match
81 'noform' => false, // don't show form with results.
82 'noheader' => false, // no caption
83 'info' => false // valid: pagename,relation,linkto,attribute,value and all other pagelist columns
87 function showForm (&$dbi, &$request, $args) {
89 $action = $request->getPostURL();
90 $hiddenfield = HiddenInputs($request->getArgs(),'',
91 array('action','page','s','semsearch',
92 'relation','attribute'));
93 $pagefilter = HTML::input(array('name' => 'page',
94 'value' => $args['page'],
95 'title' => _("Search only in these pages. With autocompletion."),
96 'class' => 'dropdown',
97 'acdropdown' => 'true',
98 'autocomplete_complete' => 'true',
99 'autocomplete_matchsubstring' => 'false',
100 'autocomplete_list' => 'xmlrpc:wiki.titleSearch ^[S] 4'
102 $allrelations = $dbi->listRelations(false,false,true);
103 $svalues = empty($allrelations) ? "" : join("','", $allrelations);
104 $reldef = JavaScript("var semsearch_relations = new Array('".$svalues."')");
105 $relation = HTML::input(array('name' => 'relation',
106 'value' => $args['relation'],
107 'title' => _("Filter by this relation. With autocompletion."),
108 'class' => 'dropdown',
109 'style' => 'width:10em',
110 'acdropdown' => 'true',
111 'autocomplete_assoc' => 'false',
112 'autocomplete_complete' => 'true',
113 'autocomplete_matchsubstring' => 'true',
114 'autocomplete_list' => 'array:semsearch_relations'
116 $queryrel = HTML::input(array('name' => 's',
117 'value' => $args['s'],
118 'title' => _("Filter by this link. These are pagenames. With autocompletion."),
119 'class' => 'dropdown',
120 'acdropdown' => 'true',
121 'autocomplete_complete' => 'true',
122 'autocomplete_matchsubstring' => 'true',
123 'autocomplete_list' => 'xmlrpc:wiki.titleSearch ^[S] 4'
125 $relsubmit = Button('submit:semsearch[relations]', _("Relations"), false);
126 // just testing some dhtml... not yet done
127 $enhancements = HTML();
128 $nbsp = HTML::raw(' ');
129 $this_uri = $_SERVER['REQUEST_URI'].'#';
130 $andbutton = new Button(_("AND"),$this_uri,'wikiaction',
132 'onclick' => "addquery('rel', 'and')",
133 'title' => _("Add an AND query")));
134 $orbutton = new Button(_("OR"),$this_uri,'wikiaction',
136 'onclick' => "addquery('rel', 'or')",
137 'title' => _("Add an OR query")));
139 $enhancements = HTML::span($andbutton, $nbsp, $orbutton);
140 $instructions = _("Search in pages for a relation with that value (a pagename).");
141 $form1 = HTML::form(array('action' => $action,
143 'accept-charset' => $GLOBALS['charset']),
145 $hiddenfield, HiddenInputs(array('attribute'=>'')),
146 $instructions, HTML::br(),
148 (array('border' => 0,'cellspacing' => 2),
149 HTML::colgroup(array('span' => 6)),
151 (HTML::th(''),HTML::th('Pagefilter'),HTML::th('Relation'),
152 HTML::th(''),HTML::th(array('span' => 2),'Links')),
155 HTML::td($nbsp,$nbsp,$nbsp),
156 HTML::td($pagefilter, ": "),
158 HTML::td(HTML::strong(HTML::tt(' :: '))),
160 HTML::td($nbsp, $relsubmit,
161 $nbsp, $enhancements)))));
163 $allattrs = $dbi->listRelations(false,true,true);
164 if (empty($allrelations) and empty($allattrs)) // be nice to the dummy.
165 $this->_norelations_warning = 1;
166 $svalues = empty($allattrs) ? "" : join("','", $allattrs);
167 $attdef = JavaScript("var semsearch_attributes = new Array('".$svalues."')\n"
168 ."var semsearch_op = new Array('"
169 .join("','", $this->_supported_operators)
171 // TODO: We want some more tricks: Autofill the base unit of the selected
172 // attribute into the s area.
173 $attribute = HTML::input(array('name' => 'attribute',
174 'value' => $args['attribute'],
175 'title' => _("Filter by this attribute name. With autocompletion."),
176 'class' => 'dropdown',
177 'style' => 'width:10em',
178 'acdropdown' => 'true',
179 'autocomplete_complete' => 'true',
180 'autocomplete_matchsubstring' => 'true',
181 'autocomplete_assoc' => 'false',
182 'autocomplete_list' => 'array:semsearch_attributes'
183 /* 'autocomplete_onselect' => 'check_unit' */
185 $attr_op = HTML::input(array('name' => 'attr_op',
186 'value' => $args['attr_op'],
187 'title' => _("Comparison operator. With autocompletion."),
188 'class' => 'dropdown',
189 'style' => 'width:2em',
190 'acdropdown' => 'true',
191 'autocomplete_complete' => 'true',
192 'autocomplete_matchsubstring' => 'true',
193 'autocomplete_assoc' => 'false',
194 'autocomplete_list' => 'array:semsearch_op'
196 $queryatt = HTML::input(array('name' => 's',
197 'value' => $args['s'],
198 'title' => _("Filter by this numeric attribute value. With autocompletion."), //?
199 'class' => 'dropdown',
200 'acdropdown' => 'false',
201 'autocomplete_complete' => 'true',
202 'autocomplete_matchsubstring' => 'false',
203 'autocomplete_assoc' => 'false',
204 'autocomplete_list' => 'plugin:SemanticSearch page='.$args['page'].' attribute=^[S] attr_op==~'
206 $andbutton = new Button(_("AND"),$this_uri,'wikiaction',
208 'onclick' => "addquery('attr', 'and')",
209 'title' => _("Add an AND query")));
210 $orbutton = new Button(_("OR"),$this_uri,'wikiaction',
212 'onclick' => "addquery('attr', 'or')",
213 'title' => _("Add an OR query")));
215 $enhancements = HTML::span($andbutton, $nbsp, $orbutton);
216 $attsubmit = Button('submit:semsearch[attributes]', _("Attributes"), false);
217 $instructions = HTML::span(_("Search in pages for an attribute with that numeric value."),"\n");
219 $instructions->pushContent
220 (HTML(" ", new Button(_("Advanced..."),_("SemanticSearchAdvanced"))));
221 $form2 = HTML::form(array('action' => $action,
223 'accept-charset' => $GLOBALS['charset']),
225 $hiddenfield, HiddenInputs(array('relation'=>'')),
226 $instructions, HTML::br(),
228 (array('border' => 0,'cellspacing' => 2),
229 HTML::colgroup(array('span' => 6)),
231 (HTML::th(''),HTML::th('Pagefilter'),HTML::th('Attribute'),
232 HTML::th('Op'),HTML::th(array('span' => 2),'Value')),
235 HTML::td($nbsp,$nbsp,$nbsp),
236 HTML::td($pagefilter, ": "),
237 HTML::td($attribute),
240 HTML::td($nbsp, $attsubmit,
241 $nbsp, $enhancements)))));
243 return HTML($form1, $form2);
246 function regex_query ($string, $case_exact, $regex) {
247 if ($string != '*' and $regex == 'auto') {
248 if (strcspn($string, ".+*?^$\"") == strlen($string)) {
249 // performance hack: construct an exact query w/o parsing. pcre is fastest.
250 $q = new TextSearchQuery($string, $case_exact, 'pcre');
251 // and now override the fields
252 unset ($q->_stoplist);
253 $q->_regex = TSQ_REGEX_NONE;
255 $q->_tree = new TextSearchQuery_node_exact($string); // hardcode this string
257 $q->_tree = new TextSearchQuery_node_word($string);
259 //$string = "\"" . $string ."\"";
260 //$regex = 'none'; // EXACT or WORD match
263 return new TextSearchQuery($string, $case_exact, $regex);
266 function run ($dbi, $argstr, &$request, $basepage) {
269 $this->_supported_operators = array(':=','<','<=','>','>=','!=','==','=~');
270 $this->_text_operators = array(':=','==','=~','!=');
271 $args = $this->getArgs($argstr, $request);
272 if (empty($args['page']))
274 if (!isset($args['s'])) // it might be (integer) 0
276 $posted = $request->getArg("semsearch");
277 $form = $this->showForm($dbi, $request, $args);
278 if (isset($this->_norelations_warning))
280 (HTML::div(array('class' => 'warning'),
281 _("Warning:"),HTML::br(),
282 _("No relations nor attributes in the whole wikidb defined!")
284 ,fmt("See %s",WikiLink(_("Help:SemanticRelations")))));
286 // for convenience and harmony we allow GET requests also.
287 if (!$request->isPost()) {
288 if ($relation or $attribute) // check for good GET request
291 return $form; // nobody called us, so just display our supadupa form
293 $pagequery = $this->regex_query($page, $args['case_exact'], $args['regex']);
294 // we might want to check for semsearch['relations'] and semsearch['attributes'] also
295 if (empty($relation) and empty($attribute)) {
296 // so we just clicked without selecting any relation.
297 // hmm. check which button we clicked, before we do the massive alltogether search.
298 if (isset($posted['relations']) and $posted['relations'])
300 elseif (isset($posted['attributes']) and $posted['attributes']) {
302 // here we have to check for invalid text operators. ignore it then
303 if (!in_array($attr_op, $this->_text_operators))
307 $searchtype = "Text";
308 if (!empty($relation)) {
309 $querydesc = $relation."::".$s;
310 $linkquery = $this->regex_query($s, $args['case_exact'], $args['regex']);
311 $relquery = $this->regex_query($relation, $args['case_exact'], $args['regex']);
312 $links = $dbi->linkSearch($pagequery, $linkquery, 'relation', $relquery);
313 $pagelist = new PageList($info, $exclude, $args);
314 $pagelist->_links = array();
315 while ($link = $links->next()) {
316 $pagelist->addPage($link['pagename']);
317 $pagelist->_links[] = $link;
319 // default (=empty info) wants all three. but we want to be able to override this.
320 // $pagelist->_columns_seen is the exploded info
321 if (!$info or ($info and isset($pagelist->_columns_seen['relation'])))
322 $pagelist->addColumnObject
323 (new _PageList_Column_SemanticSearch_relation('relation', _("Relation"), $pagelist));
324 if (!$args['info'] or ($args['info'] and isset($pagelist->_columns_seen['linkto'])))
325 $pagelist->addColumnObject
326 (new _PageList_Column_SemanticSearch_link('linkto', _("Link"), $pagelist));
328 // can we merge two different pagelist?
329 if (!empty($attribute)) {
330 $relquery = $this->regex_query($attribute, $args['case_exact'], $args['regex']);
331 if (!in_array($attr_op, $this->_supported_operators)) {
332 return HTML($form, $this->error(fmt("Illegal operator: %s",
333 HTML::tt($attr_op))));
335 $s_base = preg_replace("/,/","", $s);
336 $units = new Units();
337 if (!is_numeric($s_base)) {
338 $s_base = $units->basevalue($s_base);
339 $is_numeric = is_numeric($s_base);
343 // check which type to search with:
344 // at first check if forced text matcher
345 if ($attr_op == '=~') {
346 if ($s == '*') $s = '.*'; // help the poor user. we need pcre syntax.
347 $linkquery = new TextSearchQuery("$s", $args['case_exact'], 'pcre');
348 $querydesc = "$attribute $attr_op $s";
349 } elseif ($is_numeric) { // do comparison with numbers
350 /* We want to search for multiple attributes also. linkSearch can do this.
351 * But we have to construct the query somehow. (that's why we try the AND OR dhtml)
352 * population < 1 million AND area > 50 km2
353 * Here we check only for one attribute per page.
354 * See SemanticSearchAdvanced for the full expression.
356 // it might not be the best idea to use '*' as variable to expand. hmm.
357 if ($attribute == '*') $attribute = '_star_';
358 $searchtype = "Numeric";
359 $query = $attribute." ".$attr_op." ".$s_base;
360 $linkquery = new SemanticAttributeSearchQuery($query, $attribute,
361 $units->baseunit($s));
362 if ($attribute == '_star_') $attribute = '*';
363 $querydesc = $attribute." ".$attr_op." ".$s;
365 // no number or unit: check other text matchers or '*' MATCH_ALL
366 } elseif (in_array($attr_op, $this->_text_operators)) {
367 if ($attr_op == '=~') {
368 if ($s == '*') $s = '.*'; // help the poor user. we need pcre syntax.
369 $linkquery = new TextSearchQuery("$s", $args['case_exact'], 'pcre');
372 $linkquery = $this->regex_query($s, $args['case_exact'], $args['regex']);
373 $querydesc = "$attribute $attr_op $s";
375 // should we fail or skip when the user clicks on Relations?
376 } elseif (isset($posted['relations']) and $posted['relations']) {
377 $linkquery = false; // skip
379 $querydesc = $attribute." ".$attr_op." ".$s;
380 return HTML($form, $this->error(fmt("Only text operators can be used with strings: %s",
381 HTML::tt($querydesc))));
385 $links = $dbi->linkSearch($pagequery, $linkquery, 'attribute', $relquery);
386 if (empty($relation)) {
387 $pagelist = new PageList($args['info'], $args['exclude'], $args);
388 $pagelist->_links = array();
390 while ($link = $links->next()) {
391 $pagelist->addPage($link['pagename']);
392 $pagelist->_links[] = $link;
394 // default (=empty info) wants all three. but we want to override this.
395 if (!$args['info'] or
396 ($args['info'] and isset($pagelist->_columns_seen['attribute'])))
397 $pagelist->addColumnObject
398 (new _PageList_Column_SemanticSearch_relation('attribute',
399 _("Attribute"), $pagelist));
400 if (!$args['info'] or
401 ($args['info'] and isset($pagelist->_columns_seen['value'])))
402 $pagelist->addColumnObject
403 (new _PageList_Column_SemanticSearch_link('value',
404 _("Value"), $pagelist));
407 if (!isset($pagelist)) {
408 $querydesc = _("<empty>");
409 $pagelist = new PageList();
412 // We put the form into the caption just to be able to return one pagelist object,
413 // and to still have the convenience form at the top. We could work around this by
414 // putting the form as WikiFormRich into the actionpage, but that doesn't look as
415 // nice as this.
416 $pagelist->setCaption
417 ( // on mozilla the form doesn't fit into the caption very well.
418 HTML($noform ? '' : HTML($form,HTML::hr()),
419 fmt("Semantic %s Search Result for \"%s\" in pages \"%s\"",
420 $searchtype, $querydesc, $page)));
426 class _PageList_Column_SemanticSearch_relation
427 extends _PageList_Column
429 function _PageList_Column_SemanticSearch_relation ($field, $heading, &$pagelist) {
430 $this->_field = $field;
431 $this->_heading = $heading;
432 $this->_need_rev = false;
433 $this->_iscustom = true;
434 $this->_pagelist =& $pagelist;
436 function _getValue(&$page, $revision_handle) {
437 if (is_object($page)) $text = $page->getName();
439 $link = $this->_pagelist->_links[$this->current_row];
440 return WikiLink($link['linkname'],'if_known');
443 class _PageList_Column_SemanticSearch_link
444 extends _PageList_Column_SemanticSearch_relation
446 function _getValue(&$page, $revision_handle) {
447 if (is_object($page)) $text = $page->getName();
449 $link = $this->_pagelist->_links[$this->current_row];
450 if ($this->_field != 'value')
451 return WikiLink($link['linkvalue'],'if_known');
453 return $link['linkvalue'];
457 // $Log: not supported by cvs2svn $
458 // Revision 1.4 2007/01/04 16:44:22 rurban
459 // Fix the info argument: e.g. try info=pagename or info=relation,linkto. Sorry, the pagename is always there.
461 // Revision 1.3 2007/01/03 21:23:15 rurban
462 // Use Units and SemanticWeb: "population > 0.5 million or area < 100m^2" will work. Add help link if no relations are defined yet. Add attr_op=~ to attribute livesearch
464 // Revision 1.2 2007/01/02 13:23:06 rurban
465 // add SemanticSearch with internal form
467 // Revision 1.1 2006/03/07 20:52:01 rurban
468 // not yet working good enough
475 // c-hanging-comment-ender-p: nil
476 // indent-tabs-mode: nil