From b8eef46b71e6d875cf451bb22c9984d2996119ba Mon Sep 17 00:00:00 2001 From: rurban Date: Sun, 11 Sep 2005 14:55:05 +0000 Subject: [PATCH] implement fulltext stoplist git-svn-id: svn://svn.code.sf.net/p/phpwiki/code/trunk@4821 96ab9672-09ca-45d6-a79d-3d69d39ca109 --- lib/PageList.php | 7 ++- lib/TextSearchQuery.php | 3 +- lib/WikiDB/backend/ADODB.php | 17 ++++-- lib/WikiDB/backend/PDO_pgsql.php | 63 ++++++++++++++++++++++ lib/WikiDB/backend/PearDB.php | 30 ++++++----- lib/WikiDB/backend/dumb/TextSearchIter.php | 6 ++- lib/plugin/FullTextSearch.php | 9 ++-- 7 files changed, 111 insertions(+), 24 deletions(-) create mode 100644 lib/WikiDB/backend/PDO_pgsql.php diff --git a/lib/PageList.php b/lib/PageList.php index bf8c5a859..c4478fa1a 100644 --- a/lib/PageList.php +++ b/lib/PageList.php @@ -1,4 +1,4 @@ -_tree = $parser->parse($search_query, $case_exact, $this->_regex); //$this->_optimize(); // broken under certain circumstances: "word -word -word" + $this->_stoplist = '(A|An|And|But|By|For|From|In|Is|It|Of|On|Or|The|To|With)'; } function _optimize() { diff --git a/lib/WikiDB/backend/ADODB.php b/lib/WikiDB/backend/ADODB.php index 191f96ec2..65a014180 100644 --- a/lib/WikiDB/backend/ADODB.php +++ b/lib/WikiDB/backend/ADODB.php @@ -1,5 +1,5 @@ _dbh =& $dbh; $this->_case_exact = $search->_case_exact; + $this->_stoplist =& $search->_stoplist; } function _pagename_match_clause($node) { $word = $node->sql(); @@ -1279,9 +1280,14 @@ extends WikiDB_backend_search } function _fulltext_match_clause($node) { $word = $node->sql(); - return ($this->_case_exact - ? "pagename LIKE '$word' OR content LIKE '$word'" - : "LOWER(pagename) LIKE '$word' OR content LIKE '$word'"); + // eliminate stoplist words + if (preg_match("/^%".$this->_stoplist."%/i", $word) + or preg_match("/^".$this->_stoplist."$/i", $word)) + return $this->_pagename_match_clause($node); + else + return ($this->_case_exact + ? "pagename LIKE '$word' OR content LIKE '$word'" + : "LOWER(pagename) LIKE '$word' OR content LIKE '$word'"); } } @@ -1445,6 +1451,9 @@ extends WikiDB_backend_search } // $Log: not supported by cvs2svn $ +// Revision 1.76 2005/09/11 13:25:12 rurban +// enhance LIMIT support +// // Revision 1.75 2005/09/10 21:30:16 rurban // enhance titleSearch // diff --git a/lib/WikiDB/backend/PDO_pgsql.php b/lib/WikiDB/backend/PDO_pgsql.php new file mode 100644 index 000000000..cd7c72080 --- /dev/null +++ b/lib/WikiDB/backend/PDO_pgsql.php @@ -0,0 +1,63 @@ +limit($limit); + if ($offset) + $limit = " LIMIT $count OFFSET $from"; + else + $limit = " LIMIT $count"; + } else + $limit = ''; + return $limit; + } + + function backendType() { + return 'pgsql'; + } +} + +// $Log: not supported by cvs2svn $ + +// (c-file-style: "gnu") +// Local Variables: +// mode: php +// tab-width: 8 +// c-basic-offset: 4 +// c-hanging-comment-ender-p: nil +// indent-tabs-mode: nil +// End: +?> \ No newline at end of file diff --git a/lib/WikiDB/backend/PearDB.php b/lib/WikiDB/backend/PearDB.php index b2affb24b..a7408391a 100644 --- a/lib/WikiDB/backend/PearDB.php +++ b/lib/WikiDB/backend/PearDB.php @@ -1,5 +1,5 @@ _dbh = $dbh; $this->_case_exact = $search->_case_exact; + $this->_stoplist =& $search->_stoplist; } function _pagename_match_clause($node) { $word = $node->sql(); - if ($node->op == 'REGEX') { // posix regex extensions - if (preg_match("/mysql/i", $this->_dbh->phptype)) - return "pagename REGEXP '$word'"; - } else { - return ($this->_case_exact - ? "pagename LIKE '$word'" - : "LOWER(pagename) LIKE '$word'"); - } + return ($this->_case_exact + ? "pagename LIKE '$word'" + : "LOWER(pagename) LIKE '$word'"); } function _fulltext_match_clause($node) { $word = $node->sql(); - return $this->_pagename_match_clause($node) - // probably convert this MATCH AGAINST or SUBSTR/POSITION without wildcards - . ($this->_case_exact ? " OR content LIKE '$word'" - : " OR LOWER(content) LIKE '$word'"); + // eliminate stoplist words + if (preg_match("/^%".$this->_stoplist."%/i", $word) + or preg_match("/^".$this->_stoplist."$/i", $word)) + return $this->_pagename_match_clause($node); + else + return $this->_pagename_match_clause($node) + // probably convert this MATCH AGAINST or SUBSTR/POSITION without wildcards + . ($this->_case_exact ? " OR content LIKE '$word'" + : " OR LOWER(content) LIKE '$word'"); } } // $Log: not supported by cvs2svn $ +// Revision 1.90 2005/09/11 13:25:12 rurban +// enhance LIMIT support +// // Revision 1.89 2005/09/10 21:30:16 rurban // enhance titleSearch // diff --git a/lib/WikiDB/backend/dumb/TextSearchIter.php b/lib/WikiDB/backend/dumb/TextSearchIter.php index c101ba452..a13ebf685 100644 --- a/lib/WikiDB/backend/dumb/TextSearchIter.php +++ b/lib/WikiDB/backend/dumb/TextSearchIter.php @@ -1,5 +1,5 @@ _fulltext = $fulltext; $this->_search = $search; $this->_index = 0; + $this->_stoplist =& $search->_stoplist; if (isset($options['limit'])) $this->_limit = $options['limit']; else $this->_limit = 0; @@ -36,6 +37,9 @@ extends WikiDB_backend_iterator return $result; if ($this->_fulltext) { + // eliminate stoplist words from fulltext search + if (preg_match("/^".$this->_stoplist."$/i", $text)) + return $result; $text .= "\n" . $this->_get_content($page); return $this->_search->match($text); } else diff --git a/lib/plugin/FullTextSearch.php b/lib/plugin/FullTextSearch.php index 496341efd..a5a31bb4d 100644 --- a/lib/plugin/FullTextSearch.php +++ b/lib/plugin/FullTextSearch.php @@ -1,5 +1,5 @@ fullSearch($query); + $pages = $dbi->fullSearch($query, $sortby, $limit, $exclude); $lines = array(); $hilight_re = $hilight ? $query->getHighlightRegexp() : false; $count = 0; @@ -136,6 +136,9 @@ extends WikiPlugin }; // $Log: not supported by cvs2svn $ +// Revision 1.24 2004/11/26 18:39:02 rurban +// new regex search parser and SQL backends (90% complete, glob and pcre backends missing) +// // Revision 1.23 2004/11/23 15:17:19 rurban // better support for case_exact search (not caseexact for consistency), // plugin args simplification: -- 2.45.0