]> CyberLeo.Net >> Repos - SourceForge/phpwiki.git/blob - lib/plugin/FuzzyPages.php
Activated Id substitution for Subversion
[SourceForge/phpwiki.git] / lib / plugin / FuzzyPages.php
1 <?php // -*-php-*-
2 rcs_id('$Id$');
3 /*
4  Copyright 1999, 2000, 2001, 2002 $ThePhpWikiProgrammingTeam
5
6  This file is part of PhpWiki.
7
8  PhpWiki is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12
13  PhpWiki is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  GNU General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with PhpWiki; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21  */
22
23 //require_once('lib/PageList.php');
24
/**
 * FuzzyPages is a plugin which searches for similar page titles.
 *
 * Pages are considered similar by averaging the similarity scores of
 * the spelling comparison and the metaphone comparison for each page
 * title in the database (php's metaphone() is an improved soundex
 * function).
 *
 * Both scores are the percentage reported by similar_text(), so the
 * averaged score is a percentage as well.
 *
 * http://www.php.net/manual/en/function.similar-text.php
 * http://www.php.net/manual/en/function.metaphone.php
 */
class WikiPlugin_FuzzyPages
extends WikiPlugin
{
    // The properties below are declared explicitly so that they always
    // exist with a sane default; creating them on the fly is deprecated
    // as of PHP 8.2.

    /** Search term taken from the 's' plugin argument; set by run(). */
    var $_searchterm = '';

    /** metaphone() key of the search term; set by collectSimilarPages(). */
    var $_searchterm_metaphone = '';

    /** Whether the extra debugging columns are rendered; set by run(). */
    var $debug = false;

    /** Map of pagename => averaged similarity score; filled by run(). */
    var $_list = array();

    /**
     * @return string The (translatable) plugin name.
     */
    function getName() {
        return _("FuzzyPages");
    }

    /**
     * @return string A one-line (translatable) description of the plugin.
     */
    function getDescription() {
        return sprintf(_("Search for page titles similar to %s."),
                       '[pagename]');
    }

    /**
     * @return string The revision number extracted from the RCS keyword.
     */
    function getVersion() {
        // The pattern is a character class: every single occurrence of
        // the characters R,e,v,i,s,o,n,':',' ' and '$' is removed, which
        // leaves only the digits and dots of the revision number.
        return preg_replace("/[Revision: $]/", '',
                            "\$Revision: 1.12 $");
    }

    /**
     * @return array Plugin arguments and their defaults:
     *               's' is the search term, 'debug' toggles the extra
     *               score columns.
     */
    function getDefaultArguments() {
        return array('s'     => false,
                     'debug' => false);
    }

    /**
     * Compare the spelling of $subject with the current search term.
     *
     * @param string $subject Page title to compare.
     * @return float Similarity in percent as computed by similar_text().
     */
    function spelling_similarity($subject) {
        $spelling_similarity_score = 0;
        similar_text($subject, $this->_searchterm,
                     $spelling_similarity_score);
        return $spelling_similarity_score;
    }

    /**
     * Compare the metaphone key of $subject with the metaphone key of
     * the current search term.
     *
     * Relies on $this->_searchterm_metaphone, which is computed in
     * collectSimilarPages().
     *
     * @param string $subject Page title to compare.
     * @return float Similarity in percent as computed by similar_text().
     */
    function sound_similarity($subject) {
        $sound_similarity_score = 0;
        similar_text(metaphone($subject), $this->_searchterm_metaphone,
                     $sound_similarity_score);
        return $sound_similarity_score;
    }

    /**
     * @param string $subject Page title to compare.
     * @return float Arithmetic mean of the spelling and sound scores.
     */
    function averageSimilarities($subject) {
        return ($this->spelling_similarity($subject)
                + $this->sound_similarity($subject)) / 2;
    }

    /**
     * Score every page returned by $dbi->getAllPages() against the
     * search term and record those scoring above MIN_SCORE_CUTOFF.
     *
     * @param array  $list Receives pagename => score entries.
     * @param object $dbi  Database handle providing getAllPages().
     */
    function collectSimilarPages(&$list, &$dbi) {
        // Only defined when nothing else has defined it already, so a
        // previously defined value takes precedence over 33.
        if (! defined('MIN_SCORE_CUTOFF'))
            define('MIN_SCORE_CUTOFF', 33);

        // Computed once here instead of once per compared page.
        $this->_searchterm_metaphone = metaphone($this->_searchterm);

        $allPages = $dbi->getAllPages();

        while ($pagehandle = $allPages->next()) {
            $pagename = $pagehandle->getName();
            $similarity_score = $this->averageSimilarities($pagename);
            if ($similarity_score > MIN_SCORE_CUTOFF)
                $list[$pagename] = $similarity_score;
        }
    }

    /**
     * Order the collected pages by score, best match first, keeping the
     * pagename keys intact.
     *
     * @param array $list pagename => score map, sorted in place.
     */
    function sortCollectedPages(&$list) {
        arsort($list, SORT_NUMERIC);
    }

    /**
     * Add a caption naming the search term to the result table.
     *
     * @param object $table HTML table element to append to.
     * @param object $dbi   Database handle providing isWikiPage().
     */
    function addTableCaption(&$table, &$dbi) {
        // The term is turned into a link only if a page of that name
        // exists; otherwise it is shown as plain text.
        if ($dbi->isWikiPage($this->_searchterm))
            $link = WikiLink($this->_searchterm, 'auto');
        else
            $link = $this->_searchterm;
        $caption = fmt("These page titles match fuzzy with '%s'", $link);
        $table->pushContent(HTML::caption(array('align'=>'top'), $caption));
    }

    /**
     * Add the header row (plus the debug headings when enabled).
     *
     * @param object $table HTML table element to append to.
     */
    function addTableHead(&$table) {
        $row = HTML::tr(HTML::th(_("Name")),
                        HTML::th(array('align' => 'right'), _("Score")));
        if ($this->debug)
            $this->_pushDebugHeadingTDinto($row);

        $table->pushContent(HTML::thead($row));
    }

    /**
     * Add one row per collected page, in the order given by $list.
     *
     * @param array  $list  pagename => score map.
     * @param object $table HTML table element to append to.
     */
    function addTableBody(&$list, &$table) {
        if (! defined('HIGHLIGHT_ROWS_CUTOFF_SCORE'))
            define('HIGHLIGHT_ROWS_CUTOFF_SCORE', 60);

        $tbody = HTML::tbody();
        foreach ($list as $found_pagename => $score) {
            // The row class depends on the score, not on the row
            // position: scores above the cutoff get 'evenrow', all
            // others 'oddrow'.
            $row = HTML::tr(array('class' =>
                                  $score > HIGHLIGHT_ROWS_CUTOFF_SCORE
                                  ? 'evenrow' : 'oddrow'),
                            HTML::td(WikiLink($found_pagename)),
                            HTML::td(array('align' => 'right'),
                                     round($score)));

            if ($this->debug)
                $this->_pushDebugTDinto($row, $found_pagename);

            $tbody->pushContent($row);
        }
        $table->pushContent($tbody);
    }

    /**
     * Build the complete result table: caption, head and body.
     *
     * @param array  $list pagename => score map, already sorted.
     * @param object $dbi  Database handle, used for the caption.
     * @return object The assembled HTML table element.
     */
    function formatTable(&$list, &$dbi) {

        $table = HTML::table(array('cellpadding' => 2,
                                   'cellspacing' => 1,
                                   'border'      => 0,
                                   'class' => 'pagelist'));
        $this->addTableCaption($table, $dbi);
        $this->addTableHead($table);
        $this->addTableBody($list, $table);
        return $table;
    }


    /**
     * Plugin entry point.
     *
     * @param object $dbi      Database handle.
     * @param string $argstr   Raw plugin argument string.
     * @param object $request  Current request, handed on to getArgs().
     * @param string $basepage Not used by this plugin.
     * @return mixed The result table, or '' when no search term is given.
     */
    function run($dbi, $argstr, &$request, $basepage) {
        $args = $this->getArgs($argstr, $request);

        // Read the two known arguments explicitly rather than through
        // extract(), which would turn every key of $args into a local
        // variable and could overwrite the parameters of this method.
        $s     = isset($args['s'])     ? $args['s']     : false;
        $debug = isset($args['debug']) ? $args['debug'] : false;

        // empty() alone would also reject the string '0', which is a
        // perfectly searchable term.
        if (empty($s) && $s !== '0')
            return '';
        $this->debug = $debug;

        $this->_searchterm = $s;
        $this->_list = array();

        $this->collectSimilarPages($this->_list, $dbi);
        $this->sortCollectedPages($this->_list);
        return $this->formatTable($this->_list, $dbi);
    }



    /**
     * Append the headings of the three debug columns to a header row.
     *
     * @param object $row HTML table row to append to.
     */
    function _pushDebugHeadingTDinto(&$row) {
        $row->pushContent(HTML::td(_("Spelling Score")),
                          HTML::td(_("Sound Score")),
                          HTML::td('Metaphones'));
    }

    /**
     * Append the individual scores and metaphone keys for $pagename to
     * a body row.
     *
     * @param object $row      HTML table row to append to.
     * @param string $pagename Page title the row belongs to.
     */
    function _pushDebugTDinto(&$row, $pagename) {
        // This actually calculates everything a second time for each pagename
        // so the individual scores can be displayed separately for debugging.
        $debug_spelling = round($this->spelling_similarity($pagename), 1);
        $debug_sound = round($this->sound_similarity($pagename), 1);
        $debug_metaphone = sprintf("(%s, %s)", metaphone($pagename),
                                   $this->_searchterm_metaphone);

        $row->pushContent(HTML::td(array('align' => 'center'), $debug_spelling),
                          HTML::td(array('align' => 'center'), $debug_sound),
                          HTML::td($debug_metaphone));
    }
}
185
186 // $Log: not supported by cvs2svn $
187 // Revision 1.11  2004/02/17 12:11:36  rurban
188 // added missing 4th basepage arg at plugin->run() to almost all plugins. This caused no harm so far, because it was silently dropped on normal usage. However on plugin internal ->run invocations it failed. (InterWikiSearch, IncludeSiteMap, ...)
189 //
190 // Revision 1.10  2003/02/22 20:49:55  dairiki
191 // Fixes for "Call-time pass by reference has been deprecated" errors.
192 //
193 // Revision 1.9  2003/01/18 21:41:02  carstenklapp
194 // Code cleanup:
195 // Reformatting & tabs to spaces;
196 // Added copyleft, getVersion, getDescription, rcs_id.
197 //
198
199 // Local Variables:
200 // mode: php
201 // tab-width: 8
202 // c-basic-offset: 4
203 // c-hanging-comment-ender-p: nil
204 // indent-tabs-mode: nil
205 // End:
206 ?>