]> CyberLeo.Net >> Repos - SourceForge/phpwiki.git/blob - lib/plugin/FuzzyPages.php
New FSF address
[SourceForge/phpwiki.git] / lib / plugin / FuzzyPages.php
1 <?php // -*-php-*-
2 // $Id$
3 /*
4  * Copyright 1999, 2000, 2001, 2002 $ThePhpWikiProgrammingTeam
5  * Copyright 2009 Marc-Etienne Vargenau, Alcatel-Lucent
6  *
7  * This file is part of PhpWiki.
8  *
9  * PhpWiki is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * PhpWiki is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22  */
23
24 /**
25  * FuzzyPages is a plugin which searches for similar page titles.
26  *
27  * Pages are considered similar by averaging the similarity scores of
28  * the spelling comparison and the metaphone comparison for each page
29  * title in the database (php's metaphone() is an improved soundex
30  * function).
31  *
32  * http://www.php.net/manual/en/function.similar-text.php
33  * http://www.php.net/manual/en/function.metaphone.php
34  */
class WikiPlugin_FuzzyPages
extends WikiPlugin
{
    /**
     * Search term supplied through the 's' plugin argument.
     * Declared explicitly: creating properties dynamically is deprecated
     * as of PHP 8.2.
     */
    var $_searchterm = '';

    /** metaphone() key of $_searchterm; set by collectSimilarPages(). */
    var $_searchterm_metaphone = '';

    /** Map of pagename => averaged similarity score for the current run. */
    var $_list = array();

    /** Value of the 'debug' argument; only honoured when DEBUG is on. */
    var $debug = false;

    /**
     * @return string Translated plugin name.
     */
    function getName() {
        return _("FuzzyPages");
    }

    /**
     * @return string Translated one-line description of the plugin.
     */
    function getDescription() {
        return sprintf(_("Search for page titles similar to %s."),
                       '[pagename]');
    }

    /**
     * @return array Supported plugin arguments and their defaults:
     *               's' (search term) and 'debug' (show per-score columns).
     */
    function getDefaultArguments() {
        return array('s'     => false,
                     'debug' => false);
    }

    /**
     * Compare $subject with the search term by spelling.
     *
     * @param string $subject Page name to compare.
     * @return float Percentage reported by similar_text().
     */
    function spelling_similarity($subject) {
        $spelling_similarity_score = 0;
        similar_text($subject, $this->_searchterm,
                     $spelling_similarity_score);
        return $spelling_similarity_score;
    }

    /**
     * Compare $subject with the search term by sound, i.e. by running
     * similar_text() on the metaphone() keys of both strings.
     *
     * Relies on $this->_searchterm_metaphone, which is assigned in
     * collectSimilarPages().
     *
     * @param string $subject Page name to compare.
     * @return float Percentage reported by similar_text().
     */
    function sound_similarity($subject) {
        $sound_similarity_score = 0;
        similar_text(metaphone($subject), $this->_searchterm_metaphone,
                     $sound_similarity_score);
        return $sound_similarity_score;
    }

    /**
     * @param string $subject Page name to compare.
     * @return float Mean of the spelling and the sound similarity scores.
     */
    function averageSimilarities($subject) {
        return ($this->spelling_similarity($subject)
                + $this->sound_similarity($subject)) / 2;
    }

    /**
     * Score every page returned by $dbi->getAllPages() against the search
     * term and store those scoring above MIN_SCORE_CUTOFF in $list.
     *
     * @param array  $list Receives pagename => score entries.
     * @param object $dbi  Wiki database handle.
     */
    function collectSimilarPages(&$list, &$dbi) {
        // Defined lazily so that a site configuration may override it.
        if (! defined('MIN_SCORE_CUTOFF'))
            define('MIN_SCORE_CUTOFF', 33);

        // Computed once here rather than once per compared page.
        $this->_searchterm_metaphone = metaphone($this->_searchterm);

        $allPages = $dbi->getAllPages();

        while ($pagehandle = $allPages->next()) {
            $pagename = $pagehandle->getName();
            $similarity_score = $this->averageSimilarities($pagename);
            if ($similarity_score > MIN_SCORE_CUTOFF)
                $list[$pagename] = $similarity_score;
        }
    }

    /**
     * Sort the collected pages by descending score, keeping the keys.
     *
     * @param array $list pagename => score map, sorted in place.
     */
    function sortCollectedPages(&$list) {
        arsort($list, SORT_NUMERIC);
    }

    /**
     * Add a caption naming the search term; the term is rendered as a
     * link when it is itself an existing wiki page.
     *
     * @param object $table Table element to append to.
     * @param object $dbi   Wiki database handle.
     */
    function addTableCaption(&$table, &$dbi) {
        if ($dbi->isWikiPage($this->_searchterm))
            $link = WikiLink($this->_searchterm, 'auto');
        else
            $link = $this->_searchterm;
        $caption = fmt("These page titles match fuzzy with '%s'", $link);
        $table->pushContent(HTML::caption(array('align'=>'top'), $caption));
    }

    /**
     * Add the header row (Name, Score, plus debug columns if enabled).
     *
     * @param object $table Table element to append to.
     */
    function addTableHead(&$table) {
        $row = HTML::tr(HTML::th(_("Name")),
                        HTML::th(array('align' => 'right'), _("Score")));

        if (defined('DEBUG') && DEBUG && $this->debug) {
            $this->_pushDebugHeadingTDinto($row);
        }

        $table->pushContent(HTML::thead($row));
    }

    /**
     * Add one row per collected page. Rows scoring above
     * HIGHLIGHT_ROWS_CUTOFF_SCORE get the 'evenrow' class, all others
     * 'oddrow'.
     *
     * @param array  $list  pagename => score map.
     * @param object $table Table element to append to.
     */
    function addTableBody(&$list, &$table) {
        // Defined lazily so that a site configuration may override it.
        if (! defined('HIGHLIGHT_ROWS_CUTOFF_SCORE'))
            define('HIGHLIGHT_ROWS_CUTOFF_SCORE', 60);

        $tbody = HTML::tbody();
        foreach ($list as $found_pagename => $score) {
            $row = HTML::tr(array('class' =>
                                  $score > HIGHLIGHT_ROWS_CUTOFF_SCORE
                                  ? 'evenrow' : 'oddrow'),
                            HTML::td(WikiLink($found_pagename)),
                            HTML::td(array('align' => 'right'),
                                     round($score)));

            if (defined('DEBUG') && DEBUG && $this->debug) {
                $this->_pushDebugTDinto($row, $found_pagename);
            }

            $tbody->pushContent($row);
        }
        $table->pushContent($tbody);
    }

    /**
     * Build the result markup.
     *
     * @param array  $list pagename => score map, already sorted.
     * @param object $dbi  Wiki database handle.
     * @return object A paragraph when $list is empty, otherwise the table.
     */
    function formatTable(&$list, &$dbi) {

        if (empty($list)) {
           return HTML::p(fmt("No fuzzy matches with '%s'", $this->_searchterm));
        }
        $table = HTML::table(array('cellpadding' => 2,
                                   'cellspacing' => 1,
                                   'border'      => 0,
                                   'class' => 'pagelist'));
        $this->addTableCaption($table, $dbi);
        $this->addTableHead($table);
        $this->addTableBody($list, $table);
        return $table;
    }


    /**
     * Plugin entry point: collect, sort and render the fuzzy matches for
     * the 's' argument.
     *
     * @param object $dbi      Wiki database handle.
     * @param string $argstr   Raw plugin argument string.
     * @param object $request  Current request.
     * @param string $basepage Not used by this plugin.
     * @return object Empty HTML() when no search term was given, otherwise
     *                the result of formatTable().
     */
    function run($dbi, $argstr, &$request, $basepage) {
        $args = $this->getArgs($argstr, $request);

        // Read the arguments explicitly instead of extract()ing them into
        // the local scope.
        $s = isset($args['s']) ? $args['s'] : false;
        $debug = isset($args['debug']) ? $args['debug'] : false;

        // Not empty(): that would also reject the legitimate title "0".
        if (! is_scalar($s) || $s === false || strval($s) === '') {
            return HTML();
        }

        // The debug columns are only available when DEBUG is switched on.
        $this->debug = (defined('DEBUG') && DEBUG) ? $debug : false;

        $this->_searchterm = strval($s);
        $this->_list = array();

        $this->collectSimilarPages($this->_list, $dbi);
        $this->sortCollectedPages($this->_list);
        return $this->formatTable($this->_list, $dbi);
    }

    /**
     * Append the headings of the three debug columns to $row.
     *
     * @param object $row Header row element.
     */
    function _pushDebugHeadingTDinto(&$row) {
        $row->pushContent(HTML::td(_("Spelling Score")),
                          HTML::td(_("Sound Score")),
                          HTML::td('Metaphones'));
    }

    /**
     * Append the three debug cells for $pagename to $row.
     *
     * @param object $row      Body row element.
     * @param string $pagename Page name the row describes.
     */
    function _pushDebugTDinto(&$row, $pagename) {
        // This actually calculates everything a second time for each pagename
        // so the individual scores can be displayed separately for debugging.
        $debug_spelling = round($this->spelling_similarity($pagename), 1);
        $debug_sound = round($this->sound_similarity($pagename), 1);
        $debug_metaphone = sprintf("(%s, %s)", metaphone($pagename),
                                   $this->_searchterm_metaphone);

        $row->pushContent(HTML::td(array('align' => 'center'), $debug_spelling),
                          HTML::td(array('align' => 'center'), $debug_sound),
                          HTML::td($debug_metaphone));
    }
}
187
188 // Local Variables:
189 // mode: php
190 // tab-width: 8
191 // c-basic-offset: 4
192 // c-hanging-comment-ender-p: nil
193 // indent-tabs-mode: nil
194 // End:
195 ?>