]> CyberLeo.Net >> Repos - SourceForge/phpwiki.git/blob - lib/plugin/FuzzyPages.php
This is a complete rewrite of the FuzzyPages plugin. The page-title comparison criteri...
[SourceForge/phpwiki.git] / lib / plugin / FuzzyPages.php
1 <?php // -*-php-*-
2 rcs_id('$Id: FuzzyPages.php,v 1.6 2002-03-02 02:59:56 carstenklapp Exp $');
3 /*
4  Copyright 1999, 2000, 2001, 2002 $ThePhpWikiProgrammingTeam
5
6  This file is part of PhpWiki.
7
8  PhpWiki is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12
13  PhpWiki is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  GNU General Public License for more details.
17
18  You should have received a copy of the GNU General Public License
19  along with PhpWiki; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21  */
22
23
24 //require_once('lib/PageList.php');
25
/**
 * FuzzyPages is a plugin which searches for similar page titles.
 *
 * Every page title in the database is compared against the search
 * term twice: once by spelling (similar_text() on the raw titles) and
 * once by sound (similar_text() on their metaphone() keys; php's
 * metaphone() is an improved soundex function).  The two percentages
 * are averaged, and titles scoring above MIN_SCORE_CUTOFF are listed
 * in a table, best match first.
 *
 * http://www.php.net/manual/en/function.similar-text.php
 * http://www.php.net/manual/en/function.metaphone.php
 */
class WikiPlugin_FuzzyPages
extends WikiPlugin
{
    // State shared between the helper methods during one run().
    // Declared explicitly so the class does not rely on dynamically
    // created properties.

    /** Title (or 's' argument) every page name is compared against. */
    var $_searchterm = '';

    /** metaphone() key of $_searchterm; set by collectSimilarPages(). */
    var $_searchterm_metaphone = '';

    /** Map of pagename => averaged similarity score. */
    var $_list = array();

    /** When true, extra per-score debugging columns are rendered. */
    var $debug = false;

    /**
     * @return string Localized plugin name.
     */
    function getName() {
        return _("FuzzyPages");
    }

    /**
     * @return string Localized one-line description of the plugin.
     */
    function getDescription() {
        return sprintf(_("List FuzzyPages for %s"), '[pagename]');
    }

    /**
     * Plugin arguments and their defaults.
     *
     *  page   page whose title is matched (defaults to the current page)
     *  s      optional search term which overrides 'page'
     *  debug  show the individual scores and metaphone keys
     *
     * @return array
     */
    function getDefaultArguments() {
        return array('page'  => '[pagename]',
                     's'     => false,
                     'debug' => false);
    }



    /**
     * Spelling similarity between $subject and the search term.
     *
     * @param string $subject Page title to compare.
     * @return float Percentage computed by similar_text().
     */
    function spelling_similarity($subject) {
        $spelling_similarity_score = 0;
        // similar_text() declares its third parameter by reference, so
        // no '&' is needed (or allowed, since PHP 5.4) at the call site.
        similar_text($subject, $this->_searchterm,
                     $spelling_similarity_score);
        return $spelling_similarity_score;
    }

    /**
     * Sound similarity between $subject and the search term, i.e. the
     * similarity of their metaphone keys.
     *
     * @param string $subject Page title to compare.
     * @return float Percentage computed by similar_text().
     */
    function sound_similarity($subject) {
        $sound_similarity_score = 0;
        // similar_text() automatically calculates a percentage.
        // http://www.php.net/manual/en/function.similar-text.php
        similar_text(metaphone($subject), $this->_searchterm_metaphone,
                     $sound_similarity_score);
        return $sound_similarity_score;
    }

    /**
     * Mean of the spelling and the sound similarity of $subject.
     *
     * @param string $subject Page title to compare.
     * @return float Averaged similarity score.
     */
    function averageSimilarities($subject) {
        return ($this->spelling_similarity($subject)
                + $this->sound_similarity($subject)) / 2;
    }

    /**
     * Score every page returned by $dbi->getAllPages() and record those
     * scoring above MIN_SCORE_CUTOFF in $list as pagename => score.
     *
     * @param array  $list Receives the matches (modified in place).
     * @param object $dbi  Wiki database handle.
     */
    function collectSimilarPages(&$list, &$dbi) {
        if (! defined('MIN_SCORE_CUTOFF'))
            define('MIN_SCORE_CUTOFF', 33);

        // Computed once here; sound_similarity() reuses it for each page.
        $this->_searchterm_metaphone = metaphone($this->_searchterm);

        $allPages = $dbi->getAllPages();

        while ($pagehandle = $allPages->next()) {
            $pagename = $pagehandle->getName();
            $similarity_score = $this->averageSimilarities($pagename);
            // Assign by key instead of array_merge(): array_merge()
            // renumbers integer-like keys, which would lose the name of
            // a page titled e.g. "2002", and it recopies the whole list
            // on every hit.
            if ($similarity_score > MIN_SCORE_CUTOFF)
                $list[$pagename] = $similarity_score;
        }
    }

    /**
     * Sort the collected matches by score, best match first.
     *
     * @param array $list Map of pagename => score (sorted in place).
     */
    function sortCollectedPages(&$list) {
        // arsort() keeps every key attached to its value; the former
        // array_multisort() re-indexed numeric keys.
        arsort($list, SORT_NUMERIC);
    }



    /**
     * Add the caption naming the search term to $table.  The term is
     * rendered as a link when it is an existing wiki page.
     *
     * @param object $table HTML table element (modified in place).
     * @param object $dbi   Wiki database handle.
     */
    function addTableCaption(&$table, &$dbi) {
        if ($dbi->isWikiPage($this->_searchterm))
            $link = WikiLink($this->_searchterm, 'auto');
        else
            $link = $this->_searchterm;
        $caption = fmt("These page titles match fuzzy with '%s'", $link);
        $table->pushContent(HTML::caption(array('align'=>'top'), $caption));
    }

    /**
     * Add the heading row ("Name", "Score" and, in debug mode, the
     * debugging columns) to $table.
     *
     * @param object $table HTML table element (modified in place).
     */
    function addTableHead(&$table) {
        $row = HTML::tr(HTML::td(_("Name")),
                        HTML::td(array('align' => 'right'), _("Score")));
        if ($this->debug)
            $this->_pushDebugHeadingTDinto($row);

        $table->pushContent(HTML::thead($row));
    }

    /**
     * Add one row per match to $table.  Rows scoring above
     * HIGHLIGHT_ROWS_CUTOFF_SCORE get the 'oddrow' class, all others
     * 'evenrow'.
     *
     * @param array  $list  Map of pagename => score, already sorted.
     * @param object $table HTML table element (modified in place).
     */
    function addTableBody(&$list, &$table) {
        if (! defined('HIGHLIGHT_ROWS_CUTOFF_SCORE'))
            define('HIGHLIGHT_ROWS_CUTOFF_SCORE', 60);

        $tbody = HTML::tbody();
        foreach ($list as $found_pagename => $score) {
            // PHP turns integer-like array keys into integers; turn
            // them back into a page name string before using them.
            $found_pagename = (string) $found_pagename;

            $rowclass = $score > HIGHLIGHT_ROWS_CUTOFF_SCORE
                ? 'oddrow' : 'evenrow';
            $row = HTML::tr(array('class' => $rowclass),
                            HTML::td(WikiLink($found_pagename)),
                            HTML::td(array('align' => 'right'),
                                     round($score)));

            if ($this->debug)
                $this->_pushDebugTDinto($row, $found_pagename);

            $tbody->pushContent($row);
        }
        $table->pushContent($tbody);
    }

    /**
     * Build the complete result table (caption, head and body).
     *
     * @param array  $list Map of pagename => score, already sorted.
     * @param object $dbi  Wiki database handle.
     * @return object The HTML table element.
     */
    function formatTable(&$list, &$dbi) {

        $table = HTML::table(array('cellpadding' => 2,
                                   'cellspacing' => 1,
                                   'border'      => 0,
                                   'class' => 'pagelist'));
        $this->addTableCaption($table, $dbi);
        $this->addTableHead($table);
        $this->addTableBody($list, $table);
        return $table;
    }



    /**
     * Plugin entry point.
     *
     * @param object $dbi     Wiki database handle.
     * @param string $argstr  Raw plugin argument string.
     * @param object $request Current request.
     * @return mixed '' when no page is given, otherwise the HTML table
     *               of fuzzy matches.
     */
    function run($dbi, $argstr, $request) {
        $args = $this->getArgs($argstr, $request);
        // Read the arguments explicitly rather than extract()ing them,
        // so that no argument name can shadow a local such as $dbi.
        $page  = $args['page'];
        $s     = $args['s'];
        $debug = $args['debug'];
        if (empty($page))
            return '';
        $this->debug = $debug;

        // An explicit search term takes precedence over the page name.
        $this->_searchterm = $s ? $s : $page;
        $this->_list = array();

        $this->collectSimilarPages($this->_list, $dbi);

        $this->sortCollectedPages($this->_list);

        return $this->formatTable($this->_list, $dbi);
    }



    /**
     * Append the headings of the debugging columns to $row.
     *
     * @param object $row HTML table row (modified in place).
     */
    function _pushDebugHeadingTDinto(&$row) {
        $row->pushContent(HTML::td(_("Spelling Score")),
                          HTML::td(_("Sound Score")),
                          HTML::td('Metaphones'));
    }

    /**
     * Append the debugging cells for $pagename to $row: the spelling
     * score, the sound score and both metaphone keys.
     *
     * @param object $row      HTML table row (modified in place).
     * @param string $pagename Page title the row belongs to.
     */
    function _pushDebugTDinto(&$row, $pagename) {
        // This actually calculates everything a second time for each page
        // but the individual scores are retained separately.
        $debug_spelling = round($this->spelling_similarity($pagename), 1);
        $debug_sound = round($this->sound_similarity($pagename), 1);
        $debug_metaphone = sprintf("(%s, %s)", metaphone($pagename),
                                   $this->_searchterm_metaphone);

        $row->pushContent(HTML::td(array('align' => 'center'), $debug_spelling),
                          HTML::td(array('align' => 'center'), $debug_sound),
                          HTML::td($debug_metaphone));
    }
}
191
192 // Local Variables:
193 // mode: php
194 // tab-width: 8
195 // c-basic-offset: 4
196 // c-hanging-comment-ender-p: nil
197 // indent-tabs-mode: nil
198 // End:
199 ?>