2 rcs_id('$Id: FuzzyPages.php,v 1.6 2002-03-02 02:59:56 carstenklapp Exp $');
4 Copyright 1999, 2000, 2001, 2002 $ThePhpWikiProgrammingTeam
6 This file is part of PhpWiki.
8 PhpWiki is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 PhpWiki is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with PhpWiki; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 //require_once('lib/PageList.php');
27 * FuzzyPages is a plugin which searches for similar page titles.
29 * Pages are considered similar by averaging the similarity scores of
30 * the spelling comparison and the metaphone comparison for each page
31 * title in the database (php's metaphone() is an improved soundex
34 * http://www.php.net/manual/en/function.similar-text.php
35 * http://www.php.net/manual/en/function.metaphone.php
37 class WikiPlugin_FuzzyPages
41 return _("FuzzyPages");
/**
 * Return a short, localized description of this plugin.
 *
 * @return string The translated template with '[pagename]' substituted
 *                for its single %s placeholder.
 */
function getDescription() {
    $template = _("List FuzzyPages for %s");
    return sprintf($template, '[pagename]');
}
// Returns the plugin's default arguments as an array; the 'page' argument
// defaults to the literal placeholder '[pagename]'.
// NOTE(review): the remaining array entries and the end of this method
// (original lines 50-53) are not visible in this view. run() reads $s, $page
// and $debug, so defaults for 's' and 'debug' are presumably declared
// here -- confirm against the full file.
48 function getDefaultArguments() {
49 return array('page' => '[pagename]',
/**
 * Score how closely a page title's spelling matches the search term.
 *
 * @param string $subject Page title compared against $this->_searchterm.
 * @return float|int Similarity percentage reported by similar_text().
 */
function spelling_similarity($subject) {
    $spelling_similarity_score = 0;
    // similar_text() declares its third parameter by reference, so the
    // variable is passed plainly. A call-time `&` is a fatal error on
    // PHP 5.4 and later.
    similar_text($subject, $this->_searchterm, $spelling_similarity_score);
    return $spelling_similarity_score;
}
/**
 * Score how closely a page title sounds like the search term.
 *
 * The metaphone key of $subject is compared with the pre-computed key in
 * $this->_searchterm_metaphone.
 *
 * @param string $subject Page title to compare.
 * @return float|int Similarity percentage reported by similar_text().
 */
function sound_similarity($subject) {
    $sound_similarity_score = 0;
    // similar_text() automatically calculates a percentage.
    // http://www.php.net/manual/en/function.similar-text.php
    // Its third parameter is declared by reference, so no call-time `&`
    // is used (that syntax is a fatal error on PHP 5.4 and later).
    similar_text(metaphone($subject), $this->_searchterm_metaphone,
                 $sound_similarity_score);
    return $sound_similarity_score;
}
/**
 * Combine the spelling and sound scores of a page title.
 *
 * @param string $subject Page title to score.
 * @return float|int Arithmetic mean of the two similarity scores.
 */
function averageSimilarities($subject) {
    $spelling = $this->spelling_similarity($subject);
    $sound = $this->sound_similarity($subject);
    return ($spelling + $sound) / 2;
}
/**
 * Collect every page whose averaged similarity score exceeds
 * MIN_SCORE_CUTOFF.
 *
 * Also stores the metaphone key of the search term in
 * $this->_searchterm_metaphone for use by the sound comparison.
 *
 * @param array  $list Receives page name => score entries (by reference).
 * @param object $dbi  Database handle providing getAllPages().
 */
function collectSimilarPages(&$list, &$dbi) {
    if (! defined('MIN_SCORE_CUTOFF'))
        define('MIN_SCORE_CUTOFF', 33);

    $this->_searchterm_metaphone = metaphone($this->_searchterm);

    $allPages = $dbi->getAllPages();

    while ($pagehandle = $allPages->next()) {
        $pagename = $pagehandle->getName();
        $similarity_score = $this->averageSimilarities($pagename);
        if ($similarity_score > MIN_SCORE_CUTOFF) {
            // Assign by key instead of array_merge(): array_merge()
            // renumbers integer keys, so a numeric page name such as
            // "2002" would be replaced by 0, 1, ... It also recopies the
            // whole array on every hit.
            $list[$pagename] = $similarity_score;
        }
    }
}
/**
 * Sort the collected pages by score, highest first.
 *
 * @param array $list Page name => score map, sorted in place.
 */
function sortCollectedPages(&$list) {
    // arsort() keeps every key attached to its score. The previous
    // array_multisort() call re-indexed numeric keys (losing numeric page
    // names) and used a call-time `&`, which is a fatal error on PHP 5.4+.
    arsort($list, SORT_NUMERIC);
}
/**
 * Add a caption naming the search term to the result table.
 *
 * @param object $table Table element that receives the caption.
 * @param object $dbi   Database handle used to test whether the search
 *                      term names an existing page.
 */
function addTableCaption(&$table, &$dbi) {
    // Link the search term only when it names an existing page; otherwise
    // show it as plain text. The two assignments are alternatives: without
    // the `else`, the link was always overwritten by the plain string.
    if ($dbi->isWikiPage($this->_searchterm))
        $link = WikiLink($this->_searchterm, 'auto');
    else
        $link = $this->_searchterm;
    $caption = fmt("These page titles match fuzzy with '%s'", $link);
    $table->pushContent(HTML::caption(array('align'=>'top'), $caption));
}
/**
 * Add the heading row ("Name", "Score") to the result table.
 *
 * @param object $table Table element that receives the thead.
 */
function addTableHead(&$table) {
    $row = HTML::tr(HTML::td(_("Name")),
                    HTML::td(array('align' => 'right'), _("Score")));
    // The extra debug columns belong to debug mode only. run() stores the
    // flag in $this->debug; empty() also covers the case where it was
    // never set.
    if (! empty($this->debug))
        $this->_pushDebugHeadingTDinto($row);

    $table->pushContent(HTML::thead($row));
}
// Builds the table body: one row per entry of $list (page name => score),
// each with a link to the page and a right-aligned score cell, then pushes
// the finished tbody into $table.
// NOTE(review): several original lines are missing from this view (129-131,
// 133, 135, 137-138), including the content of the score cell and the
// closing braces -- restore them from the full file before editing.
118 function addTableBody(&$list, &$table) {
119 if (! defined('HIGHLIGHT_ROWS_CUTOFF_SCORE'))
120 define('HIGHLIGHT_ROWS_CUTOFF_SCORE', 60);
122 $tbody = HTML::tbody();
123 foreach ($list as $found_pagename => $score) {
// Rows scoring above the cutoff get class 'oddrow', all others 'evenrow';
// here the class marks a strong match rather than row parity.
124 $row = HTML::tr(array('class' =>
125 $score > HIGHLIGHT_ROWS_CUTOFF_SCORE
126 ? 'oddrow' : 'evenrow'),
127 HTML::td(WikiLink($found_pagename)),
128 HTML::td(array('align' => 'right'),
// NOTE(review): the line(s) before this call are not visible; presumably the
// call is guarded by the debug flag stored in run() -- confirm.
132 $this->_pushDebugTDinto($row, $found_pagename);
134 $tbody->pushContent($row);
136 $table->pushContent($tbody);
// Creates the result table (class 'pagelist') and fills it with the
// caption, the heading row and the body rows.
// NOTE(review): original lines 142-143 (further table attributes) and
// 148-152 (end of the method) are missing from this view. run() returns this
// method's result, so it presumably ends by returning $table -- confirm.
139 function formatTable(&$list, &$dbi) {
141 $table = HTML::table(array('cellpadding' => 2,
144 'class' => 'pagelist'));
// NOTE(review): `&$dbi` at the call site is call-time pass-by-reference,
// which is a fatal error on PHP 5.4+. addTableCaption() already declares the
// parameter by reference, so the `&` here should be dropped.
145 $this->addTableCaption($table, &$dbi);
146 $this->addTableHead($table);
147 $this->addTableBody($list, $table);
// Plugin entry point: reads the arguments, records the debug flag and the
// search term, collects and sorts the similar pages, and returns the
// formatted result table.
// NOTE(review): original lines 155-157 are missing from this view. $debug,
// $s and $page are not defined in the visible code; presumably they are
// extracted from $args on those lines -- confirm.
153 function run($dbi, $argstr, $request) {
154 $args = $this->getArgs($argstr, $request);
158 $this->debug = $debug;
// The search term is $s when it is truthy, otherwise $page.
160 $this->_searchterm = $s ? $s : $page;
161 $this->_list = array();
// NOTE(review): `&$dbi` at these call sites is call-time pass-by-reference,
// which is a fatal error on PHP 5.4+. Both callees already declare the
// parameter by reference, so the `&` should be dropped.
163 $this->collectSimilarPages($this->_list, &$dbi);
165 $this->sortCollectedPages($this->_list);
167 return $this->formatTable($this->_list, &$dbi);
/**
 * Append the three debug heading cells to a heading row.
 *
 * @param object $row Row element that receives the cells.
 */
function _pushDebugHeadingTDinto(&$row) {
    $spellingHeading = HTML::td(_("Spelling Score"));
    $soundHeading = HTML::td(_("Sound Score"));
    $metaphoneHeading = HTML::td('Metaphones');
    $row->pushContent($spellingHeading, $soundHeading, $metaphoneHeading);
}
/**
 * Append the debug cells for one page to its result row: spelling score,
 * sound score, and the metaphone keys of the page name and search term.
 *
 * Both scores are recomputed here for each page so that they can be shown
 * separately rather than only as their average.
 *
 * @param object $row      Row element that receives the cells.
 * @param string $pagename Name of the page shown in this row.
 */
function _pushDebugTDinto(&$row, $pagename) {
    $spellingScore = round($this->spelling_similarity($pagename), 1);
    $soundScore = round($this->sound_similarity($pagename), 1);
    $metaphonePair = sprintf("(%s, %s)", metaphone($pagename),
                             $this->_searchterm_metaphone);

    $centered = array('align' => 'center');
    $spellingCell = HTML::td($centered, $spellingScore);
    $soundCell = HTML::td($centered, $soundScore);
    $metaphoneCell = HTML::td($metaphonePair);
    $row->pushContent($spellingCell, $soundCell, $metaphoneCell);
}
196 // c-hanging-comment-ender-p: nil
197 // indent-tabs-mode: nil