1 <!-- $Id: wiki_diff.php3,v 1.4 2000-07-02 15:54:44 wainstead Exp $ -->
5 // A PHP diff engine for phpwiki.
7 // Copyright (C) 2000 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 // You may copy this code freely under the conditions of the GPL.
13 * Class used internally by WikiDiff to actually compute the diffs.
15 * The algorithm and much of the code for this class is lifted straight
16 * from analyze.c (from GNU diffutils-2.7).
18 * GNU diffutils can be found at:
19 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
21 class _WikiDiffEngine {
22 var $xv, $yv; // Vectors being compared.
23 var $xchanged, $ychanged; // Flags for changed lines in XV and YV.
24 var $edits; // List of editing operation to convert XV to YV.
26 function _WikiDiffEngine ($from_lines, $to_lines)
28 $this->xv = $from_lines;
29 $this->yv = $to_lines;
34 $xlim = sizeof($this->xv);
35 $ylim = sizeof($this->yv);
37 // Compute xchanged and ychanged.
38 $this->_compareseq(0, $xlim, 0, $ylim);
40 $this->edits = array();
42 while ($x < $xlim || $y < $ylim)
46 // Skip matching "snake".
47 while ($x < $xlim && $y < $ylim
48 && !$this->xchanged[$x] && !$this->ychanged[$y])
56 while ($x < $xlim && $this->xchanged[$x])
64 while ($y < $ylim && $this->ychanged[$y])
65 $edit['add'][] = $this->yv[$y++];
67 if ($edit['del'] || $edit['add'])
68 $this->edits[] = $edit;
72 /* Find the midpoint of the shortest edit script for a specified
73 portion of the two files.
75 Scan from the beginnings of the files, and simultaneously from the ends,
76 doing a breadth-first search through the space of edit-sequence.
77 When the two searches meet, we have found the midpoint of the shortest
80 Returns (C, XMID, YMID). C is the approximate edit cost; this is
81 the total number of lines inserted or deleted (counting only lines
82 before the midpoint). (XMID, YMID) is the midpoint.
84 This function assumes that the first lines of the specified portions
85 of the two files do not match, and likewise that the last lines do not
86 match. The caller must trim matching lines from the beginning and end
87 of the portions it is going to specify. */
88 function _diag ($xoff, $xlim, $yoff, $ylim)
90 $dmin = $xoff - $ylim; // Minimum valid diagonal.
91 $doff = 1 - $dmin; // Offset to assure index is always non-neg.
93 $_dmax = $xlim - $yoff + $doff; // Maximum valid diagonal
94 $_fmid = $xoff - $yoff + $doff; // Center diagonal of top-down search
95 $_bmid = $xlim - $ylim + $doff; // Center diagonal of bottom-up search
96 $_fmin = $_fmid; $_fmax = $_fmid; // Limits of top-down search.
97 $_bmin = $_bmid; $_bmax = $_bmid; // Limits of bottom-up search.
98 $is_odd = ($_fmid - $_bmid) & 1; /* True if southeast corner is on
99 * an odd diagonal with respect to
107 //int d; /* Active diagonal. */
110 // Extend the top-down search by an edit step in each diagonal.
112 $fd[--$_fmin - 1] = -1;
116 $fd[++$_fmax + 1] = -1;
120 for ($_d = $_fmax; $_d >= $_fmin; $_d -= 2)
130 $y = $x - ($_d - $doff);
131 while ($x < $xlim && $y < $ylim
132 && $this->xv[$x] == $this->yv[$y])
138 if ($is_odd && $_bmin <= $_d && $_d <= $_bmax
141 return array(2 * $c - 1, $x, $y);
145 /* Similarly extend the bottom-up search. */
147 $bd[--$_bmin - 1] = $xlim + 1;
150 $bd[++$_bmax + 1] = $xlim + 1;
153 for ($_d = $_bmax; $_d >= $_bmin; $_d -= 2)
155 $tlo = $bd[$_d - 1]; $thi = $bd[$_d + 1];
162 $y = $x - ($_d - $doff);
163 while ($x > $xoff && $y > $yoff
164 && $this->xv[$x - 1] == $this->yv[$y - 1])
170 if (!$is_odd && $_fmin <= $_d && $_d <= $_fmax
173 return array(2 * $c, $x, $y);
176 /* FIXME: add heuristics to avoid slowness?
178 * I've deleted from the diffutils algorithm two hairy
179 * heuristics which are desingned to keep the algorithm
180 * efficient even on large files. (The algorithm as
181 * implemented here on O(n^2).)
183 * I believe that As long as we're dealing with files
184 * under a few hundred lines long this is not really
190 /* Compare in detail contiguous subsequences of the two files
191 which are known, as a whole, to match each other.
193 The results are recorded in the vectors $this->{x,y}changed[], by
194 storing a 1 in the element for each line that is an insertion
197 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
199 Note that XLIM, YLIM are exclusive bounds.
200 All line numbers are origin-0 and discarded lines are not counted. */
201 function _compareseq ($xoff, $xlim, $yoff, $ylim)
203 /* Slide down the bottom initial diagonal. */
204 while ($xoff < $xlim && $yoff < $ylim
205 && $this->xv[$xoff] == $this->yv[$yoff])
211 /* Slide up the top initial diagonal. */
212 while ($xlim > $xoff && $ylim > $yoff
213 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
219 /* Handle simple cases. */
222 while ($yoff < $ylim)
223 $this->ychanged[$yoff++] = 1;
225 else if ($yoff == $ylim)
227 while ($xoff < $xlim)
228 $this->xchanged[$xoff++] = 1;
232 // Find a point of correspondence in the middle of the files.
233 list ($c, $xmid, $ymid) = $this->_diag($xoff, $xlim,
238 /* This should be impossible, because it implies that
239 one of the two subsequences is empty,
240 and that case was handled above without calling `diag'.
241 Let's verify that this is true. */
242 die("This is impossible"); //FIXME
245 // Use the partitions to split this problem into subproblems.
246 $this->_compareseq ($xoff, $xmid, $yoff, $ymid);
247 $this->_compareseq ($xmid, $xlim, $ymid, $ylim);
258 function WikiDiff($from_lines = false, $to_lines = false)
260 if ($from_lines && $to_lines)
262 $compute = new _WikiDiffEngine($from_lines, $to_lines);
263 $this->edits = $compute->edits;
265 else if ($from_lines)
267 // $from_lines is not really from_lines, but rather
268 // a serialized WikiDiff.
269 $this->edits = unserialize($from_lines);
273 function apply ($from_lines)
276 $xlim = sizeof($from_lines);
278 for ( reset($this->edits);
279 $edit = current($this->edits);
283 for ($i = 0; $i < $edit['cp']; $i++)
284 $output[] = $from_lines[$x++];
286 if ($adds = $edit['add'])
287 for ( reset($adds); $add = current($adds); next($adds) )
291 $output[] = $from_lines[$x++];
293 die("WikiDiff::apply: line count mismatch: $x != $xlim");
297 function serialize ()
299 return serialize($this->edits);
302 function unserialize ($serial)
304 $this->edits = unserialize($serial);
308 class WikiDiffFormatter {
311 var $context_prefix, $deletes_prefix, $adds_prefix;
313 function WikiDiffFormatter ()
315 $this->context_lines = 0;
316 $this->context_prefix = ' ';
317 $this->deletes_prefix = '< ';
318 $this->adds_prefix = '> ';
321 function format ($diff, $from_lines)
323 $html = '<table width="100%" bgcolor="black"' .
324 "cellspacing=2 cellpadding=2 border=0\n";
325 $html .= $this->_format($diff->edits, $from_lines);
326 $html .= "</table>\n";
331 function _format ($edits, $from_lines)
334 $xlim = sizeof($from_lines);
337 while ($edit = current($edits))
342 // Copy leading context.
344 $cc = max(0, $x - $this->context_lines);
348 $yoff = $xoff + $y - $x;
351 $hunk['context'][] = $from_lines[$cc++];
354 for ($i = 0; $i < $edit['del']; $i++)
355 $hunk['deletes'][] = $from_lines[$x++];
358 if ($adds = $edit['add'])
359 for ( reset($adds); $add = current($adds); next($adds) )
361 $hunk['adds'][] = $add;
368 // Update context pointer
371 if (!($edit = next($edits))
372 || $edit['cp'] > 2 * $this->context_lines)
374 // Copy trailing context
375 while ($cc < min($x + $this->context_lines, $xlim))
376 $hunk['context'][] = $from_lines[$cc++];
378 $ylen = $cc + $y - $x - $yoff;;
383 $xbeg = $xlen ? $xoff + 1 : $xoff;
384 $ybeg = $ylen ? $yoff + 1 : $yoff;
385 $html .= $this->_emit_diff( $xbeg,$xlen,$ybeg,$ylen, $hunks );
393 function _emit_lines($lines, $prefix, $color)
395 if (! is_array($lines))
398 for (reset($lines); $line = current($lines); next($lines))
400 $html .= "<tr bgcolor=\"$color\"><td><tt>$prefix</tt>";
401 $html .= htmlspecialchars($line) . "</td></tr>\n";
407 function _emit_diff ($xbeg,$xlen,$ybeg,$ylen,$hunks)
409 $html = '<tr><td><table width="100%" bgcolor="white"'
410 . " cellspacing=0 border=0 cellpadding=4>\n"
411 . '<tr bgcolor="#cccccc"><td><tt>'
412 . $this->_diff_header($xbeg, $xlen, $ybeg, $ylen)
413 . "</tt></td></tr>\n<tr><td>\n"
414 . "<table width=\"100%\" cellspacing=0 border=0 cellpadding=2>\n";
416 for (reset($hunks); $hunk = current($hunks); next($hunks))
418 $html .= $this->_emit_lines($hunk['context'],
419 $this->context_prefix,
421 $html .= $this->_emit_lines($hunk['deletes'],
422 $this->deletes_prefix,
424 $html .= $this->_emit_lines($hunk['adds'],
429 $html .= "</table></td></tr></table></td></tr>\n";
433 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
435 $what = $xlen ? ($ylen ? 'c' : 'd') : 'a';
436 $xlen = $xlen > 1 ? "," . ($xbeg + $xlen - 1) : '';
437 $ylen = $ylen > 1 ? "," . ($ybeg + $ylen - 1) : '';
439 return "$xbeg$xlen$what$ybeg$ylen";
443 class WikiUnifiedDiffFormatter extends WikiDiffFormatter {
444 function WikiUnifiedDiffFormatter ($context_lines = 3)
446 $this->context_lines = $context_lines;
447 $this->context_prefix = ' ';
448 $this->deletes_prefix = '-';
449 $this->adds_prefix = '+';
452 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
454 $xlen = $xlen == 1 ? '' : ",$xlen";
455 $ylen = $ylen == 1 ? '' : ",$ylen";
457 return "@@ -$xbeg$xlen +$ybeg$ylen @@";
463 /////////////////////////////////////////////////////////////////
467 $wiki = RetrievePage($dbi, $pagename);
468 $dba = OpenDataBase($ArchiveDataBase);
469 $archive= RetrievePage($dba, $pagename);
471 $html = '<table><tr><td align="right">Current page:</td>';
473 $html .= "<td>version $wiki[version],</td><td>last modified on "
474 . date($datetimeformat, $wiki['lastmodified'])
475 . "</td><td>by $wiki[author]</td>";
477 $html .= "<td colspan=3><em>None</em></td>";
478 $html .= '</tr><tr><td align="right">Archived page:</td>';
479 if (is_array($archive))
480 $html .= "<td>version $archive[version],</td><td>last modified on "
481 . date($datetimeformat, $archive['lastmodified'])
482 . "</td><td>by $archive[author]</td>";
484 $html .= "<td colspan=3><em>None</em></td>";
485 $html .= "</tr></table><p>\n";
488 if (is_array($wiki) && is_array($archive))
490 $diff = new WikiDiff($archive['content'], $wiki['content']);
491 //$fmt = new WikiDiffFormatter();
492 $fmt = new WikiUnifiedDiffFormatter();
493 $html .= $fmt->format($diff, $archive['content']);
496 GeneratePage('MESSAGE', $html, 'Diff of '.htmlspecialchars($pagename), 0);