4 // A PHP diff engine for phpwiki.
6 // Copyright (C) 2000 Geoffrey T. Dairiki <dairiki@dairiki.org>
7 // You may copy this code freely under the conditions of the GPL.
12 * Class used internally by WikiDiff to actually compute the diffs.
14 * The algorithm and much of the code for this class is lifted straight
15 * from analyze.c (from GNU diffutils-2.7).
17 * GNU diffutils can be found at:
18 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
20 class _WikiDiffEngine {
21 var $xv, $yv; // Vectors being compared.
22 var $xchanged, $ychanged; // Flags for changed lines in XV and YV.
23 var $edits; // List of editing operation to convert XV to YV.
25 function _WikiDiffEngine ($from_lines, $to_lines)
27 $this->xv = $from_lines;
28 $this->yv = $to_lines;
33 $xlim = sizeof($this->xv);
34 $ylim = sizeof($this->yv);
36 // Compute xchanged and ychanged.
37 $this->_compareseq(0, $xlim, 0, $ylim);
39 while ($x < $xlim || $y < $ylim)
43 // Skip matching "snake".
44 while ($x < $xlim && $y < $ylim
45 && !$this->xchanged[$x] && !$this->ychanged[$y])
53 while ($x < $xlim && $this->xchanged[$x])
61 while ($y < $ylim && $this->ychanged[$y])
62 $edit['add'][] = $this->yv[$y++];
64 if ($edit['del'] || $edit['add'])
65 $this->edits[] = $edit;
69 /* Find the midpoint of the shortest edit script for a specified
70 portion of the two files.
72 Scan from the beginnings of the files, and simultaneously from the ends,
73 doing a breadth-first search through the space of edit-sequence.
74 When the two searches meet, we have found the midpoint of the shortest
77 Returns (C, XMID, YMID). C is the approximate edit cost; this is
78 the total number of lines inserted or deleted (counting only lines
79 before the midpoint). (XMID, YMID) is the midpoint.
81 This function assumes that the first lines of the specified portions
82 of the two files do not match, and likewise that the last lines do not
83 match. The caller must trim matching lines from the beginning and end
84 of the portions it is going to specify. */
85 function _diag ($xoff, $xlim, $yoff, $ylim)
87 $dmin = $xoff - $ylim; // Minimum valid diagonal.
88 $doff = 1 - $dmin; // Offset to assure index is always non-neg.
90 $_dmax = $xlim - $yoff + $doff; // Maximum valid diagonal
91 $_fmid = $xoff - $yoff + $doff; // Center diagonal of top-down search
92 $_bmid = $xlim - $ylim + $doff; // Center diagonal of bottom-up search
93 $_fmin = $_fmid; $_fmax = $_fmid; // Limits of top-down search.
94 $_bmin = $_bmid; $_bmax = $_bmid; // Limits of bottom-up search.
95 $is_odd = ($_fmid - $_bmid) & 1; /* True if southeast corner is on
96 * an odd diagonal with respect to
104 //int d; /* Active diagonal. */
107 // Extend the top-down search by an edit step in each diagonal.
109 $fd[--$_fmin - 1] = -1;
113 $fd[++$_fmax + 1] = -1;
117 for ($_d = $_fmax; $_d >= $_fmin; $_d -= 2)
127 $y = $x - ($_d - $doff);
128 while ($x < $xlim && $y < $ylim
129 && $this->xv[$x] == $this->yv[$y])
135 if ($is_odd && $_bmin <= $_d && $_d <= $_bmax
138 return array(2 * $c - 1, $x, $y);
142 /* Similarly extend the bottom-up search. */
144 $bd[--$_bmin - 1] = $xlim + 1;
147 $bd[++$_bmax + 1] = $xlim + 1;
150 for ($_d = $_bmax; $_d >= $_bmin; $_d -= 2)
152 $tlo = $bd[$_d - 1]; $thi = $bd[$_d + 1];
159 $y = $x - ($_d - $doff);
160 while ($x > $xoff && $y > $yoff
161 && $this->xv[$x - 1] == $this->yv[$y - 1])
167 if (!$is_odd && $_fmin <= $_d && $_d <= $_fmax
170 return array(2 * $c, $x, $y);
173 /* FIXME: add heuristics to avoid slowness?
175 * I've deleted from the diffutils algorithm two hairy
176 * heuristics which are desingned to keep the algorithm
177 * efficient even on large files. (The algorithm as
178 * implemented here on O(n^2).)
180 * I believe that As long as we're dealing with files
181 * under a few hundred lines long this is not really
187 /* Compare in detail contiguous subsequences of the two files
188 which are known, as a whole, to match each other.
190 The results are recorded in the vectors $this->{x,y}changed[], by
191 storing a 1 in the element for each line that is an insertion
194 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
196 Note that XLIM, YLIM are exclusive bounds.
197 All line numbers are origin-0 and discarded lines are not counted. */
198 function _compareseq ($xoff, $xlim, $yoff, $ylim)
200 /* Slide down the bottom initial diagonal. */
201 while ($xoff < $xlim && $yoff < $ylim
202 && $this->xv[$xoff] == $this->yv[$yoff])
208 /* Slide up the top initial diagonal. */
209 while ($xlim > $xoff && $ylim > $yoff
210 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
216 /* Handle simple cases. */
219 while ($yoff < $ylim)
220 $this->ychanged[$yoff++] = 1;
222 else if ($yoff == $ylim)
224 while ($xoff < $xlim)
225 $this->xchanged[$xoff++] = 1;
229 // Find a point of correspondence in the middle of the files.
230 list ($c, $xmid, $ymid) = $this->_diag($xoff, $xlim,
235 /* This should be impossible, because it implies that
236 one of the two subsequences is empty,
237 and that case was handled above without calling `diag'.
238 Let's verify that this is true. */
239 die("This is impossible"); //FIXME
242 // Use the partitions to split this problem into subproblems.
243 $this->_compareseq ($xoff, $xmid, $yoff, $ymid);
244 $this->_compareseq ($xmid, $xlim, $ymid, $ylim);
255 function WikiDiff($from_lines = false, $to_lines = false)
257 if ($from_lines && $to_lines)
259 $compute = new _WikiDiffEngine($from_lines, $to_lines);
260 $this->edits = $compute->edits;
262 else if ($from_lines)
264 // $from_lines is not really from_lines, but rather
265 // a serialized WikiDiff.
266 $this->edits = unserialize($from_lines);
270 function apply ($from_lines)
273 $xlim = sizeof($from_lines);
275 for ( reset($this->edits);
276 $edit = current($this->edits);
280 for ($i = 0; $i < $edit['cp']; $i++)
281 $output[] = $from_lines[$x++];
283 if ($adds = $edit['add'])
284 for ( reset($adds); $add = current($adds); next($adds) )
288 $output[] = $from_lines[$x++];
290 die("WikiDiff::apply: line count mismatch: $x != $xlim");
294 function serialize ()
296 return serialize($this->edits);
299 function unserialize ($serial)
301 $this->edits = unserialize($serial);
305 class WikiDiffFormatter {
308 var $context_prefix, $deletes_prefix, $adds_prefix;
310 function WikiDiffFormatter ()
312 $this->context_lines = 0;
313 $this->context_prefix = ' ';
314 $this->deletes_prefix = '< ';
315 $this->adds_prefix = '> ';
318 function format ($diff, $from_lines)
320 $html = '<table width="100%" bgcolor="black"' .
321 "cellspacing=2 cellpadding=2 border=0\n";
322 $html .= $this->_format($diff->edits, $from_lines);
323 $html .= "</table>\n";
328 function _format ($edits, $from_lines)
331 $end_last_context = 0;
332 $xlim = sizeof($from_lines);
335 while ($edit = current($edits))
340 // Copy leading context.
341 $cc = max($end_last_context, $x - $this->context_lines);
345 $yoff = $xoff + $y - $x;
348 $hunk['context'][] = $from_lines[$cc++];
351 for ($i = 0; $i < $edit['del']; $i++)
352 $hunk['deletes'][] = $from_lines[$x++];
355 if ($adds = $edit['add'])
356 for ( reset($adds); $add = current($adds); next($adds) )
358 $hunk['adds'][] = $add;
365 // Copy trailing context.
367 while ($cc < min($x + $this->context_lines, $xlim))
368 $hunk['context'][] = $from_lines[$cc++];
369 $end_last_context = $cc;
371 $ylen = $cc + $y - $x - $yoff;;
373 if (!($edit = next($edits))
374 || $edit['cp'] > 2 * $this->context_lines)
378 $xbeg = $xlen ? $xoff + 1 : $xoff;
379 $ybeg = $ylen ? $yoff + 1 : $yoff;
380 $html .= $this->_emit_diff( $xbeg,$xlen,$ybeg,$ylen, $hunks );
388 function _emit_lines($lines, $prefix, $color)
390 if (! is_array($lines))
393 for (reset($lines); $line = current($lines); next($lines))
395 $html .= "<tr bgcolor=\"$color\"><td><tt>$prefix</tt>";
396 $html .= htmlspecialchars($line) . "</td></tr>\n";
402 function _emit_diff ($xbeg,$xlen,$ybeg,$ylen,$hunks)
404 $html = '<tr><td><table width="100%" bgcolor="white"'
405 . " cellspacing=0 border=0 cellpadding=4>\n"
407 . $this->_diff_header($xbeg, $xlen, $ybeg, $ylen)
408 . "</td></tr>\n<tr><td>\n"
409 . "<table width=\"100%\" cellspacing=0 border=0 cellpadding=2>\n";
411 for (reset($hunks); $hunk = current($hunks); next($hunks))
413 $html .= $this->_emit_lines($hunk['context'],
414 $this->context_prefix,
416 $html .= $this->_emit_lines($hunk['deletes'],
417 $this->deletes_prefix,
419 $html .= $this->_emit_lines($hunk['adds'],
424 $html .= "</table></td></tr></table></td></tr>\n";
428 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
430 $what = $xlen ? ($ylen ? 'c' : 'd') : 'a';
431 $xlen = $xlen > 1 ? "," . ($xbeg + $xlen - 1) : '';
432 $ylen = $ylen > 1 ? "," . ($ybeg + $ylen - 1) : '';
434 return "$xbeg$xlen$what$ybeg$ylen";
438 class WikiUnifiedDiffFormatter extends WikiDiffFormatter {
439 function WikiUnifiedDiffFormatter ($context_lines = 3)
441 $this->context_lines = $context_lines;
442 $this->context_prefix = ' ';
443 $this->deletes_prefix = '-';
444 $this->adds_prefix = '+';
447 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
449 $xlen = $xlen == 1 ? '' : ",$xlen";
450 $ylen = $ylen == 1 ? '' : ",$ylen";
452 return "@@ -$xbeg$xlen +$ybeg$ylen @@";
458 /////////////////////////////////////////////////////////////////
462 $wiki = RetrievePage($dbi, $pagename);
463 $dba = OpenDataBase($ArchiveDataBase);
464 $archive= RetrievePage($dba, $pagename);
466 if((!is_array($wiki)) || (!is_array($archive))) {
467 $html = 'There exists no archived version of the page, or the page itself does not exist.';
470 $diff = new WikiDiff($archive['content'], $wiki['content']);
471 $plain_fmt = new WikiDiffFormatter();
472 $html = $plain_fmt->format($diff, $archive['content']);
475 GeneratePage('MESSAGE', $html, 'Diff of '.htmlspecialchars($pagename), 0);