4 // A PHP diff engine for phpwiki.
6 // Copyright (C) 2000 Geoffrey T. Dairiki <dairiki@dairiki.org>
7 // You may copy this code freely under the conditions of the GPL.
12 * Class used internally by WikiDiff to actually compute the diffs.
14 * The algorithm and much of the code for this class is lifted straight
15 * from analyze.c (from GNU diffutils-2.7).
17 * GNU diffutils can be found at:
18 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
20 class _WikiDiffEngine {
21 var $xv, $yv; // Vectors being compared.
22 var $xchanged, $ychanged; // Flags for changed lines in XV and YV.
23 var $edits; // List of editing operation to convert XV to YV.
25 function _WikiDiffEngine ($from_lines, $to_lines)
27 $this->xv = $from_lines;
28 $this->yv = $to_lines;
33 $xlim = sizeof($this->xv);
34 $ylim = sizeof($this->yv);
36 // Compute xchanged and ychanged.
37 $this->_compareseq(0, $xlim, 0, $ylim);
39 $this->edits = array();
41 while ($x < $xlim || $y < $ylim)
45 // Skip matching "snake".
46 while ($x < $xlim && $y < $ylim
47 && !$this->xchanged[$x] && !$this->ychanged[$y])
55 while ($x < $xlim && $this->xchanged[$x])
63 while ($y < $ylim && $this->ychanged[$y])
64 $edit['add'][] = $this->yv[$y++];
66 if ($edit['del'] || $edit['add'])
67 $this->edits[] = $edit;
71 /* Find the midpoint of the shortest edit script for a specified
72 portion of the two files.
74 Scan from the beginnings of the files, and simultaneously from the ends,
75 doing a breadth-first search through the space of edit-sequence.
76 When the two searches meet, we have found the midpoint of the shortest
79 Returns (C, XMID, YMID). C is the approximate edit cost; this is
80 the total number of lines inserted or deleted (counting only lines
81 before the midpoint). (XMID, YMID) is the midpoint.
83 This function assumes that the first lines of the specified portions
84 of the two files do not match, and likewise that the last lines do not
85 match. The caller must trim matching lines from the beginning and end
86 of the portions it is going to specify. */
87 function _diag ($xoff, $xlim, $yoff, $ylim)
89 $dmin = $xoff - $ylim; // Minimum valid diagonal.
90 $doff = 1 - $dmin; // Offset to assure index is always non-neg.
92 $_dmax = $xlim - $yoff + $doff; // Maximum valid diagonal
93 $_fmid = $xoff - $yoff + $doff; // Center diagonal of top-down search
94 $_bmid = $xlim - $ylim + $doff; // Center diagonal of bottom-up search
95 $_fmin = $_fmid; $_fmax = $_fmid; // Limits of top-down search.
96 $_bmin = $_bmid; $_bmax = $_bmid; // Limits of bottom-up search.
97 $is_odd = ($_fmid - $_bmid) & 1; /* True if southeast corner is on
98 * an odd diagonal with respect to
106 //int d; /* Active diagonal. */
109 // Extend the top-down search by an edit step in each diagonal.
111 $fd[--$_fmin - 1] = -1;
115 $fd[++$_fmax + 1] = -1;
119 for ($_d = $_fmax; $_d >= $_fmin; $_d -= 2)
129 $y = $x - ($_d - $doff);
130 while ($x < $xlim && $y < $ylim
131 && $this->xv[$x] == $this->yv[$y])
137 if ($is_odd && $_bmin <= $_d && $_d <= $_bmax
140 return array(2 * $c - 1, $x, $y);
144 /* Similarly extend the bottom-up search. */
146 $bd[--$_bmin - 1] = $xlim + 1;
149 $bd[++$_bmax + 1] = $xlim + 1;
152 for ($_d = $_bmax; $_d >= $_bmin; $_d -= 2)
154 $tlo = $bd[$_d - 1]; $thi = $bd[$_d + 1];
161 $y = $x - ($_d - $doff);
162 while ($x > $xoff && $y > $yoff
163 && $this->xv[$x - 1] == $this->yv[$y - 1])
169 if (!$is_odd && $_fmin <= $_d && $_d <= $_fmax
172 return array(2 * $c, $x, $y);
175 /* FIXME: add heuristics to avoid slowness?
177 * I've deleted from the diffutils algorithm two hairy
178 * heuristics which are desingned to keep the algorithm
179 * efficient even on large files. (The algorithm as
180 * implemented here on O(n^2).)
182 * I believe that As long as we're dealing with files
183 * under a few hundred lines long this is not really
189 /* Compare in detail contiguous subsequences of the two files
190 which are known, as a whole, to match each other.
192 The results are recorded in the vectors $this->{x,y}changed[], by
193 storing a 1 in the element for each line that is an insertion
196 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
198 Note that XLIM, YLIM are exclusive bounds.
199 All line numbers are origin-0 and discarded lines are not counted. */
200 function _compareseq ($xoff, $xlim, $yoff, $ylim)
202 /* Slide down the bottom initial diagonal. */
203 while ($xoff < $xlim && $yoff < $ylim
204 && $this->xv[$xoff] == $this->yv[$yoff])
210 /* Slide up the top initial diagonal. */
211 while ($xlim > $xoff && $ylim > $yoff
212 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
218 /* Handle simple cases. */
221 while ($yoff < $ylim)
222 $this->ychanged[$yoff++] = 1;
224 else if ($yoff == $ylim)
226 while ($xoff < $xlim)
227 $this->xchanged[$xoff++] = 1;
231 // Find a point of correspondence in the middle of the files.
232 list ($c, $xmid, $ymid) = $this->_diag($xoff, $xlim,
237 /* This should be impossible, because it implies that
238 one of the two subsequences is empty,
239 and that case was handled above without calling `diag'.
240 Let's verify that this is true. */
241 die("This is impossible"); //FIXME
244 // Use the partitions to split this problem into subproblems.
245 $this->_compareseq ($xoff, $xmid, $yoff, $ymid);
246 $this->_compareseq ($xmid, $xlim, $ymid, $ylim);
257 function WikiDiff($from_lines = false, $to_lines = false)
259 if ($from_lines && $to_lines)
261 $compute = new _WikiDiffEngine($from_lines, $to_lines);
262 $this->edits = $compute->edits;
264 else if ($from_lines)
266 // $from_lines is not really from_lines, but rather
267 // a serialized WikiDiff.
268 $this->edits = unserialize($from_lines);
272 function apply ($from_lines)
275 $xlim = sizeof($from_lines);
277 for ( reset($this->edits);
278 $edit = current($this->edits);
282 for ($i = 0; $i < $edit['cp']; $i++)
283 $output[] = $from_lines[$x++];
285 if ($adds = $edit['add'])
286 for ( reset($adds); $add = current($adds); next($adds) )
290 $output[] = $from_lines[$x++];
292 die("WikiDiff::apply: line count mismatch: $x != $xlim");
296 function serialize ()
298 return serialize($this->edits);
301 function unserialize ($serial)
303 $this->edits = unserialize($serial);
307 class WikiDiffFormatter {
310 var $context_prefix, $deletes_prefix, $adds_prefix;
312 function WikiDiffFormatter ()
314 $this->context_lines = 0;
315 $this->context_prefix = ' ';
316 $this->deletes_prefix = '< ';
317 $this->adds_prefix = '> ';
320 function format ($diff, $from_lines)
322 $html = '<table width="100%" bgcolor="black"' .
323 "cellspacing=2 cellpadding=2 border=0\n";
324 $html .= $this->_format($diff->edits, $from_lines);
325 $html .= "</table>\n";
330 function _format ($edits, $from_lines)
333 $xlim = sizeof($from_lines);
336 while ($edit = current($edits))
341 // Copy leading context.
343 $cc = max(0, $x - $this->context_lines);
347 $yoff = $xoff + $y - $x;
350 $hunk['context'][] = $from_lines[$cc++];
353 for ($i = 0; $i < $edit['del']; $i++)
354 $hunk['deletes'][] = $from_lines[$x++];
357 if ($adds = $edit['add'])
358 for ( reset($adds); $add = current($adds); next($adds) )
360 $hunk['adds'][] = $add;
367 // Update context pointer
370 if (!($edit = next($edits))
371 || $edit['cp'] > 2 * $this->context_lines)
373 // Copy trailing context
374 while ($cc < min($x + $this->context_lines, $xlim))
375 $hunk['context'][] = $from_lines[$cc++];
377 $ylen = $cc + $y - $x - $yoff;;
382 $xbeg = $xlen ? $xoff + 1 : $xoff;
383 $ybeg = $ylen ? $yoff + 1 : $yoff;
384 $html .= $this->_emit_diff( $xbeg,$xlen,$ybeg,$ylen, $hunks );
392 function _emit_lines($lines, $prefix, $color)
394 if (! is_array($lines))
397 for (reset($lines); $line = current($lines); next($lines))
399 $html .= "<tr bgcolor=\"$color\"><td><tt>$prefix</tt>";
400 $html .= htmlspecialchars($line) . "</td></tr>\n";
406 function _emit_diff ($xbeg,$xlen,$ybeg,$ylen,$hunks)
408 $html = '<tr><td><table width="100%" bgcolor="white"'
409 . " cellspacing=0 border=0 cellpadding=4>\n"
410 . '<tr bgcolor="#cccccc"><td><tt>'
411 . $this->_diff_header($xbeg, $xlen, $ybeg, $ylen)
412 . "</tt></td></tr>\n<tr><td>\n"
413 . "<table width=\"100%\" cellspacing=0 border=0 cellpadding=2>\n";
415 for (reset($hunks); $hunk = current($hunks); next($hunks))
417 $html .= $this->_emit_lines($hunk['context'],
418 $this->context_prefix,
420 $html .= $this->_emit_lines($hunk['deletes'],
421 $this->deletes_prefix,
423 $html .= $this->_emit_lines($hunk['adds'],
428 $html .= "</table></td></tr></table></td></tr>\n";
432 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
434 $what = $xlen ? ($ylen ? 'c' : 'd') : 'a';
435 $xlen = $xlen > 1 ? "," . ($xbeg + $xlen - 1) : '';
436 $ylen = $ylen > 1 ? "," . ($ybeg + $ylen - 1) : '';
438 return "$xbeg$xlen$what$ybeg$ylen";
442 class WikiUnifiedDiffFormatter extends WikiDiffFormatter {
443 function WikiUnifiedDiffFormatter ($context_lines = 3)
445 $this->context_lines = $context_lines;
446 $this->context_prefix = ' ';
447 $this->deletes_prefix = '-';
448 $this->adds_prefix = '+';
451 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
453 $xlen = $xlen == 1 ? '' : ",$xlen";
454 $ylen = $ylen == 1 ? '' : ",$ylen";
456 return "@@ -$xbeg$xlen +$ybeg$ylen @@";
462 /////////////////////////////////////////////////////////////////
466 $wiki = RetrievePage($dbi, $pagename);
467 $dba = OpenDataBase($ArchiveDataBase);
468 $archive= RetrievePage($dba, $pagename);
470 $html = '<table><tr><td align="right">Current page:</td>';
472 $html .= "<td>version $wiki[version],</td><td>last modified on "
473 . date($datetimeformat, $wiki['lastmodified'])
474 . "</td><td>by $wiki[author]</td>";
476 $html .= "<td colspan=3><em>None</em></td>";
477 $html .= '</tr><tr><td align="right">Archived page:</td>';
478 if (is_array($archive))
479 $html .= "<td>version $archive[version],</td><td>last modified on "
480 . date($datetimeformat, $archive['lastmodified'])
481 . "</td><td>by $archive[author]</td>";
483 $html .= "<td colspan=3><em>None</em></td>";
484 $html .= "</tr></table><p>\n";
487 if (is_array($wiki) && is_array($archive))
489 $diff = new WikiDiff($archive['content'], $wiki['content']);
490 //$fmt = new WikiDiffFormatter();
491 $fmt = new WikiUnifiedDiffFormatter();
492 $html .= $fmt->format($diff, $archive['content']);
495 GeneratePage('MESSAGE', $html, 'Diff of '.htmlspecialchars($pagename), 0);