1 <!-- $Id: wiki_diff.php3,v 1.5 2000-07-03 23:55:22 dairiki Exp $ -->
5 // A PHP diff engine for phpwiki.
7 // Copyright (C) 2000 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 // You may copy this code freely under the conditions of the GPL.
12 * Class used internally by WikiDiff to actually compute the diffs.
14 * The algorithm used here is mostly lifted from the perl module
15 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
16 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
18 * More ideas are taken from:
19 * http://www.ics.uci.edu/~eppstein/161/960229.html
21 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
22 * diffutils-2.7, which can be found at:
23 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
25 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
30 var $edits; // List of editing operation to convert XV to YV.
32 function _WikiDiffEngine ($from_lines, $to_lines)
34 $n_from = sizeof($from_lines);
35 $n_to = sizeof($to_lines);
37 // Ignore trailing and leading matching lines.
38 while ($n_from > 0 && $n_to > 0)
40 if ($from_lines[$n_from - 1] != $to_lines[$n_to - 1])
46 for ( $skip = 0; $skip < min($n_from, $n_to); $skip++)
47 if ($from_lines[$skip] != $to_lines[$skip])
52 // Ignore lines which do not exist in both files.
53 for ($x = 0; $x < $n_from; $x++)
54 $xhash[$from_lines[$x + $skip]] = 1;
55 for ($y = 0; $y < $n_to; $y++)
57 $line = $to_lines[$y + $skip];
59 if ( ($this->ychanged[$y] = ! $xhash[$line]) )
65 for ($x = 0; $x < $n_from; $x++)
67 $line = $from_lines[$x + $skip];
69 if ( ($this->xchanged[$x] = ! $yhash[$line]) )
77 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
79 // Merge edits when possible
80 $this->_shift_boundaries($xlines, $this->xchanged, $this->ychanged);
81 $this->_shift_boundaries($ylines, $this->ychanged, $this->xchanged);
83 // Compute the edit operations.
84 $this->edits = array();
86 $this->edits[] = $skip;
90 while ($x < $n_from || $y < $n_to)
93 if ( ($y == $n_to && !$this->xchanged[$x])
94 || ($x == $n_from && !$this->ychanged[$y]) )
95 die("assertion error");
98 // Skip matching "snake".
100 while ( $x < $n_from && $y < $n_to
101 && !$this->xchanged[$x] && !$this->ychanged[$y])
108 $this->edits[] = $x - $x0;
112 while ($x < $n_from && $this->xchanged[$x])
118 $this->edits[] = -($x - $x0);
121 if ($this->ychanged[$y])
124 while ($y < $n_to && $this->ychanged[$y])
125 $adds[] = "" . $ylines[$y++];
126 $this->edits[] = $adds;
130 $this->edits[] = $endskip;
133 /* Divide the Largest Common Subsequence (LCS) of the sequences
134 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
137 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
138 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
139 * sub sequences. The first sub-sequence is contained in [X0, X1),
140 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
141 * that (X0, Y0) == (XOFF, YOFF) and
142 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
144 * This function assumes that the first lines of the specified portions
145 * of the two files do not match, and likewise that the last lines do not
146 * match. The caller must trim matching lines from the beginning and end
147 * of the portions it is going to specify.
149 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
151 if ($xlim - $xoff > $ylim - $yoff)
153 // Things seems faster (I'm not sure I understand why)
154 // when the shortest sequence in X.
156 list ($xoff, $xlim, $yoff, $ylim)
157 = array( $yoff, $ylim, $xoff, $xlim);
161 for ($i = $ylim - 1; $i >= $yoff; $i--)
162 $ymatches[$this->xv[$i]][] = $i;
164 for ($i = $ylim - 1; $i >= $yoff; $i--)
165 $ymatches[$this->yv[$i]][] = $i;
168 $this->seq[0]= $yoff - 1;
169 $this->in_seq = array();
172 $numer = $xlim - $xoff + $nchunks - 1;
174 for ($chunk = 0; $chunk < $nchunks; $chunk++)
177 for ($i = 0; $i <= $this->lcs; $i++)
178 $ymids[$i][$chunk-1] = $this->seq[$i];
180 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
181 for ( ; $x < $x1; $x++)
183 $matches = $ymatches[$flip ? $this->yv[$x] : $this->xv[$x]];
187 while (list ($junk, $y) = each($matches))
188 if (! $this->in_seq[$y])
190 $k = $this->_lcs_pos($y);
191 //if (!$k) die('assertion "!$k" failed');
192 $ymids[$k] = $ymids[$k-1];
195 while (list ($junk, $y) = each($matches))
197 if ($y > $this->seq[$k-1])
199 //if ($y >= $this->seq[$k]) die('assertion failed');
200 // Optimization: this is a common case:
201 // next match is just replacing previous match.
202 $this->in_seq[$this->seq[$k]] = false;
204 $this->in_seq[$y] = 1;
206 else if (! $this->in_seq[$y])
208 $k = $this->_lcs_pos($y);
209 //if (!$k) die('assertion "!$k" failed');
210 $ymids[$k] = $ymids[$k-1];
216 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
217 $ymid = $ymids[$this->lcs];
218 for ($n = 0; $n < $nchunks - 1; $n++)
220 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
222 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
224 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
226 return array($this->lcs, $seps);
229 function _lcs_pos ($ypos)
232 if ($end == 0 || $ypos > $this->seq[$end])
234 $this->seq[++$this->lcs] = $ypos;
235 $this->in_seq[$ypos] = 1;
242 $mid = (int)(($beg + $end) / 2);
243 if ( $ypos > $this->seq[$mid] )
249 //if ($ypos == $this->seq[$end]) die("assertion failure");
251 $this->in_seq[$this->seq[$end]] = false;
252 $this->seq[$end] = $ypos;
253 $this->in_seq[$ypos] = 1;
257 /* Find LCS of two sequences.
259 * The results are recorded in the vectors $this->{x,y}changed[], by
260 * storing a 1 in the element for each line that is an insertion
261 * or deletion (ie. is not in the LCS).
263 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
265 * Note that XLIM, YLIM are exclusive bounds.
266 * All line numbers are origin-0 and discarded lines are not counted.
268 function _compareseq ($xoff, $xlim, $yoff, $ylim)
270 // Slide down the bottom initial diagonal.
271 while ($xoff < $xlim && $yoff < $ylim
272 && $this->xv[$xoff] == $this->yv[$yoff])
278 // Slide up the top initial diagonal.
279 while ($xlim > $xoff && $ylim > $yoff
280 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
286 if ($xoff == $xlim || $yoff == $ylim)
290 // This is ad hoc but seems to work well.
291 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
292 //$nchunks = max(2,min(8,(int)$nchunks));
293 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
295 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
300 // X and Y sequences have no common subsequence:
302 while ($yoff < $ylim)
303 $this->ychanged[$this->yind[$yoff++]] = 1;
304 while ($xoff < $xlim)
305 $this->xchanged[$this->xind[$xoff++]] = 1;
309 // Use the partitions to split this problem into subproblems.
312 while ($pt2 = next($seps))
314 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
320 /* Adjust inserts/deletes of identical lines to join changes
321 * as much as possible.
323 * We do something when a run of changed lines include a
324 * line at one end and have an excluded, identical line at the other.
325 * We are free to choose which identical line is included.
326 * `compareseq' usually chooses the one at the beginning,
327 * but usually it is cleaner to consider the following identical line
328 * to be the "change".
330 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
332 function _shift_boundaries ($lines, &$changed, $other_changed)
336 $len = sizeof($lines);
340 * Scan forwards to find beginning of another run of changes.
341 * Also keep track of the corresponding point in the other file.
343 while ($i < $len && $changed[$i] == 0)
345 while ($other_changed[$j++])
355 // Find the end of this run of changes.
356 while ($changed[++$i])
358 while ($other_changed[$j])
364 * Record the length of this run of changes, so that
365 * we can later determine whether the run has grown.
367 $runlength = $i - $start;
370 * Move the changed region back, so long as the
371 * previous unchanged line matches the last changed one.
372 * This merges with previous changed regions.
374 while ($start && $lines[$start - 1] == $lines[$i - 1])
376 $changed[--$start] = 1;
377 $changed[--$i] = false;
378 while ($changed[$start - 1])
380 while ($other_changed[--$j])
385 * Set CORRESPONDING to the end of the changed run, at the last
386 * point where it corresponds to a changed run in the other file.
387 * CORRESPONDING == LEN means no such point has been found.
389 $corresponding = $other_changed[$j - 1] ? $i : $len;
392 * Move the changed region forward, so long as the
393 * first changed line matches the following unchanged one.
394 * This merges with following changed regions.
395 * Do this second, so that if there are no merges,
396 * the changed region is moved forward as far as possible.
398 while ($i != $len && $lines[$start] == $lines[$i])
400 $changed[$start++] = false;
404 while ($other_changed[++$j])
408 while ($runlength != $i - $start);
411 * If possible, move the fully-merged run of changes
412 * back to a corresponding run in the other file.
414 while ($corresponding < $i)
416 $changed[--$start] = 1;
418 while ($other_changed[--$j])
426 * Class representing a diff between two files.
433 * Compute diff between files (or deserialize serialized WikiDiff.)
435 function WikiDiff($from_lines = false, $to_lines = false)
437 if ($from_lines && $to_lines)
439 $compute = new _WikiDiffEngine($from_lines, $to_lines);
440 $this->edits = $compute->edits;
442 else if ($from_lines)
444 // $from_lines is not really from_lines, but rather
445 // a serialized WikiDiff.
446 $this->edits = unserialize($from_lines);
450 $this->edits = array();
455 * Compute reversed WikiDiff.
459 * $diff = new WikiDiff($lines1, $lines2);
460 * $rev = $diff->reverse($lines1);
462 * // reconstruct $lines1 from $lines2:
463 * $out = $rev->apply($lines2);
465 function reverse ($from_lines)
470 for ( reset($this->edits);
471 $edit = current($this->edits);
476 // Was a copy --- just pass it through. }
480 { // Was a delete, turn it into an add.
483 while ($ndelete-- > 0)
484 $edit[] = "" . $from_lines[$x++];
486 else if (is_array($edit))
487 { // Was an add, turn it into a delete.
488 $nadd = sizeof($edit);
489 if ($nadd == 0) die("assertion error");
492 else die("assertion error");
494 $rev->edits[] = $edit;
501 * Compose (concatenate) WikiDiffs.
505 * $diff1 = new WikiDiff($lines1, $lines2);
506 * $diff2 = new WikiDiff($lines2, $lines3);
507 * $comp = $diff1->compose($diff2);
509 * // reconstruct $lines3 from $lines1:
510 * $out = $comp->apply($lines1);
512 function compose ($that)
517 $comp = new WikiDiff;
518 $left = current($this->edits);
519 $right = current($that->edits);
521 while ($left || $right)
524 { // Left op is a delete.
526 $left = next($this->edits);
528 else if (is_array($right))
529 { // Right op is an add.
531 $right = next($that->edits);
533 else if (!$left || !$right)
534 die ("assertion error");
536 { // Left op is a copy.
537 if ($left <= abs($right))
539 $newop = $right > 0 ? $left : -$left;
542 $right = next($that->edits);
543 $left = next($this->edits);
548 $left -= abs($right);
549 $right = next($that->edits);
553 { // Left op is an add.
554 if (!is_array($left)) die('assertion error');
555 $nleft = sizeof($left);
556 if ($nleft <= abs($right))
559 { // Right op is copy
563 else // Right op is delete
569 $right = next($that->edits);
570 $left = next($this->edits);
576 for ($i = 0; $i < $right; $i++)
577 $newop[] = $left[$i];
580 for ($i = abs($right); $i < $nleft; $i++)
583 $right = next($that->edits);
594 if (is_array($op) && is_array($newop))
596 // Both $op and $newop are adds.
597 for ($i = 0; $i < sizeof($newop); $i++)
600 else if (($op > 0 && $newop > 0) || ($op < 0 && $newop < 0))
601 { // $op and $newop are both either deletes or copies.
606 $comp->edits[] = $op;
611 $comp->edits[] = $op;
620 for (reset($this->edits);
621 $edit = current($this->edits);
628 echo "Delete " . -$edit;
629 else if (is_array($edit))
631 echo "Add " . sizeof($edit) . "<ul>";
632 for ($i = 0; $i < sizeof($edit); $i++)
633 echo "<li>" . htmlspecialchars($edit[$i]);
637 die("assertion error");
644 * Apply a WikiDiff to a set of lines.
648 * $diff = new WikiDiff($lines1, $lines2);
650 * // reconstruct $lines2 from $lines1:
651 * $out = $diff->apply($lines1);
653 function apply ($from_lines)
656 $xlim = sizeof($from_lines);
658 for ( reset($this->edits);
659 $edit = current($this->edits);
664 $output[] = $from_lines[$x++];
670 while (list ($junk, $line) = each($edit))
675 die("WikiDiff::apply: line count mismatch: $x != $xlim");
680 * Serialize a WikiDiff.
684 * $diff = new WikiDiff($lines1, $lines2);
685 * $string = $diff->serialize;
687 * // recover WikiDiff from serialized version:
688 * $diff2 = new WikiDiff($string);
690 function serialize ()
692 return serialize($this->edits);
696 * Compute the length of the Longest Common Subsequence (LCS).
698 * This is mostly for diagnostic purposed.
703 for (reset($this->edits);
704 $edit = current($this->edits);
714 * Check a WikiDiff for validity.
716 * This is here only for debugging purposes.
718 function _check ($from_lines, $to_lines)
720 $test = $this->apply($from_lines);
721 if (serialize($test) != serialize($to_lines))
722 die("WikiDiff::_check: failed");
725 $prev = current($this->edits);
726 $prevtype = $prev > 0 ? 'c' : ($prev < 0 ? 'd' : 'a');
728 while ($edit = next($this->edits))
730 $type = $edit > 0 ? 'c' : ($edit < 0 ? 'd' : 'a');
731 if ( $prevtype == $type )
732 die("WikiDiff::_check: edit sequence is non-optimal");
736 echo "<strong>WikiDiff Okay: LCS = $lcs</strong>\n";
742 * A class to format a WikiDiff as HTML.
746 * $diff = new WikiDiff($lines1, $lines2); // compute diff.
748 * $fmt = new WikiDiffFormatter;
749 * echo $fmt->format($diff, $lines1); // Output HTMLified standard diff.
751 * or to output reverse diff (diff's that would take $lines2 to $lines1):
753 * $fmt = new WikiDiffFormatter('reversed');
754 * echo $fmt->format($diff, $lines1);
756 class WikiDiffFormatter
759 var $do_reverse_diff;
760 var $context_prefix, $deletes_prefix, $adds_prefix;
762 function WikiDiffFormatter ($reverse = false)
764 $this->do_reverse_diff = $reverse;
765 $this->context_lines = 0;
766 $this->context_prefix = ' ';
767 $this->deletes_prefix = '< ';
768 $this->adds_prefix = '> ';
771 function format ($diff, $from_lines)
773 $html = '<table width="100%" bgcolor="black"' .
774 "cellspacing=2 cellpadding=2 border=0\n";
775 $html .= $this->_format($diff->edits, $from_lines);
776 $html .= "</table>\n";
781 function _format ($edits, $from_lines)
784 $xlim = sizeof($from_lines);
787 while ($edit = current($edits))
790 { // Edit op is a copy.
798 // Start of an output hunk.
799 $xoff = max(0, $x - $this->context_lines);
800 $yoff = $xoff + $y - $x;
803 // Get leading context.
805 for ($i = $xoff; $i < $x; $i++)
806 $context[] = $from_lines[$i];
807 $hunk['c'] = $context;
811 { // Edit op is a delete
814 $deletes[] = $from_lines[$x++];
815 $hunk[$this->do_reverse_diff ? 'a' : 'd'] = $deletes;
818 { // Edit op is an add.
819 if (!is_array($edit)) die("assertion error");
821 $hunk[$this->do_reverse_diff ? 'd' : 'a'] = $edit;
825 $next = next($edits);
828 if ( !$next || $ncopy > 2 * $this->context_lines)
830 // End of an output hunk.
834 $xend = min($x + $this->context_lines, $xlim);
837 // Get trailing context.
839 for ($i = $x; $i < $xend; $i++)
840 $context[] = $from_lines[$i];
841 $hunks[] = array('c' => $context);
844 $xlen = $xend - $xoff;
845 $ylen = $xend + $y - $x - $yoff;
846 $xbeg = $xlen ? $xoff + 1 : $xoff;
847 $ybeg = $ylen ? $yoff + 1 : $yoff;
849 if ($this->do_reverse_diff)
850 list ($xbeg, $xlen, $ybeg, $ylen)
851 = array($ybeg, $ylen, $xbeg, $xlen);
853 $html .= $this->_emit_diff($xbeg,$xlen,$ybeg,$ylen,
863 for ($i = $x; $i < $x + $ncopy; $i++)
864 $context[] = $from_lines[$i];
865 $hunk = array('c' => $context);
875 function _emit_lines($lines, $prefix, $color)
878 while (list ($junk, $line) = each($lines))
880 $html .= "<tr bgcolor=\"$color\"><td><tt>$prefix</tt>";
881 $html .= "<tt>" . htmlspecialchars($line) . "</tt></td></tr>\n";
886 function _emit_diff ($xbeg,$xlen,$ybeg,$ylen,$hunks)
888 $html = '<tr><td><table width="100%" bgcolor="white"'
889 . " cellspacing=0 border=0 cellpadding=4>\n"
890 . '<tr bgcolor="#cccccc"><td><tt>'
891 . $this->_diff_header($xbeg, $xlen, $ybeg, $ylen)
892 . "</tt></td></tr>\n<tr><td>\n"
893 . "<table width=\"100%\" cellspacing=0 border=0 cellpadding=2>\n";
895 $prefix = array('c' => $this->context_prefix,
896 'a' => $this->adds_prefix,
897 'd' => $this->deletes_prefix);
898 $color = array('c' => '#ffffff',
902 for (reset($hunks); $hunk = current($hunks); next($hunks))
904 if ($lines = $hunk['c'])
905 $html .= $this->_emit_lines($lines,
906 $this->context_prefix, '#ffffff');
907 if ($lines = $hunk['d'])
908 $html .= $this->_emit_lines($lines,
909 $this->deletes_prefix, '#ccffcc');
910 if ($lines = $hunk['a'])
911 $html .= $this->_emit_lines($lines,
912 $this->adds_prefix, '#ffcccc');
915 $html .= "</table></td></tr></table></td></tr>\n";
919 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
921 $what = $xlen ? ($ylen ? 'c' : 'd') : 'a';
922 $xlen = $xlen > 1 ? "," . ($xbeg + $xlen - 1) : '';
923 $ylen = $ylen > 1 ? "," . ($ybeg + $ylen - 1) : '';
925 return "$xbeg$xlen$what$ybeg$ylen";
930 * A class to format a WikiDiff as a pretty HTML unified diff.
934 * $diff = new WikiDiff($lines1, $lines2); // compute diff.
936 * $fmt = new WikiUnifiedDiffFormatter;
937 * echo $fmt->format($diff, $lines1); // Output HTMLified unified diff.
939 class WikiUnifiedDiffFormatter extends WikiDiffFormatter
941 function WikiUnifiedDiffFormatter ($reverse = false, $context_lines = 3)
943 $this->do_reverse_diff = $reverse;
944 $this->context_lines = $context_lines;
945 $this->context_prefix = ' ';
946 $this->deletes_prefix = '-';
947 $this->adds_prefix = '+';
950 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
952 $xlen = $xlen == 1 ? '' : ",$xlen";
953 $ylen = $ylen == 1 ? '' : ",$ylen";
955 return "@@ -$xbeg$xlen +$ybeg$ylen @@";
961 /////////////////////////////////////////////////////////////////
967 $wiki = RetrievePage($dbi, $pagename);
968 $dba = OpenDataBase($ArchiveDataBase);
969 $archive= RetrievePage($dba, $pagename);
971 $html = '<table><tr><td align="right">Current page:</td>';
973 $html .= "<td>version $wiki[version],</td><td>last modified on "
974 . date($datetimeformat, $wiki['lastmodified'])
975 . "</td><td>by $wiki[author]</td>";
977 $html .= "<td colspan=3><em>None</em></td>";
978 $html .= '</tr><tr><td align="right">Archived page:</td>';
979 if (is_array($archive))
980 $html .= "<td>version $archive[version],</td><td>last modified on "
981 . date($datetimeformat, $archive['lastmodified'])
982 . "</td><td>by $archive[author]</td>";
984 $html .= "<td colspan=3><em>None</em></td>";
985 $html .= "</tr></table><p>\n";
988 if (is_array($wiki) && is_array($archive))
990 $diff = new WikiDiff($archive['content'], $wiki['content']);
991 //$fmt = new WikiDiffFormatter;
992 $fmt = new WikiUnifiedDiffFormatter;
993 $html .= $fmt->format($diff, $archive['content']);
996 GeneratePage('MESSAGE', $html, 'Diff of '.htmlspecialchars($pagename), 0);