1 <!-- $Id: diff.php,v 1.4 2000-11-01 11:31:41 ahollosi Exp $ -->
5 // A PHP diff engine for phpwiki.
7 // Copyright (C) 2000 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 // You may copy this code freely under the conditions of the GPL.
12 * Class used internally by WikiDiff to actually compute the diffs.
14 * The algorithm used here is mostly lifted from the perl module
15 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
16 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
18 * More ideas are taken from:
19 * http://www.ics.uci.edu/~eppstein/161/960229.html
21 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
22 * diffutils-2.7, which can be found at:
23 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
25 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
30 var $edits; // List of editing operation to convert XV to YV.
31 var $xv = array(), $yv = array();
33 function _WikiDiffEngine ($from_lines, $to_lines)
35 $n_from = sizeof($from_lines);
36 $n_to = sizeof($to_lines);
38 // Ignore trailing and leading matching lines.
39 while ($n_from > 0 && $n_to > 0)
41 if ($from_lines[$n_from - 1] != $to_lines[$n_to - 1])
47 for ( $skip = 0; $skip < min($n_from, $n_to); $skip++)
48 if ($from_lines[$skip] != $to_lines[$skip])
56 // Ignore lines which do not exist in both files.
57 for ($x = 0; $x < $n_from; $x++)
58 $xhash[$from_lines[$x + $skip]] = 1;
59 for ($y = 0; $y < $n_to; $y++)
61 $line = $to_lines[$y + $skip];
63 if ( ($this->ychanged[$y] = empty($xhash[$line])) )
69 for ($x = 0; $x < $n_from; $x++)
71 $line = $from_lines[$x + $skip];
73 if ( ($this->xchanged[$x] = empty($yhash[$line])) )
74 continue; // fixme? what happens to yhash/xhash when
75 // there are two identical lines??
82 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
84 // Merge edits when possible
85 $this->_shift_boundaries($xlines, $this->xchanged, $this->ychanged);
86 $this->_shift_boundaries($ylines, $this->ychanged, $this->xchanged);
88 // Compute the edit operations.
89 $this->edits = array();
91 $this->edits[] = $skip;
95 while ($x < $n_from || $y < $n_to)
98 if ( ($y == $n_to && !$this->xchanged[$x])
99 || ($x == $n_from && !$this->ychanged[$y]) )
100 die("assertion error");
103 // Skip matching "snake".
106 while ( $x < $n_from && $y < $n_to
107 && !$this->xchanged[$x] && !$this->ychanged[$y])
114 $this->edits[] = $x - $x0;
119 while ($x < $n_from && $this->xchanged[$x])
125 $this->edits[] = -($x - $x0);
128 if ($this->ychanged[$y])
131 while ($y < $n_to && $this->ychanged[$y])
132 $adds[] = "" . $ylines[$y++];
133 $this->edits[] = $adds;
136 if (!empty($endskip))
137 $this->edits[] = $endskip;
140 /* Divide the Largest Common Subsequence (LCS) of the sequences
141 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
144 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
145 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
146 * sub sequences. The first sub-sequence is contained in [X0, X1),
147 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
148 * that (X0, Y0) == (XOFF, YOFF) and
149 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
151 * This function assumes that the first lines of the specified portions
152 * of the two files do not match, and likewise that the last lines do not
153 * match. The caller must trim matching lines from the beginning and end
154 * of the portions it is going to specify.
156 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
158 if ($xlim - $xoff > $ylim - $yoff)
160 // Things seems faster (I'm not sure I understand why)
161 // when the shortest sequence in X.
163 list ($xoff, $xlim, $yoff, $ylim)
164 = array( $yoff, $ylim, $xoff, $xlim);
168 for ($i = $ylim - 1; $i >= $yoff; $i--)
169 $ymatches[$this->xv[$i]][] = $i;
171 for ($i = $ylim - 1; $i >= $yoff; $i--)
172 $ymatches[$this->yv[$i]][] = $i;
175 $this->seq[0]= $yoff - 1;
176 $this->in_seq = array();
179 $numer = $xlim - $xoff + $nchunks - 1;
181 for ($chunk = 0; $chunk < $nchunks; $chunk++)
184 for ($i = 0; $i <= $this->lcs; $i++)
185 $ymids[$i][$chunk-1] = $this->seq[$i];
187 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
188 for ( ; $x < $x1; $x++)
190 $matches = $ymatches[$flip ? $this->yv[$x] : $this->xv[$x]];
194 while (list ($junk, $y) = each($matches))
195 if (! $this->in_seq[$y])
197 $k = $this->_lcs_pos($y);
198 //if (!$k) die('assertion "!$k" failed');
199 $ymids[$k] = $ymids[$k-1];
202 while (list ($junk, $y) = each($matches))
204 if ($y > $this->seq[$k-1])
206 //if ($y >= $this->seq[$k]) die('assertion failed');
207 // Optimization: this is a common case:
208 // next match is just replacing previous match.
209 $this->in_seq[$this->seq[$k]] = false;
211 $this->in_seq[$y] = 1;
213 else if (! $this->in_seq[$y])
215 $k = $this->_lcs_pos($y);
216 //if (!$k) die('assertion "!$k" failed');
217 $ymids[$k] = $ymids[$k-1];
223 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
224 $ymid = $ymids[$this->lcs];
225 for ($n = 0; $n < $nchunks - 1; $n++)
227 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
229 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
231 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
233 return array($this->lcs, $seps);
236 function _lcs_pos ($ypos)
239 if ($end == 0 || $ypos > $this->seq[$end])
241 $this->seq[++$this->lcs] = $ypos;
242 $this->in_seq[$ypos] = 1;
249 $mid = (int)(($beg + $end) / 2);
250 if ( $ypos > $this->seq[$mid] )
256 //if ($ypos == $this->seq[$end]) die("assertion failure");
258 $this->in_seq[$this->seq[$end]] = false;
259 $this->seq[$end] = $ypos;
260 $this->in_seq[$ypos] = 1;
264 /* Find LCS of two sequences.
266 * The results are recorded in the vectors $this->{x,y}changed[], by
267 * storing a 1 in the element for each line that is an insertion
268 * or deletion (ie. is not in the LCS).
270 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
272 * Note that XLIM, YLIM are exclusive bounds.
273 * All line numbers are origin-0 and discarded lines are not counted.
275 function _compareseq ($xoff, $xlim, $yoff, $ylim)
277 // Slide down the bottom initial diagonal.
278 while ($xoff < $xlim && $yoff < $ylim
279 && $this->xv[$xoff] == $this->yv[$yoff])
285 // Slide up the top initial diagonal.
286 while ($xlim > $xoff && $ylim > $yoff
287 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
293 if ($xoff == $xlim || $yoff == $ylim)
297 // This is ad hoc but seems to work well.
298 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
299 //$nchunks = max(2,min(8,(int)$nchunks));
300 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
302 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
307 // X and Y sequences have no common subsequence:
309 while ($yoff < $ylim)
310 $this->ychanged[$this->yind[$yoff++]] = 1;
311 while ($xoff < $xlim)
312 $this->xchanged[$this->xind[$xoff++]] = 1;
316 // Use the partitions to split this problem into subproblems.
319 while ($pt2 = next($seps))
321 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
327 /* Adjust inserts/deletes of identical lines to join changes
328 * as much as possible.
330 * We do something when a run of changed lines include a
331 * line at one end and have an excluded, identical line at the other.
332 * We are free to choose which identical line is included.
333 * `compareseq' usually chooses the one at the beginning,
334 * but usually it is cleaner to consider the following identical line
335 * to be the "change".
337 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
339 function _shift_boundaries ($lines, &$changed, $other_changed)
343 $len = sizeof($lines);
347 * Scan forwards to find beginning of another run of changes.
348 * Also keep track of the corresponding point in the other file.
350 while ($i < $len && $changed[$i] == 0)
352 while ($other_changed[$j++])
362 // Find the end of this run of changes.
363 while (isset($changed[++$i]))
365 while ($other_changed[$j])
371 * Record the length of this run of changes, so that
372 * we can later determine whether the run has grown.
374 $runlength = $i - $start;
377 * Move the changed region back, so long as the
378 * previous unchanged line matches the last changed one.
379 * This merges with previous changed regions.
381 while ($start && $lines[$start - 1] == $lines[$i - 1])
383 $changed[--$start] = 1;
384 $changed[--$i] = false;
385 while ($changed[$start - 1])
387 while ($other_changed[--$j])
392 * Set CORRESPONDING to the end of the changed run, at the last
393 * point where it corresponds to a changed run in the other file.
394 * CORRESPONDING == LEN means no such point has been found.
396 $corresponding = empty($other_changed[$j - 1]) ? $len : $i;
399 * Move the changed region forward, so long as the
400 * first changed line matches the following unchanged one.
401 * This merges with following changed regions.
402 * Do this second, so that if there are no merges,
403 * the changed region is moved forward as far as possible.
405 while ($i != $len && $lines[$start] == $lines[$i])
407 $changed[$start++] = false;
411 while ($other_changed[++$j])
415 while ($runlength != $i - $start);
418 * If possible, move the fully-merged run of changes
419 * back to a corresponding run in the other file.
421 while ($corresponding < $i)
423 $changed[--$start] = 1;
425 while ($other_changed[--$j])
433 * Class representing a diff between two files.
440 * Compute diff between files (or deserialize serialized WikiDiff.)
442 function WikiDiff($from_lines = false, $to_lines = false)
444 if ($from_lines && $to_lines)
446 $compute = new _WikiDiffEngine($from_lines, $to_lines);
447 $this->edits = $compute->edits;
449 else if ($from_lines)
451 // $from_lines is not really from_lines, but rather
452 // a serialized WikiDiff.
453 $this->edits = unserialize($from_lines);
457 $this->edits = array();
462 * Compute reversed WikiDiff.
466 * $diff = new WikiDiff($lines1, $lines2);
467 * $rev = $diff->reverse($lines1);
469 * // reconstruct $lines1 from $lines2:
470 * $out = $rev->apply($lines2);
472 function reverse ($from_lines)
477 for ( reset($this->edits);
478 $edit = current($this->edits);
482 { // Was an add, turn it into a delete.
483 $nadd = sizeof($edit);
484 if ($nadd == 0) die("assertion error");
489 // Was a copy --- just pass it through. }
493 { // Was a delete, turn it into an add.
496 while ($ndelete-- > 0)
497 $edit[] = "" . $from_lines[$x++];
499 else die("assertion error");
501 $rev->edits[] = $edit;
508 * Compose (concatenate) WikiDiffs.
512 * $diff1 = new WikiDiff($lines1, $lines2);
513 * $diff2 = new WikiDiff($lines2, $lines3);
514 * $comp = $diff1->compose($diff2);
516 * // reconstruct $lines3 from $lines1:
517 * $out = $comp->apply($lines1);
519 function compose ($that)
524 $comp = new WikiDiff;
525 $left = current($this->edits);
526 $right = current($that->edits);
528 while ($left || $right)
530 if (!is_array($left) && $left < 0)
531 { // Left op is a delete.
533 $left = next($this->edits);
535 else if (is_array($right))
536 { // Right op is an add.
538 $right = next($that->edits);
540 else if (!$left || !$right)
541 die ("assertion error");
542 else if (!is_array($left) && $left > 0)
543 { // Left op is a copy.
544 if ($left <= abs($right))
546 $newop = $right > 0 ? $left : -$left;
549 $right = next($that->edits);
550 $left = next($this->edits);
555 $left -= abs($right);
556 $right = next($that->edits);
560 { // Left op is an add.
561 if (!is_array($left)) die('assertion error');
562 $nleft = sizeof($left);
563 if ($nleft <= abs($right))
566 { // Right op is copy
570 else // Right op is delete
576 $right = next($that->edits);
577 $left = next($this->edits);
583 for ($i = 0; $i < $right; $i++)
584 $newop[] = $left[$i];
587 for ($i = abs($right); $i < $nleft; $i++)
590 $right = next($that->edits);
601 if (is_array($op) && is_array($newop))
603 // Both $op and $newop are adds.
604 for ($i = 0; $i < sizeof($newop); $i++)
607 else if (($op > 0 && $newop > 0) || ($op < 0 && $newop < 0))
608 { // $op and $newop are both either deletes or copies.
613 $comp->edits[] = $op;
618 $comp->edits[] = $op;
627 for (reset($this->edits);
628 $edit = current($this->edits);
635 echo "Delete " . -$edit;
636 else if (is_array($edit))
638 echo "Add " . sizeof($edit) . "<ul>";
639 for ($i = 0; $i < sizeof($edit); $i++)
640 echo "<li>" . htmlspecialchars($edit[$i]);
644 die("assertion error");
651 * Apply a WikiDiff to a set of lines.
655 * $diff = new WikiDiff($lines1, $lines2);
657 * // reconstruct $lines2 from $lines1:
658 * $out = $diff->apply($lines1);
660 function apply ($from_lines)
663 $xlim = sizeof($from_lines);
665 for ( reset($this->edits);
666 $edit = current($this->edits);
672 while (list ($junk, $line) = each($edit))
677 $output[] = $from_lines[$x++];
682 ExitWiki(sprintf(gettext ("WikiDiff::apply: line count mismatch: %s != %s"), $x, $xlim));
687 * Serialize a WikiDiff.
691 * $diff = new WikiDiff($lines1, $lines2);
692 * $string = $diff->serialize;
694 * // recover WikiDiff from serialized version:
695 * $diff2 = new WikiDiff($string);
697 function serialize ()
699 return serialize($this->edits);
703 * Return true if two files were equal.
707 if (sizeof($this->edits) > 1)
709 if (sizeof($this->edits) == 0)
711 // Test for: only edit is a copy.
712 return !is_array($this->edits[0]) && $this->edits[0] > 0;
716 * Compute the length of the Longest Common Subsequence (LCS).
718 * This is mostly for diagnostic purposed.
723 for (reset($this->edits);
724 $edit = current($this->edits);
727 if (!is_array($edit) && $edit > 0)
734 * Check a WikiDiff for validity.
736 * This is here only for debugging purposes.
738 function _check ($from_lines, $to_lines)
740 $test = $this->apply($from_lines);
741 if (serialize($test) != serialize($to_lines))
742 ExitWiki(gettext ("WikiDiff::_check: failed"));
745 $prev = current($this->edits);
746 $prevtype = is_array($prev) ? 'a' : ($prev > 0 ? 'c' : 'd');
748 while ($edit = next($this->edits))
750 $type = is_array($edit) ? 'a' : ($edit > 0 ? 'c' : 'd');
751 if ( $prevtype == $type )
752 ExitWiki(gettext ("WikiDiff::_check: edit sequence is non-optimal"));
756 printf ("<strong>" . gettext ("WikiDiff Okay: LCS = %s") . "</strong>\n", $lcs);
762 * A class to format a WikiDiff as HTML.
766 * $diff = new WikiDiff($lines1, $lines2); // compute diff.
768 * $fmt = new WikiDiffFormatter;
769 * echo $fmt->format($diff, $lines1); // Output HTMLified standard diff.
771 * or to output reverse diff (diff's that would take $lines2 to $lines1):
773 * $fmt = new WikiDiffFormatter('reversed');
774 * echo $fmt->format($diff, $lines1);
776 class WikiDiffFormatter
779 var $do_reverse_diff;
780 var $context_prefix, $deletes_prefix, $adds_prefix;
782 function WikiDiffFormatter ($reverse = false)
784 $this->do_reverse_diff = $reverse;
785 $this->context_lines = 0;
786 $this->context_prefix = ' ';
787 $this->deletes_prefix = '< ';
788 $this->adds_prefix = '> ';
791 function format ($diff, $from_lines)
793 $html = '<table width="100%" bgcolor="black"' .
794 "cellspacing=2 cellpadding=2 border=0>\n";
795 $html .= $this->_format($diff->edits, $from_lines);
796 $html .= "</table>\n";
801 function _format ($edits, $from_lines)
805 $xlim = sizeof($from_lines);
808 while ($edit = current($edits))
810 if (!is_array($edit) && $edit >= 0)
811 { // Edit op is a copy.
819 // Start of an output hunk.
820 $xoff = max(0, $x - $this->context_lines);
821 $yoff = $xoff + $y - $x;
824 // Get leading context.
826 for ($i = $xoff; $i < $x; $i++)
827 $context[] = $from_lines[$i];
828 $hunk['c'] = $context;
832 { // Edit op is an add.
834 $hunk[$this->do_reverse_diff ? 'd' : 'a'] = $edit;
837 { // Edit op is a delete
840 $deletes[] = $from_lines[$x++];
841 $hunk[$this->do_reverse_diff ? 'a' : 'd'] = $deletes;
845 $next = next($edits);
848 if ( !$next || $ncopy > 2 * $this->context_lines)
850 // End of an output hunk.
854 $xend = min($x + $this->context_lines, $xlim);
857 // Get trailing context.
859 for ($i = $x; $i < $xend; $i++)
860 $context[] = $from_lines[$i];
861 $hunks[] = array('c' => $context);
864 $xlen = $xend - $xoff;
865 $ylen = $xend + $y - $x - $yoff;
866 $xbeg = $xlen ? $xoff + 1 : $xoff;
867 $ybeg = $ylen ? $yoff + 1 : $yoff;
869 if ($this->do_reverse_diff)
870 list ($xbeg, $xlen, $ybeg, $ylen)
871 = array($ybeg, $ylen, $xbeg, $xlen);
873 $html .= $this->_emit_diff($xbeg,$xlen,$ybeg,$ylen,
883 for ($i = $x; $i < $x + $ncopy; $i++)
884 $context[] = $from_lines[$i];
885 $hunk = array('c' => $context);
895 function _emit_lines($lines, $prefix, $color)
899 while (list ($junk, $line) = each($lines))
901 $html .= "<tr bgcolor=\"$color\"><td><tt>$prefix</tt>";
902 $html .= "<tt>" . htmlspecialchars($line) . "</tt></td></tr>\n";
907 function _emit_diff ($xbeg,$xlen,$ybeg,$ylen,$hunks)
909 $html = '<tr><td><table width="100%" bgcolor="white"'
910 . " cellspacing=0 border=0 cellpadding=4>\n"
911 . '<tr bgcolor="#cccccc"><td><tt>'
912 . $this->_diff_header($xbeg, $xlen, $ybeg, $ylen)
913 . "</tt></td></tr>\n<tr><td>\n"
914 . "<table width=\"100%\" cellspacing=0 border=0 cellpadding=2>\n";
916 $prefix = array('c' => $this->context_prefix,
917 'a' => $this->adds_prefix,
918 'd' => $this->deletes_prefix);
919 $color = array('c' => '#ffffff',
923 for (reset($hunks); $hunk = current($hunks); next($hunks))
925 if (!empty($hunk['c']))
926 $html .= $this->_emit_lines($hunk['c'],
927 $this->context_prefix, '#ffffff');
928 if (!empty($hunk['d']))
929 $html .= $this->_emit_lines($hunk['d'],
930 $this->deletes_prefix, '#ccffcc');
931 if (!empty($hunk['a']))
932 $html .= $this->_emit_lines($hunk['a'],
933 $this->adds_prefix, '#ffcccc');
936 $html .= "</table></td></tr></table></td></tr>\n";
940 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
942 $what = $xlen ? ($ylen ? 'c' : 'd') : 'a';
943 $xlen = $xlen > 1 ? "," . ($xbeg + $xlen - 1) : '';
944 $ylen = $ylen > 1 ? "," . ($ybeg + $ylen - 1) : '';
946 return "$xbeg$xlen$what$ybeg$ylen";
951 * A class to format a WikiDiff as a pretty HTML unified diff.
955 * $diff = new WikiDiff($lines1, $lines2); // compute diff.
957 * $fmt = new WikiUnifiedDiffFormatter;
958 * echo $fmt->format($diff, $lines1); // Output HTMLified unified diff.
960 class WikiUnifiedDiffFormatter extends WikiDiffFormatter
962 function WikiUnifiedDiffFormatter ($reverse = false, $context_lines = 3)
964 $this->do_reverse_diff = $reverse;
965 $this->context_lines = $context_lines;
966 $this->context_prefix = ' ';
967 $this->deletes_prefix = '-';
968 $this->adds_prefix = '+';
971 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
973 $xlen = $xlen == 1 ? '' : ",$xlen";
974 $ylen = $ylen == 1 ? '' : ",$ylen";
976 return "@@ -$xbeg$xlen +$ybeg$ylen @@";
982 /////////////////////////////////////////////////////////////////
986 if (get_magic_quotes_gpc()) {
987 $diff = stripslashes($diff);
992 $wiki = RetrievePage($dbi, $pagename, $WikiPageStore);
993 // $dba = OpenDataBase($ArchivePageStore);
994 $archive= RetrievePage($dbi, $pagename, $ArchivePageStore);
996 $html = '<table><tr><td align="right">';
997 $html .= gettext ("Current page:");
999 if (is_array($wiki)) {
1001 $html .= sprintf(gettext ("version %s"), $wiki['version']);
1002 $html .= "</td><td>";
1003 $html .= sprintf(gettext ("last modified on %s"),
1004 date($datetimeformat, $wiki['lastmodified']));
1005 $html .= "</td><td>";
1006 $html .= sprintf (gettext ("by %s"), $wiki['author']);
1009 $html .= "<td colspan=3><em>";
1010 $html .= gettext ("None");
1011 $html .= "</em></td>";
1014 $html .= '<tr><td align="right">';
1015 $html .= gettext ("Archived page:");
1017 if (is_array($archive)) {
1019 $html .= sprintf(gettext ("version %s"), $archive['version']);
1020 $html .= "</td><td>";
1021 $html .= sprintf(gettext ("last modified on %s"),
1022 date($datetimeformat, $archive['lastmodified']));
1023 $html .= "</td><td>";
1024 $html .= sprintf(gettext ("by %s"), $archive['author']);
1027 $html .= "<td colspan=3><em>";
1028 $html .= gettext ("None");
1029 $html .= "</em></td>";
1031 $html .= "</tr></table><p>\n";
1033 if (is_array($wiki) && is_array($archive))
1035 $diff = new WikiDiff($archive['content'], $wiki['content']);
1036 if ($diff->isEmpty()) {
1037 $html .= '<hr>[' . gettext ("Versions are identical") . ']';
1039 //$fmt = new WikiDiffFormatter;
1040 $fmt = new WikiUnifiedDiffFormatter;
1041 $html .= $fmt->format($diff, $archive['content']);
1045 GeneratePage('MESSAGE', $html, sprintf(gettext ("Diff of %s."),
1046 htmlspecialchars($pagename)), 0);