1 <!-- $Id: diff.php,v 1.2 2000-10-19 21:36:50 ahollosi Exp $ -->
5 // A PHP diff engine for phpwiki.
7 // Copyright (C) 2000 Geoffrey T. Dairiki <dairiki@dairiki.org>
8 // You may copy this code freely under the conditions of the GPL.
12 * Class used internally by WikiDiff to actually compute the diffs.
14 * The algorithm used here is mostly lifted from the perl module
15 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
16 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
18 * More ideas are taken from:
19 * http://www.ics.uci.edu/~eppstein/161/960229.html
21 * Some ideas are (and a bit of code) are from from analyze.c, from GNU
22 * diffutils-2.7, which can be found at:
23 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
25 * Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
30 var $edits; // List of editing operation to convert XV to YV.
32 function _WikiDiffEngine ($from_lines, $to_lines)
34 $n_from = sizeof($from_lines);
35 $n_to = sizeof($to_lines);
37 // Ignore trailing and leading matching lines.
38 while ($n_from > 0 && $n_to > 0)
40 if ($from_lines[$n_from - 1] != $to_lines[$n_to - 1])
46 for ( $skip = 0; $skip < min($n_from, $n_to); $skip++)
47 if ($from_lines[$skip] != $to_lines[$skip])
52 // Ignore lines which do not exist in both files.
53 for ($x = 0; $x < $n_from; $x++)
54 $xhash[$from_lines[$x + $skip]] = 1;
55 for ($y = 0; $y < $n_to; $y++)
57 $line = $to_lines[$y + $skip];
59 if ( ($this->ychanged[$y] = ! $xhash[$line]) )
65 for ($x = 0; $x < $n_from; $x++)
67 $line = $from_lines[$x + $skip];
69 if ( ($this->xchanged[$x] = ! $yhash[$line]) )
77 $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
79 // Merge edits when possible
80 $this->_shift_boundaries($xlines, $this->xchanged, $this->ychanged);
81 $this->_shift_boundaries($ylines, $this->ychanged, $this->xchanged);
83 // Compute the edit operations.
84 $this->edits = array();
86 $this->edits[] = $skip;
90 while ($x < $n_from || $y < $n_to)
93 if ( ($y == $n_to && !$this->xchanged[$x])
94 || ($x == $n_from && !$this->ychanged[$y]) )
95 die("assertion error");
98 // Skip matching "snake".
100 while ( $x < $n_from && $y < $n_to
101 && !$this->xchanged[$x] && !$this->ychanged[$y])
108 $this->edits[] = $x - $x0;
112 while ($x < $n_from && $this->xchanged[$x])
118 $this->edits[] = -($x - $x0);
121 if ($this->ychanged[$y])
124 while ($y < $n_to && $this->ychanged[$y])
125 $adds[] = "" . $ylines[$y++];
126 $this->edits[] = $adds;
130 $this->edits[] = $endskip;
133 /* Divide the Largest Common Subsequence (LCS) of the sequences
134 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
137 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
138 * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
139 * sub sequences. The first sub-sequence is contained in [X0, X1),
140 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
141 * that (X0, Y0) == (XOFF, YOFF) and
142 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
144 * This function assumes that the first lines of the specified portions
145 * of the two files do not match, and likewise that the last lines do not
146 * match. The caller must trim matching lines from the beginning and end
147 * of the portions it is going to specify.
149 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
151 if ($xlim - $xoff > $ylim - $yoff)
153 // Things seems faster (I'm not sure I understand why)
154 // when the shortest sequence in X.
156 list ($xoff, $xlim, $yoff, $ylim)
157 = array( $yoff, $ylim, $xoff, $xlim);
161 for ($i = $ylim - 1; $i >= $yoff; $i--)
162 $ymatches[$this->xv[$i]][] = $i;
164 for ($i = $ylim - 1; $i >= $yoff; $i--)
165 $ymatches[$this->yv[$i]][] = $i;
168 $this->seq[0]= $yoff - 1;
169 $this->in_seq = array();
172 $numer = $xlim - $xoff + $nchunks - 1;
174 for ($chunk = 0; $chunk < $nchunks; $chunk++)
177 for ($i = 0; $i <= $this->lcs; $i++)
178 $ymids[$i][$chunk-1] = $this->seq[$i];
180 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
181 for ( ; $x < $x1; $x++)
183 $matches = $ymatches[$flip ? $this->yv[$x] : $this->xv[$x]];
187 while (list ($junk, $y) = each($matches))
188 if (! $this->in_seq[$y])
190 $k = $this->_lcs_pos($y);
191 //if (!$k) die('assertion "!$k" failed');
192 $ymids[$k] = $ymids[$k-1];
195 while (list ($junk, $y) = each($matches))
197 if ($y > $this->seq[$k-1])
199 //if ($y >= $this->seq[$k]) die('assertion failed');
200 // Optimization: this is a common case:
201 // next match is just replacing previous match.
202 $this->in_seq[$this->seq[$k]] = false;
204 $this->in_seq[$y] = 1;
206 else if (! $this->in_seq[$y])
208 $k = $this->_lcs_pos($y);
209 //if (!$k) die('assertion "!$k" failed');
210 $ymids[$k] = $ymids[$k-1];
216 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
217 $ymid = $ymids[$this->lcs];
218 for ($n = 0; $n < $nchunks - 1; $n++)
220 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
222 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
224 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
226 return array($this->lcs, $seps);
229 function _lcs_pos ($ypos)
232 if ($end == 0 || $ypos > $this->seq[$end])
234 $this->seq[++$this->lcs] = $ypos;
235 $this->in_seq[$ypos] = 1;
242 $mid = (int)(($beg + $end) / 2);
243 if ( $ypos > $this->seq[$mid] )
249 //if ($ypos == $this->seq[$end]) die("assertion failure");
251 $this->in_seq[$this->seq[$end]] = false;
252 $this->seq[$end] = $ypos;
253 $this->in_seq[$ypos] = 1;
257 /* Find LCS of two sequences.
259 * The results are recorded in the vectors $this->{x,y}changed[], by
260 * storing a 1 in the element for each line that is an insertion
261 * or deletion (ie. is not in the LCS).
263 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
265 * Note that XLIM, YLIM are exclusive bounds.
266 * All line numbers are origin-0 and discarded lines are not counted.
268 function _compareseq ($xoff, $xlim, $yoff, $ylim)
270 // Slide down the bottom initial diagonal.
271 while ($xoff < $xlim && $yoff < $ylim
272 && $this->xv[$xoff] == $this->yv[$yoff])
278 // Slide up the top initial diagonal.
279 while ($xlim > $xoff && $ylim > $yoff
280 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1])
286 if ($xoff == $xlim || $yoff == $ylim)
290 // This is ad hoc but seems to work well.
291 //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
292 //$nchunks = max(2,min(8,(int)$nchunks));
293 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
295 = $this->_diag($xoff,$xlim,$yoff, $ylim,$nchunks);
300 // X and Y sequences have no common subsequence:
302 while ($yoff < $ylim)
303 $this->ychanged[$this->yind[$yoff++]] = 1;
304 while ($xoff < $xlim)
305 $this->xchanged[$this->xind[$xoff++]] = 1;
309 // Use the partitions to split this problem into subproblems.
312 while ($pt2 = next($seps))
314 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
320 /* Adjust inserts/deletes of identical lines to join changes
321 * as much as possible.
323 * We do something when a run of changed lines include a
324 * line at one end and have an excluded, identical line at the other.
325 * We are free to choose which identical line is included.
326 * `compareseq' usually chooses the one at the beginning,
327 * but usually it is cleaner to consider the following identical line
328 * to be the "change".
330 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
332 function _shift_boundaries ($lines, &$changed, $other_changed)
336 $len = sizeof($lines);
340 * Scan forwards to find beginning of another run of changes.
341 * Also keep track of the corresponding point in the other file.
343 while ($i < $len && $changed[$i] == 0)
345 while ($other_changed[$j++])
355 // Find the end of this run of changes.
356 while ($changed[++$i])
358 while ($other_changed[$j])
364 * Record the length of this run of changes, so that
365 * we can later determine whether the run has grown.
367 $runlength = $i - $start;
370 * Move the changed region back, so long as the
371 * previous unchanged line matches the last changed one.
372 * This merges with previous changed regions.
374 while ($start && $lines[$start - 1] == $lines[$i - 1])
376 $changed[--$start] = 1;
377 $changed[--$i] = false;
378 while ($changed[$start - 1])
380 while ($other_changed[--$j])
385 * Set CORRESPONDING to the end of the changed run, at the last
386 * point where it corresponds to a changed run in the other file.
387 * CORRESPONDING == LEN means no such point has been found.
389 $corresponding = $other_changed[$j - 1] ? $i : $len;
392 * Move the changed region forward, so long as the
393 * first changed line matches the following unchanged one.
394 * This merges with following changed regions.
395 * Do this second, so that if there are no merges,
396 * the changed region is moved forward as far as possible.
398 while ($i != $len && $lines[$start] == $lines[$i])
400 $changed[$start++] = false;
404 while ($other_changed[++$j])
408 while ($runlength != $i - $start);
411 * If possible, move the fully-merged run of changes
412 * back to a corresponding run in the other file.
414 while ($corresponding < $i)
416 $changed[--$start] = 1;
418 while ($other_changed[--$j])
426 * Class representing a diff between two files.
433 * Compute diff between files (or deserialize serialized WikiDiff.)
435 function WikiDiff($from_lines = false, $to_lines = false)
437 if ($from_lines && $to_lines)
439 $compute = new _WikiDiffEngine($from_lines, $to_lines);
440 $this->edits = $compute->edits;
442 else if ($from_lines)
444 // $from_lines is not really from_lines, but rather
445 // a serialized WikiDiff.
446 $this->edits = unserialize($from_lines);
450 $this->edits = array();
455 * Compute reversed WikiDiff.
459 * $diff = new WikiDiff($lines1, $lines2);
460 * $rev = $diff->reverse($lines1);
462 * // reconstruct $lines1 from $lines2:
463 * $out = $rev->apply($lines2);
465 function reverse ($from_lines)
470 for ( reset($this->edits);
471 $edit = current($this->edits);
475 { // Was an add, turn it into a delete.
476 $nadd = sizeof($edit);
477 if ($nadd == 0) die("assertion error");
482 // Was a copy --- just pass it through. }
486 { // Was a delete, turn it into an add.
489 while ($ndelete-- > 0)
490 $edit[] = "" . $from_lines[$x++];
492 else die("assertion error");
494 $rev->edits[] = $edit;
501 * Compose (concatenate) WikiDiffs.
505 * $diff1 = new WikiDiff($lines1, $lines2);
506 * $diff2 = new WikiDiff($lines2, $lines3);
507 * $comp = $diff1->compose($diff2);
509 * // reconstruct $lines3 from $lines1:
510 * $out = $comp->apply($lines1);
512 function compose ($that)
517 $comp = new WikiDiff;
518 $left = current($this->edits);
519 $right = current($that->edits);
521 while ($left || $right)
523 if (!is_array($left) && $left < 0)
524 { // Left op is a delete.
526 $left = next($this->edits);
528 else if (is_array($right))
529 { // Right op is an add.
531 $right = next($that->edits);
533 else if (!$left || !$right)
534 die ("assertion error");
535 else if (!is_array($left) && $left > 0)
536 { // Left op is a copy.
537 if ($left <= abs($right))
539 $newop = $right > 0 ? $left : -$left;
542 $right = next($that->edits);
543 $left = next($this->edits);
548 $left -= abs($right);
549 $right = next($that->edits);
553 { // Left op is an add.
554 if (!is_array($left)) die('assertion error');
555 $nleft = sizeof($left);
556 if ($nleft <= abs($right))
559 { // Right op is copy
563 else // Right op is delete
569 $right = next($that->edits);
570 $left = next($this->edits);
576 for ($i = 0; $i < $right; $i++)
577 $newop[] = $left[$i];
580 for ($i = abs($right); $i < $nleft; $i++)
583 $right = next($that->edits);
594 if (is_array($op) && is_array($newop))
596 // Both $op and $newop are adds.
597 for ($i = 0; $i < sizeof($newop); $i++)
600 else if (($op > 0 && $newop > 0) || ($op < 0 && $newop < 0))
601 { // $op and $newop are both either deletes or copies.
606 $comp->edits[] = $op;
611 $comp->edits[] = $op;
620 for (reset($this->edits);
621 $edit = current($this->edits);
628 echo "Delete " . -$edit;
629 else if (is_array($edit))
631 echo "Add " . sizeof($edit) . "<ul>";
632 for ($i = 0; $i < sizeof($edit); $i++)
633 echo "<li>" . htmlspecialchars($edit[$i]);
637 die("assertion error");
644 * Apply a WikiDiff to a set of lines.
648 * $diff = new WikiDiff($lines1, $lines2);
650 * // reconstruct $lines2 from $lines1:
651 * $out = $diff->apply($lines1);
653 function apply ($from_lines)
656 $xlim = sizeof($from_lines);
658 for ( reset($this->edits);
659 $edit = current($this->edits);
665 while (list ($junk, $line) = each($edit))
670 $output[] = $from_lines[$x++];
675 die("WikiDiff::apply: line count mismatch: $x != $xlim");
680 * Serialize a WikiDiff.
684 * $diff = new WikiDiff($lines1, $lines2);
685 * $string = $diff->serialize;
687 * // recover WikiDiff from serialized version:
688 * $diff2 = new WikiDiff($string);
690 function serialize ()
692 return serialize($this->edits);
696 * Return true if two files were equal.
700 if (sizeof($this->edits) > 1)
702 if (sizeof($this->edits) == 0)
704 // Test for: only edit is a copy.
705 return !is_array($this->edits[0]) && $this->edits[0] > 0;
709 * Compute the length of the Longest Common Subsequence (LCS).
711 * This is mostly for diagnostic purposed.
716 for (reset($this->edits);
717 $edit = current($this->edits);
720 if (!is_array($edit) && $edit > 0)
727 * Check a WikiDiff for validity.
729 * This is here only for debugging purposes.
731 function _check ($from_lines, $to_lines)
733 $test = $this->apply($from_lines);
734 if (serialize($test) != serialize($to_lines))
735 die("WikiDiff::_check: failed");
738 $prev = current($this->edits);
739 $prevtype = is_array($prev) ? 'a' : ($prev > 0 ? 'c' : 'd');
741 while ($edit = next($this->edits))
743 $type = is_array($edit) ? 'a' : ($edit > 0 ? 'c' : 'd');
744 if ( $prevtype == $type )
745 die("WikiDiff::_check: edit sequence is non-optimal");
749 echo "<strong>WikiDiff Okay: LCS = $lcs</strong>\n";
755 * A class to format a WikiDiff as HTML.
759 * $diff = new WikiDiff($lines1, $lines2); // compute diff.
761 * $fmt = new WikiDiffFormatter;
762 * echo $fmt->format($diff, $lines1); // Output HTMLified standard diff.
764 * or to output reverse diff (diff's that would take $lines2 to $lines1):
766 * $fmt = new WikiDiffFormatter('reversed');
767 * echo $fmt->format($diff, $lines1);
769 class WikiDiffFormatter
772 var $do_reverse_diff;
773 var $context_prefix, $deletes_prefix, $adds_prefix;
775 function WikiDiffFormatter ($reverse = false)
777 $this->do_reverse_diff = $reverse;
778 $this->context_lines = 0;
779 $this->context_prefix = ' ';
780 $this->deletes_prefix = '< ';
781 $this->adds_prefix = '> ';
784 function format ($diff, $from_lines)
786 $html = '<table width="100%" bgcolor="black"' .
787 "cellspacing=2 cellpadding=2 border=0>\n";
788 $html .= $this->_format($diff->edits, $from_lines);
789 $html .= "</table>\n";
794 function _format ($edits, $from_lines)
797 $xlim = sizeof($from_lines);
800 while ($edit = current($edits))
802 if (!is_array($edit) && $edit >= 0)
803 { // Edit op is a copy.
811 // Start of an output hunk.
812 $xoff = max(0, $x - $this->context_lines);
813 $yoff = $xoff + $y - $x;
816 // Get leading context.
818 for ($i = $xoff; $i < $x; $i++)
819 $context[] = $from_lines[$i];
820 $hunk['c'] = $context;
824 { // Edit op is an add.
826 $hunk[$this->do_reverse_diff ? 'd' : 'a'] = $edit;
829 { // Edit op is a delete
832 $deletes[] = $from_lines[$x++];
833 $hunk[$this->do_reverse_diff ? 'a' : 'd'] = $deletes;
837 $next = next($edits);
840 if ( !$next || $ncopy > 2 * $this->context_lines)
842 // End of an output hunk.
846 $xend = min($x + $this->context_lines, $xlim);
849 // Get trailing context.
851 for ($i = $x; $i < $xend; $i++)
852 $context[] = $from_lines[$i];
853 $hunks[] = array('c' => $context);
856 $xlen = $xend - $xoff;
857 $ylen = $xend + $y - $x - $yoff;
858 $xbeg = $xlen ? $xoff + 1 : $xoff;
859 $ybeg = $ylen ? $yoff + 1 : $yoff;
861 if ($this->do_reverse_diff)
862 list ($xbeg, $xlen, $ybeg, $ylen)
863 = array($ybeg, $ylen, $xbeg, $xlen);
865 $html .= $this->_emit_diff($xbeg,$xlen,$ybeg,$ylen,
875 for ($i = $x; $i < $x + $ncopy; $i++)
876 $context[] = $from_lines[$i];
877 $hunk = array('c' => $context);
887 function _emit_lines($lines, $prefix, $color)
890 while (list ($junk, $line) = each($lines))
892 $html .= "<tr bgcolor=\"$color\"><td><tt>$prefix</tt>";
893 $html .= "<tt>" . htmlspecialchars($line) . "</tt></td></tr>\n";
898 function _emit_diff ($xbeg,$xlen,$ybeg,$ylen,$hunks)
900 $html = '<tr><td><table width="100%" bgcolor="white"'
901 . " cellspacing=0 border=0 cellpadding=4>\n"
902 . '<tr bgcolor="#cccccc"><td><tt>'
903 . $this->_diff_header($xbeg, $xlen, $ybeg, $ylen)
904 . "</tt></td></tr>\n<tr><td>\n"
905 . "<table width=\"100%\" cellspacing=0 border=0 cellpadding=2>\n";
907 $prefix = array('c' => $this->context_prefix,
908 'a' => $this->adds_prefix,
909 'd' => $this->deletes_prefix);
910 $color = array('c' => '#ffffff',
914 for (reset($hunks); $hunk = current($hunks); next($hunks))
916 if ($lines = $hunk['c'])
917 $html .= $this->_emit_lines($lines,
918 $this->context_prefix, '#ffffff');
919 if ($lines = $hunk['d'])
920 $html .= $this->_emit_lines($lines,
921 $this->deletes_prefix, '#ccffcc');
922 if ($lines = $hunk['a'])
923 $html .= $this->_emit_lines($lines,
924 $this->adds_prefix, '#ffcccc');
927 $html .= "</table></td></tr></table></td></tr>\n";
931 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
933 $what = $xlen ? ($ylen ? 'c' : 'd') : 'a';
934 $xlen = $xlen > 1 ? "," . ($xbeg + $xlen - 1) : '';
935 $ylen = $ylen > 1 ? "," . ($ybeg + $ylen - 1) : '';
937 return "$xbeg$xlen$what$ybeg$ylen";
942 * A class to format a WikiDiff as a pretty HTML unified diff.
946 * $diff = new WikiDiff($lines1, $lines2); // compute diff.
948 * $fmt = new WikiUnifiedDiffFormatter;
949 * echo $fmt->format($diff, $lines1); // Output HTMLified unified diff.
951 class WikiUnifiedDiffFormatter extends WikiDiffFormatter
953 function WikiUnifiedDiffFormatter ($reverse = false, $context_lines = 3)
955 $this->do_reverse_diff = $reverse;
956 $this->context_lines = $context_lines;
957 $this->context_prefix = ' ';
958 $this->deletes_prefix = '-';
959 $this->adds_prefix = '+';
962 function _diff_header ($xbeg,$xlen,$ybeg,$ylen)
964 $xlen = $xlen == 1 ? '' : ",$xlen";
965 $ylen = $ylen == 1 ? '' : ",$ylen";
967 return "@@ -$xbeg$xlen +$ybeg$ylen @@";
973 /////////////////////////////////////////////////////////////////
977 if (get_magic_quotes_gpc()) {
978 $diff = stripslashes($diff);
983 $wiki = RetrievePage($dbi, $pagename, $WikiPageStore);
984 // $dba = OpenDataBase($ArchivePageStore);
985 $archive= RetrievePage($dbi, $pagename, $ArchivePageStore);
987 $html = '<table><tr><td align="right">Current page:</td>';
989 $html .= "<td>version $wiki[version],</td><td>last modified on "
990 . date($datetimeformat, $wiki['lastmodified'])
991 . "</td><td>by $wiki[author]</td>";
993 $html .= "<td colspan=3><em>None</em></td>";
994 $html .= '</tr><tr><td align="right">Archived page:</td>';
995 if (is_array($archive))
996 $html .= "<td>version $archive[version],</td><td>last modified on "
997 . date($datetimeformat, $archive['lastmodified'])
998 . "</td><td>by $archive[author]</td>";
1000 $html .= "<td colspan=3><em>None</em></td>";
1001 $html .= "</tr></table><p>\n";
1004 if (is_array($wiki) && is_array($archive))
1006 $diff = new WikiDiff($archive['content'], $wiki['content']);
1007 if ($diff->isEmpty())
1008 $html .= '<hr>[Versions are identical]';
1011 //$fmt = new WikiDiffFormatter;
1012 $fmt = new WikiUnifiedDiffFormatter;
1013 $html .= $fmt->format($diff, $archive['content']);
1017 GeneratePage('MESSAGE', $html, sprintf(gettext("Diff of %s."),
1018 htmlspecialchars($pagename)), 0);