From 569ef438d1bc2dae13d05f289416ed18185aabc8 Mon Sep 17 00:00:00 2001 From: ian Date: Fri, 23 Mar 2018 21:06:06 +0000 Subject: [PATCH] MFC r315051, r315101, r315103, r315107, r315180, r315197, r315293, r315319, r315590, r315649, r315726, r315743, r315746-r315747, r315779, r315985, r316002, r316639, r316959, r317187, r317194, r317205-r317207, r317381, r319489, r319847, r321076-r321079, r321227, r326822 Add the BSD-licensed diff from OpenBSD, which is optionally built and installed when WITHOUT_GNU_DIFF is set. r315051: Import diff from OpenBSD Some of the modifications from the previous summer of code has been integrated Modification for compatibility with GNU diff output has been added Main difference with OpenBSD: Implement multiple GNU diff options: * --ignore-file-name-case * --no-ignore-file-name-case * --normal * --tabsize * --strip-trailing-cr Make diff -p compatible with GNU diff Implement diff -l Make diff -r compatible with GNU diff Capsicumize diffing 2 regular files Add a simple test suite Approved by: AsiaBSDcon devsummit Obtained from: OpenBSD, GSoC Relnotes: yes r315101: Fix wrong date in diff(1) Reported by: rgrimes r315103: Implement a stub --horizon-lines=NUM for compatibility with GNU diff3 some options of GNU diff3 would call diff with --horizon-lines, rcs is depending on that. Reported by: antoine r315107: Fix building with recent gcc Reported by: lwhsu, ngie r315180: Readd codes that creates a tmp file for diffing stdout or devices r315197: Do not die if cap_rights_limit reports ENOSYS Reported by: mmel r315293: Integrate contrib/netbsd-tests/usr.bin/diff/t_diff.sh in as .../usr.bin/diff/diff_test Some minor adjustment needed to be done for :same as it currently has the test script hardcoded into the test, instead of using an idiom like $(dirname $0) Sponsored by: Dell EMC Isilon r315319: diff(1): sort long options under -D example in SYNOPSYS Sponsored by: Dell EMC Isilon r315590: diff(1): add --strip-trailing-cr to last example in the SYNOPSIS This syncs the last example in the SYNOPSIS with the other examples. Reviewed by: bapt Sponsored by: Dell EMC Isilon Differential Revision: D10017 r315649: Cache tzdata when running under capsicum PR: 217957 Reported by: tobik@ r315726: diff(1): fix SYNOPSIS section noting non-existent option, --no-ignore-case `--no-ignore-case` should be `--no-ignore-file-name-case` per code for compatibility with [g]diff(1). Sponsored by: Dell EMC Isilon r315743: Use MAX and MIN macros from sys/param.h r315746: Use strndup(3) instead of malloc + memcpy r315747: Use MIN macros from sys/param.h r315779: diff(1): document remaining long options While here, try and tie together some of the short options with their long option equivalents, where possible. Sponsored by: Dell EMC Isilon r315985: diff: Fix mtime of file1 in -u/-c header line. PR: 218018 Reviewed by: bapt Differential Revision: https://reviews.freebsd.org/D10140 r316002: diff: Show nanoseconds in -u/-c header line. Show nanoseconds in the -u/-c header line. The present portability conditionals cannot handle the POSIX standard st_mtim, so remove them and unconditionally use st_mtim. PR: 218018 Reported by: jbeich Reviewed by: bapt Differential Revision: https://reviews.freebsd.org/D10145 r316639: add a stub --speed-large-files for compatibility with GNU diff There is no intention to implement it, but lots of scripts/tools using diff(1) passes GNU diff option r316959: Clean up headers declaration r317187: Add a regression test for diff -D r317194: Implement a basic --changed-group-format etcupdate(8) requires that option, while GNU diff supports many more variation of that options, their behaviour beside the simple verion implemented here are quite inconsistent as such I do not plan to implement those. The only special keyword supported by this implementation are: %< and %> %= is not implemented as the documentation of GNU diff says: common lines, but it actually when tested print the changes from the first file r317205: Document all long options r317206: Update the TODO list to reflect what has been changed r317207: Cross reference pr(1) which diff might call with -l option r317381: Fix the following warning from gcc 4.2 in usr.bin/diff: usr.bin/diff/diffreg.c: In function 'change': usr.bin/diff/diffreg.c:1085: warning: 'i' may be used uninitialized in this function This version of gcc is not smart enough to see that 'i' cannot actually be used unitialized. However, the variable is confusingly re-used, so it is better to give it another name, and clearly initialize it before attempting to use it. Reviewed by: bapt Differential Revision: https://reviews.freebsd.org/D10484 r319489: Add -H as an alias for --speed-large-file to match GNU diff. This is undocumented to match GNU diff where -H is also undocumented. Some existing software (such as kompare) uses this option by default. Reviewed by: emaste, rpokala Differential Revision: https://reviews.freebsd.org/D11022 r319847: Add some testcases for `diff --side-by-side` support These are were created proactively, in anticipation of the support being fully implemented sometime in the future. The tests currently fail on ^/head@r319845, however. Expect them to fail. PR: 219933 Tested with: gdiff r321076: Don't emit "diff: diff arguments" when diffing files if -q is specified. This improves compatibility with GNU diff. Found by accident with `diff -Nrq /usr/tests /usr/tests.new | grep Kyuafile`. Relnotes: yes r321077: Add some tests for brief (--brief/-q) format MFC with: r321076 r321078: Fix exit status with -rq when there is a file in one directory but not another, i.e., when print_only is called. Prior to this change, -rq was always returning 0. After this change it will return 1 if there is a difference between two directories. This fixes compatibility with GNU diff and unbreaks backwards compatibility expectations. Found when trying to extend diff_test:brief_format_test. MFC with: r321076, r321077 r321079: Add tests that exercise -q, like -rq and add tests that test -q like -Nrq MFC with: r321076, r321077, r321078 r321227: Use more flexible expression for replacing t_diff in contrib/netbsd-tests/usr.bin/diff/t_diff.sh with the name of the script via `basename $0`. This was a change I forgot to port over from ^/head/gnu/usr.bin/diff/tests/Makefile@r272787. r326822: Replace homemade equivalent of tolower(3) by towlower(3) This will help in the futur making diff -i works with multibyte Relnotes: Yes --- usr.bin/Makefile | 3 + usr.bin/diff/Makefile | 12 + usr.bin/diff/TODO | 18 + usr.bin/diff/diff.1 | 610 ++++++++++ usr.bin/diff/diff.c | 466 ++++++++ usr.bin/diff/diff.h | 103 ++ usr.bin/diff/diffdir.c | 239 ++++ usr.bin/diff/diffreg.c | 1628 ++++++++++++++++++++++++++ usr.bin/diff/tests/Makefile | 39 + usr.bin/diff/tests/diff_test.sh | 153 +++ usr.bin/diff/tests/group-format.out | 27 + usr.bin/diff/tests/header.out | 4 + usr.bin/diff/tests/header_ns.out | 4 + usr.bin/diff/tests/ifdef.out | 26 + usr.bin/diff/tests/input1.in | 2 + usr.bin/diff/tests/input2.in | 3 + usr.bin/diff/tests/input_c1.in | 15 + usr.bin/diff/tests/input_c2.in | 16 + usr.bin/diff/tests/simple.out | 5 + usr.bin/diff/tests/simple_b.out | 6 + usr.bin/diff/tests/simple_e.out | 4 + usr.bin/diff/tests/simple_i.out | 6 + usr.bin/diff/tests/simple_n.out | 4 + usr.bin/diff/tests/simple_p.out | 34 + usr.bin/diff/tests/simple_u.out | 7 + usr.bin/diff/tests/simple_w.out | 6 + usr.bin/diff/tests/unified_9999.out | 21 + usr.bin/diff/tests/unified_c9999.out | 36 + usr.bin/diff/tests/unified_p.out | 20 + usr.bin/diff/xmalloc.c | 88 ++ usr.bin/diff/xmalloc.h | 32 + 31 files changed, 3637 insertions(+) create mode 100644 usr.bin/diff/Makefile create mode 100644 usr.bin/diff/TODO create mode 100644 usr.bin/diff/diff.1 create mode 100644 usr.bin/diff/diff.c create mode 100644 usr.bin/diff/diff.h create mode 100644 usr.bin/diff/diffdir.c create mode 100644 usr.bin/diff/diffreg.c create mode 100644 usr.bin/diff/tests/Makefile create mode 100755 usr.bin/diff/tests/diff_test.sh create mode 100644 usr.bin/diff/tests/group-format.out create mode 100644 usr.bin/diff/tests/header.out create mode 100644 usr.bin/diff/tests/header_ns.out create mode 100644 usr.bin/diff/tests/ifdef.out create mode 100644 usr.bin/diff/tests/input1.in create mode 100644 usr.bin/diff/tests/input2.in create mode 100644 usr.bin/diff/tests/input_c1.in create mode 100644 usr.bin/diff/tests/input_c2.in create mode 100644 usr.bin/diff/tests/simple.out create mode 100644 usr.bin/diff/tests/simple_b.out create mode 100644 usr.bin/diff/tests/simple_e.out create mode 100644 usr.bin/diff/tests/simple_i.out create mode 100644 usr.bin/diff/tests/simple_n.out create mode 100644 usr.bin/diff/tests/simple_p.out create mode 100644 usr.bin/diff/tests/simple_u.out create mode 100644 usr.bin/diff/tests/simple_w.out create mode 100644 usr.bin/diff/tests/unified_9999.out create mode 100644 usr.bin/diff/tests/unified_c9999.out create mode 100644 usr.bin/diff/tests/unified_p.out create mode 100644 usr.bin/diff/xmalloc.c create mode 100644 usr.bin/diff/xmalloc.h diff --git a/usr.bin/Makefile b/usr.bin/Makefile index 523da29acb7..6da9c86a265 100644 --- a/usr.bin/Makefile +++ b/usr.bin/Makefile @@ -217,6 +217,9 @@ SUBDIR.${MK_GAMES}+= number SUBDIR.${MK_GAMES}+= pom SUBDIR.${MK_GAMES}+= primes SUBDIR.${MK_GAMES}+= random +.if ${MK_GNU_DIFF} == "no" +SUBDIR+= diff +.endif .if ${MK_GPL_DTC} != "yes" .if ${COMPILER_FEATURES:Mc++11} SUBDIR+= dtc diff --git a/usr.bin/diff/Makefile b/usr.bin/diff/Makefile new file mode 100644 index 00000000000..8242ec944d0 --- /dev/null +++ b/usr.bin/diff/Makefile @@ -0,0 +1,12 @@ +# $FreeBSD$ + +.include + +PROG= diff +SRCS= diff.c diffdir.c diffreg.c xmalloc.c + +.if ${MK_TESTS} != "no" +SUBDIR+= tests +.endif + +.include diff --git a/usr.bin/diff/TODO b/usr.bin/diff/TODO new file mode 100644 index 00000000000..537643dd890 --- /dev/null +++ b/usr.bin/diff/TODO @@ -0,0 +1,18 @@ +-y: + * soc implemented it via calling sdiff directly, but some options are + incompatible so it is fragile + * just recommend the user to run sdiff directly and do not implement it + * make a libsdiff and use that directly to avoid duplicating the code + +to be implemented: +--ignore-blank-lines +--horizon-lines +--ignore-tab-expansion +--line-format + +Will probably be not implemented: +--GTYPE-group-format (partially implement - minimal) +--LTYPE-line-format +--help (We have a manpage already) +--suppress-common-lines: depends on -y (won't be implemented, as it conflicts +the way sdiff expects it and in any case we have sdiff for that feature) diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1 new file mode 100644 index 00000000000..0618756aefb --- /dev/null +++ b/usr.bin/diff/diff.1 @@ -0,0 +1,610 @@ +.\" $OpenBSD: diff.1,v 1.47 2015/11/24 19:35:41 jmc Exp $ +.\" +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)diff.1 8.1 (Berkeley) 6/30/93 +.\" $FreeBSD$ +.\" +.Dd April 20, 2017 +.Dt DIFF 1 +.Os +.Sh NAME +.Nm diff +.Nd differential file and directory comparator +.Sh SYNOPSIS +.Nm diff +.Op Fl abdipTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl -brief +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -starting-file +.Op Fl -speed-large-files +.Op Fl -strip-trailing-cr +.Op Fl -tabsize +.Op Fl -text +.Op Fl -unified +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl L Ar label | Fl -label Ar label +.Ar file1 file2 +.Nm diff +.Op Fl abdilpTtw +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl L Ar label | Fl -label Ar label +.Op Fl -brief +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -starting-file +.Op Fl -strip-trailing-cr +.Op Fl -tabsize +.Op Fl -text +.Fl C Ar number | -context Ar number +.Ar file1 file2 +.Nm diff +.Op Fl abdiltw +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl -brief +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -starting-file +.Op Fl -strip-trailing-cr +.Op Fl -tabsize +.Op Fl -text +.Fl D Ar string | Fl -ifdef Ar string +.Ar file1 file2 +.Nm diff +.Op Fl abdilpTtw +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Op Fl L Ar label | Fl -label Ar label +.Op Fl -brief +.Op Fl -changed-group-format Ar GFMT +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -starting-file +.Op Fl -strip-trailing-cr +.Op Fl -tabsize +.Op Fl -text +.Fl U Ar number | Fl -unified Ar number +.Ar file1 file2 +.Nm diff +.Op Fl abdilNPprsTtw +.Oo +.Fl c | e | f | +.Fl n | q | u +.Oc +.Op Fl -brief +.Op Fl -changed-group-format Ar GFMT +.Op Fl -context +.Op Fl -ed +.Op Fl -expand-tabs +.Op Fl -forward-ed +.Op Fl -ignore-all-space +.Op Fl -ignore-case +.Op Fl -ignore-space-change +.Op Fl -initial-tab +.Op Fl -minimal +.Op Fl -new-file +.Op Fl -no-ignore-file-name-case +.Op Fl -normal +.Op Fl -paginate +.Op Fl -rcs +.Op Fl -recursive +.Op Fl -report-identical-files +.Op Fl -show-c-function +.Op Fl -speed-large-files +.Op Fl -strip-trailing-cr +.Op Fl -tabsize +.Op Fl -text +.Op Fl -unidirectional-new-file +.Op Fl -unified +.Op Fl I Ar pattern | Fl -ignore-matching-lines Ar pattern +.Bk -words +.Op Fl L Ar label | Fl -label Ar label +.Op Fl S Ar name | Fl -starting-file Ar name +.Op Fl X Ar file | Fl -exclude-from Ar file +.Op Fl x Ar pattern | Fl -exclude Ar pattern +.Ek +.Ar dir1 dir2 +.Sh DESCRIPTION +The +.Nm +utility compares the contents of +.Ar file1 +and +.Ar file2 +and writes to the standard output the list of changes necessary to +convert one file into the other. +No output is produced if the files are identical. +.Pp +Output options (mutually exclusive): +.Bl -tag -width Ds +.It Fl C Ar number Fl -context Ar number +Like +.Fl c +but produces a diff with +.Ar number +lines of context. +.It Fl c +Produces a diff with 3 lines of context. +With +.Fl c +the output format is modified slightly: +the output begins with identification of the files involved and +their creation dates and then each change is separated +by a line with fifteen +.Li * Ns 's . +The lines removed from +.Ar file1 +are marked with +.Sq \&-\ \& ; +those added to +.Ar file2 +are marked +.Sq \+\ \& . +Lines which are changed from one file to the other are marked in +both files with +.Sq !\ \& . +Changes which lie within 3 lines of each other are grouped together on +output. +.It Fl D Ar string Fl -ifdef Ar string +Creates a merged version of +.Ar file1 +and +.Ar file2 +on the standard output, with C preprocessor controls included so that +a compilation of the result without defining +.Ar string +is equivalent to compiling +.Ar file1 , +while defining +.Ar string +will yield +.Ar file2 . +.It Fl e -ed +Produces output in a form suitable as input for the editor utility, +.Xr ed 1 , +which can then be used to convert file1 into file2. +.Pp +Extra commands are added to the output when comparing directories with +.Fl e , +so that the result is a +.Xr sh 1 +script for converting text files which are common to the two directories +from their state in +.Ar dir1 +to their state in +.Ar dir2 . +.It Fl f -forward-ed +Identical output to that of the +.Fl e +flag, but in reverse order. +It cannot be digested by +.Xr ed 1 . +.It Fl n +Produces a script similar to that of +.Fl e , +but in the opposite order and with a count of changed lines on each +insert or delete command. +This is the form used by +.Xr rcsdiff 1 . +.It Fl q -brief +Just print a line when the files differ. +Does not output a list of changes. +.It Fl U Ar number Fl -unified Ar number +Like +.Fl u +but produces a diff with +.Ar number +lines of context. +.It Fl u +Produces a +.Em unified +diff with 3 lines of context. +A unified diff is similar to the context diff produced by the +.Fl c +option. +However, unlike with +.Fl c , +all lines to be changed (added and/or removed) are present in +a single section. +.El +.Pp +Comparison options: +.Bl -tag -width Ds +.It Fl a -text +Treat all files as +.Tn ASCII +text. +Normally +.Nm +will simply print +.Dq Binary files ... differ +if files contain binary characters. +Use of this option forces +.Nm +to produce a diff. +.It Fl b +Causes trailing blanks (spaces and tabs) to be ignored, and other +strings of blanks to compare equal. +.It Fl d -minimal +Try very hard to produce a diff as small as possible. +This may consume a lot of processing power and memory when processing +large files with many changes. +.It Fl I Ar pattern Fl -ignore-matching-lines Ar pattern +Ignores changes, insertions, and deletions whose lines match the +extended regular expression +.Ar pattern . +Multiple +.Fl I +patterns may be specified. +All lines in the change must match some pattern for the change to be +ignored. +See +.Xr re_format 7 +for more information on regular expression patterns. +.It Fl i -ignore-case +Ignores the case of letters. +E.g., +.Dq A +will compare equal to +.Dq a . +.It Fl l -paginate +Pass the output through +.Xr pr 1 +to paginate it. +.It Fl L Ar label Fl -label Ar label +Print +.Ar label +instead of the first (and second, if this option is specified twice) +file name and time in the context or unified diff header. +.It Fl p -show-c-function +With unified and context diffs, show with each change +the first 40 characters of the last line before the context beginning +with a letter, an underscore or a dollar sign. +For C source code following standard layout conventions, this will +show the prototype of the function the change applies to. +.It Fl T -initial-tab +Print a tab rather than a space before the rest of the line for the +normal, context or unified output formats. +This makes the alignment of tabs in the line consistent. +.It Fl t -expand-tabs +Will expand tabs in output lines. +Normal or +.Fl c +output adds character(s) to the front of each line which may screw up +the indentation of the original source lines and make the output listing +difficult to interpret. +This option will preserve the original source's indentation. +.It Fl w -ignore-all-blanks +Is similar to +.Fl b -ignore-space-change +but causes whitespace (blanks and tabs) to be totally ignored. +E.g., +.Dq if (\ \&a == b \&) +will compare equal to +.Dq if(a==b) . +.It Fl -changed-group-format Ar GFMT +Format input groups in the provided +.Pp +the format is a string with special keywords: +.Bl -tag -width %< +.It %< +lines from FILE1 +.It %< +lines from FILE2 +.El +.It Fl -ignore-file-name-case +ignore case when comparing file names +.It Fl -no-ignore-file-name-case +do not ignore case wen comparing file names (default) +.It Fl -normal +default diff output +.It Fl -speed-large-files +stub option for compatibility with GNU diff +.It Fl -strip-trailing-cr +strip carriage return on input files +.It Fl tabsize Ar number +Number of spaces representing a tab (default 8) +.El +.Pp +Directory comparison options: +.Bl -tag -width Ds +.It Fl N -new-file +If a file is found in only one directory, act as if it was found in the +other directory too but was of zero size. +.It Fl P -unidirectional-new-file +If a file is found only in +.Ar dir2 , +act as if it was found in +.Ar dir1 +too but was of zero size. +.It Fl r -recursive +Causes application of +.Nm +recursively to common subdirectories encountered. +.It Fl S Ar name Fl -starting-file Ar name +Re-starts a directory +.Nm +in the middle, beginning with file +.Ar name . +.It Fl s -report-identical-files +Causes +.Nm +to report files which are the same, which are otherwise not mentioned. +.It Fl X Ar file Fl -exclude-from Ar file +Exclude files and subdirectories from comparison whose basenames match +lines in +.Ar file . +Multiple +.Fl X +options may be specified. +.It Fl x Ar pattern Fl -exclude Ar pattern +Exclude files and subdirectories from comparison whose basenames match +.Ar pattern . +Patterns are matched using shell-style globbing via +.Xr fnmatch 3 . +Multiple +.Fl x +options may be specified. +.El +.Pp +If both arguments are directories, +.Nm +sorts the contents of the directories by name, and then runs the +regular file +.Nm +algorithm, producing a change list, +on text files which are different. +Binary files which differ, +common subdirectories, and files which appear in only one directory +are described as such. +In directory mode only regular files and directories are compared. +If a non-regular file such as a device special file or +.Tn FIFO +is encountered, a diagnostic message is printed. +.Pp +If only one of +.Ar file1 +and +.Ar file2 +is a directory, +.Nm +is applied to the non-directory file and the file contained in +the directory file with a filename that is the same as the +last component of the non-directory file. +.Pp +If either +.Ar file1 +or +.Ar file2 +is +.Sq - , +the standard input is +used in its place. +.Ss Output Style +The default (without +.Fl e , +.Fl c , +or +.Fl n -rcs +.\" -C +options) +output contains lines of these forms, where +.Va XX , YY , ZZ , QQ +are line numbers respective of file order. +.Pp +.Bl -tag -width "XX,YYcZZ,QQ" -compact +.It Li XX Ns Ic a Ns Li YY +At (the end of) line +.Va XX +of +.Ar file1 , +append the contents +of line +.Va YY +of +.Ar file2 +to make them equal. +.It Li XX Ns Ic a Ns Li YY,ZZ +Same as above, but append the range of lines, +.Va YY +through +.Va ZZ +of +.Ar file2 +to line +.Va XX +of file1. +.It Li XX Ns Ic d Ns Li YY +At line +.Va XX +delete +the line. +The value +.Va YY +tells to which line the change would bring +.Ar file1 +in line with +.Ar file2 . +.It Li XX,YY Ns Ic d Ns Li ZZ +Delete the range of lines +.Va XX +through +.Va YY +in +.Ar file1 . +.It Li XX Ns Ic c Ns Li YY +Change the line +.Va XX +in +.Ar file1 +to the line +.Va YY +in +.Ar file2 . +.It Li XX,YY Ns Ic c Ns Li ZZ +Replace the range of specified lines with the line +.Va ZZ . +.It Li XX,YY Ns Ic c Ns Li ZZ,QQ +Replace the range +.Va XX , Ns Va YY +from +.Ar file1 +with the range +.Va ZZ , Ns Va QQ +from +.Ar file2 . +.El +.Pp +These lines resemble +.Xr ed 1 +subcommands to convert +.Ar file1 +into +.Ar file2 . +The line numbers before the action letters pertain to +.Ar file1 ; +those after pertain to +.Ar file2 . +Thus, by exchanging +.Ic a +for +.Ic d +and reading the line in reverse order, one can also +determine how to convert +.Ar file2 +into +.Ar file1 . +As in +.Xr ed 1 , +identical +pairs (where num1 = num2) are abbreviated as a single +number. +.Sh FILES +.Bl -tag -width /tmp/diff.XXXXXXXX -compact +.It Pa /tmp/diff. Ns Ar XXXXXXXX +Temporary file used when comparing a device or the standard input. +Note that the temporary file is unlinked as soon as it is created +so it will not show up in a directory listing. +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +No differences were found. +.It 1 +Differences were found. +.It >1 +An error occurred. +.El +.Sh SEE ALSO +.Xr cmp 1 , +.Xr comm 1 , +.Xr diff3 1 , +.Xr ed 1 , +.Xr patch 1 , +.Xr pr 1 , +.Xr sdiff 1 +.Rs +.%A James W. Hunt +.%A M. Douglas McIlroy +.%T "An Algorithm for Differential File Comparison" +.%J Computing Science Technical Report +.%Q Bell Laboratories 41 +.%D June 1976 +.Re +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The flags +.Op Fl aDdIiLlNnPpqSsTtwXx +are extensions to that specification. +.Sh HISTORY +A +.Nm +command appeared in +.At v6 . diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c new file mode 100644 index 00000000000..e12697c4e89 --- /dev/null +++ b/usr.bin/diff/diff.c @@ -0,0 +1,466 @@ +/* $OpenBSD: diff.c,v 1.65 2015/12/29 19:04:46 gsoares Exp $ */ + +/* + * Copyright (c) 2003 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diff.h" +#include "xmalloc.h" + +int lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag; +int diff_format, diff_context, status, ignore_file_case; +int tabsize = 8; +char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; +char *group_format = NULL; +struct stat stb1, stb2; +struct excludes *excludes_list; +regex_t ignore_re; + +#define OPTIONS "0123456789aBbC:cdD:efHhI:iL:lnNPpqrS:sTtU:uwX:x:" +enum { + OPT_TSIZE = CHAR_MAX + 1, + OPT_STRIPCR, + OPT_IGN_FN_CASE, + OPT_NO_IGN_FN_CASE, + OPT_NORMAL, + OPT_HORIZON_LINES, + OPT_CHANGED_GROUP_FORMAT, +}; + +static struct option longopts[] = { + { "text", no_argument, 0, 'a' }, + { "ignore-space-change", no_argument, 0, 'b' }, + { "context", optional_argument, 0, 'C' }, + { "ifdef", required_argument, 0, 'D' }, + { "minimal", no_argument, 0, 'd' }, + { "ed", no_argument, 0, 'e' }, + { "forward-ed", no_argument, 0, 'f' }, + { "speed-large-files", no_argument, NULL, 'H' }, + { "ignore-matching-lines", required_argument, 0, 'I' }, + { "ignore-case", no_argument, 0, 'i' }, + { "paginate", no_argument, NULL, 'l' }, + { "label", required_argument, 0, 'L' }, + { "new-file", no_argument, 0, 'N' }, + { "rcs", no_argument, 0, 'n' }, + { "unidirectional-new-file", no_argument, 0, 'P' }, + { "show-c-function", no_argument, 0, 'p' }, + { "brief", no_argument, 0, 'q' }, + { "recursive", no_argument, 0, 'r' }, + { "report-identical-files", no_argument, 0, 's' }, + { "starting-file", required_argument, 0, 'S' }, + { "expand-tabs", no_argument, 0, 't' }, + { "initial-tab", no_argument, 0, 'T' }, + { "unified", optional_argument, 0, 'U' }, + { "ignore-all-space", no_argument, 0, 'w' }, + { "exclude", required_argument, 0, 'x' }, + { "exclude-from", required_argument, 0, 'X' }, + { "ignore-file-name-case", no_argument, NULL, OPT_IGN_FN_CASE }, + { "horizon-lines", required_argument, NULL, OPT_HORIZON_LINES }, + { "no-ignore-file-name-case", no_argument, NULL, OPT_NO_IGN_FN_CASE }, + { "normal", no_argument, NULL, OPT_NORMAL }, + { "strip-trailing-cr", no_argument, NULL, OPT_STRIPCR }, + { "tabsize", optional_argument, NULL, OPT_TSIZE }, + { "changed-group-format", required_argument, NULL, OPT_CHANGED_GROUP_FORMAT}, + { NULL, 0, 0, '\0'} +}; + +void usage(void) __dead2; +void push_excludes(char *); +void push_ignore_pats(char *); +void read_excludes_file(char *file); +void set_argstr(char **, char **); + +int +main(int argc, char **argv) +{ + const char *errstr = NULL; + char *ep, **oargv; + long l; + int ch, dflags, lastch, gotstdin, prevoptind, newarg; + + oargv = argv; + gotstdin = 0; + dflags = 0; + lastch = '\0'; + prevoptind = 1; + newarg = 1; + diff_context = 3; + diff_format = 0; + while ((ch = getopt_long(argc, argv, OPTIONS, longopts, NULL)) != -1) { + switch (ch) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg) + usage(); /* disallow -[0-9]+ */ + else if (lastch == 'c' || lastch == 'u') + diff_context = 0; + else if (!isdigit(lastch) || diff_context > INT_MAX / 10) + usage(); + diff_context = (diff_context * 10) + (ch - '0'); + break; + case 'a': + dflags |= D_FORCEASCII; + break; + case 'b': + dflags |= D_FOLDBLANKS; + break; + case 'C': + case 'c': + cflag = 1; + diff_format = D_CONTEXT; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } + break; + case 'd': + dflags |= D_MINIMAL; + break; + case 'D': + diff_format = D_IFDEF; + ifdefname = optarg; + break; + case 'e': + diff_format = D_EDIT; + break; + case 'f': + diff_format = D_REVERSE; + break; + case 'H': + /* ignore but needed for compatibility with GNU diff */ + break; + case 'h': + /* silently ignore for backwards compatibility */ + break; + case 'I': + push_ignore_pats(optarg); + break; + case 'i': + dflags |= D_IGNORECASE; + break; + case 'L': + if (label[0] == NULL) + label[0] = optarg; + else if (label[1] == NULL) + label[1] = optarg; + else + usage(); + break; + case 'l': + lflag = 1; + break; + case 'N': + Nflag = 1; + break; + case 'n': + diff_format = D_NREVERSE; + break; + case 'p': + if (diff_format == 0) + diff_format = D_CONTEXT; + dflags |= D_PROTOTYPE; + break; + case 'P': + Pflag = 1; + break; + case 'r': + rflag = 1; + break; + case 'q': + diff_format = D_BRIEF; + break; + case 'S': + start = optarg; + break; + case 's': + sflag = 1; + break; + case 'T': + Tflag = 1; + break; + case 't': + dflags |= D_EXPANDTABS; + break; + case 'U': + case 'u': + diff_format = D_UNIFIED; + if (optarg != NULL) { + l = strtol(optarg, &ep, 10); + if (*ep != '\0' || l < 0 || l >= INT_MAX) + usage(); + diff_context = (int)l; + } + break; + case 'w': + dflags |= D_IGNOREBLANKS; + break; + case 'X': + read_excludes_file(optarg); + break; + case 'x': + push_excludes(optarg); + break; + case OPT_CHANGED_GROUP_FORMAT: + diff_format = D_GFORMAT; + group_format = optarg; + break; + case OPT_HORIZON_LINES: + break; /* XXX TODO for compatibility with GNU diff3 */ + case OPT_IGN_FN_CASE: + ignore_file_case = 1; + break; + case OPT_NO_IGN_FN_CASE: + ignore_file_case = 0; + break; + case OPT_NORMAL: + diff_format = D_NORMAL; + break; + case OPT_TSIZE: + tabsize = (int) strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr) { + warnx("Invalid argument for tabsize"); + usage(); + } + break; + case OPT_STRIPCR: + dflags |= D_STRIPCR; + break; + default: + usage(); + break; + } + lastch = ch; + newarg = optind != prevoptind; + prevoptind = optind; + } + argc -= optind; + argv += optind; + +#ifdef __OpenBSD__ + if (pledge("stdio rpath tmppath", NULL) == -1) + err(2, "pledge"); +#endif + + /* + * Do sanity checks, fill in stb1 and stb2 and call the appropriate + * driver routine. Both drivers use the contents of stb1 and stb2. + */ + if (argc != 2) + usage(); + if (ignore_pats != NULL) { + char buf[BUFSIZ]; + int error; + + if ((error = regcomp(&ignore_re, ignore_pats, + REG_NEWLINE | REG_EXTENDED)) != 0) { + regerror(error, &ignore_re, buf, sizeof(buf)); + if (*ignore_pats != '\0') + errx(2, "%s: %s", ignore_pats, buf); + else + errx(2, "%s", buf); + } + } + if (strcmp(argv[0], "-") == 0) { + fstat(STDIN_FILENO, &stb1); + gotstdin = 1; + } else if (stat(argv[0], &stb1) != 0) + err(2, "%s", argv[0]); + if (strcmp(argv[1], "-") == 0) { + fstat(STDIN_FILENO, &stb2); + gotstdin = 1; + } else if (stat(argv[1], &stb2) != 0) + err(2, "%s", argv[1]); + if (gotstdin && (S_ISDIR(stb1.st_mode) || S_ISDIR(stb2.st_mode))) + errx(2, "can't compare - to a directory"); + set_argstr(oargv, argv); + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (diff_format == D_IFDEF) + errx(2, "-D option not supported with directories"); + diffdir(argv[0], argv[1], dflags); + } else { + if (S_ISDIR(stb1.st_mode)) { + argv[0] = splice(argv[0], argv[1]); + if (stat(argv[0], &stb1) < 0) + err(2, "%s", argv[0]); + } + if (S_ISDIR(stb2.st_mode)) { + argv[1] = splice(argv[1], argv[0]); + if (stat(argv[1], &stb2) < 0) + err(2, "%s", argv[1]); + } + print_status(diffreg(argv[0], argv[1], dflags, 1), argv[0], + argv[1], ""); + } + exit(status); +} + +void +set_argstr(char **av, char **ave) +{ + size_t argsize; + char **ap; + + argsize = 4 + *ave - *av + 1; + diffargs = xmalloc(argsize); + strlcpy(diffargs, "diff", argsize); + for (ap = av + 1; ap < ave; ap++) { + if (strcmp(*ap, "--") != 0) { + strlcat(diffargs, " ", argsize); + strlcat(diffargs, *ap, argsize); + } + } +} + +/* + * Read in an excludes file and push each line. + */ +void +read_excludes_file(char *file) +{ + FILE *fp; + char *buf, *pattern; + size_t len; + + if (strcmp(file, "-") == 0) + fp = stdin; + else if ((fp = fopen(file, "r")) == NULL) + err(2, "%s", file); + while ((buf = fgetln(fp, &len)) != NULL) { + if (buf[len - 1] == '\n') + len--; + if ((pattern = strndup(buf, len)) == NULL) + err(2, "xstrndup"); + push_excludes(pattern); + } + if (strcmp(file, "-") != 0) + fclose(fp); +} + +/* + * Push a pattern onto the excludes list. + */ +void +push_excludes(char *pattern) +{ + struct excludes *entry; + + entry = xmalloc(sizeof(*entry)); + entry->pattern = pattern; + entry->next = excludes_list; + excludes_list = entry; +} + +void +push_ignore_pats(char *pattern) +{ + size_t len; + + if (ignore_pats == NULL) + ignore_pats = xstrdup(pattern); + else { + /* old + "|" + new + NUL */ + len = strlen(ignore_pats) + strlen(pattern) + 2; + ignore_pats = xreallocarray(ignore_pats, 1, len); + strlcat(ignore_pats, "|", len); + strlcat(ignore_pats, pattern, len); + } +} + +void +print_only(const char *path, size_t dirlen, const char *entry) +{ + if (dirlen > 1) + dirlen--; + printf("Only in %.*s: %s\n", (int)dirlen, path, entry); +} + +void +print_status(int val, char *path1, char *path2, const char *entry) +{ + switch (val) { + case D_BINARY: + printf("Binary files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_DIFFER: + if (diff_format == D_BRIEF) + printf("Files %s%s and %s%s differ\n", + path1, entry, path2, entry); + break; + case D_SAME: + if (sflag) + printf("Files %s%s and %s%s are identical\n", + path1, entry, path2, entry); + break; + case D_MISMATCH1: + printf("File %s%s is a directory while file %s%s is a regular file\n", + path1, entry, path2, entry); + break; + case D_MISMATCH2: + printf("File %s%s is a regular file while file %s%s is a directory\n", + path1, entry, path2, entry); + break; + case D_SKIPPED1: + printf("File %s%s is not a regular file or directory and was skipped\n", + path1, entry); + break; + case D_SKIPPED2: + printf("File %s%s is not a regular file or directory and was skipped\n", + path2, entry); + break; + } +} + +void +usage(void) +{ + (void)fprintf(stderr, + "usage: diff [-abdilpTtw] [-c | -e | -f | -n | -q | -u] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--strip-trailing-cr] [--tabsize]\n" + " [-I pattern] [-L label] file1 file2\n" + " diff [-abdilpTtw] [-I pattern] [-L label] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--strip-trailing-cr] [--tabsize]\n" + " -C number file1 file2\n" + " diff [-abdiltw] [-I pattern] [--ignore-case] [--no-ignore-case]\n" + " [--normal] [--strip-trailing-cr] [--tabsize] -D string file1 file2\n" + " diff [-abdilpTtw] [-I pattern] [-L label] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--tabsize] [--strip-trailing-cr]\n" + " -U number file1 file2\n" + " diff [-abdilNPprsTtw] [-c | -e | -f | -n | -q | -u] [--ignore-case]\n" + " [--no-ignore-case] [--normal] [--tabsize] [-I pattern] [-L label]\n" + " [-S name] [-X file] [-x pattern] dir1 dir2\n"); + + exit(2); +} diff --git a/usr.bin/diff/diff.h b/usr.bin/diff/diff.h new file mode 100644 index 00000000000..87452058220 --- /dev/null +++ b/usr.bin/diff/diff.h @@ -0,0 +1,103 @@ + + +/*ROR + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)diff.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD$ + */ + +#include +#include + +/* + * Output format options + */ +#define D_NORMAL 0 /* Normal output */ +#define D_EDIT -1 /* Editor script out */ +#define D_REVERSE 1 /* Reverse editor script */ +#define D_CONTEXT 2 /* Diff with context */ +#define D_UNIFIED 3 /* Unified context diff */ +#define D_IFDEF 4 /* Diff with merged #ifdef's */ +#define D_NREVERSE 5 /* Reverse ed script with numbered + lines and no trailing . */ +#define D_BRIEF 6 /* Say if the files differ */ +#define D_GFORMAT 7 /* Diff with defined changed group format */ + +/* + * Output flags + */ +#define D_HEADER 0x001 /* Print a header/footer between files */ +#define D_EMPTY1 0x002 /* Treat first file as empty (/dev/null) */ +#define D_EMPTY2 0x004 /* Treat second file as empty (/dev/null) */ + +/* + * Command line flags + */ +#define D_FORCEASCII 0x008 /* Treat file as ascii regardless of content */ +#define D_FOLDBLANKS 0x010 /* Treat all white space as equal */ +#define D_MINIMAL 0x020 /* Make diff as small as possible */ +#define D_IGNORECASE 0x040 /* Case-insensitive matching */ +#define D_PROTOTYPE 0x080 /* Display C function prototype */ +#define D_EXPANDTABS 0x100 /* Expand tabs to spaces */ +#define D_IGNOREBLANKS 0x200 /* Ignore white space changes */ +#define D_STRIPCR 0x400 /* Strip trailing cr */ + +/* + * Status values for print_status() and diffreg() return values + */ +#define D_SAME 0 /* Files are the same */ +#define D_DIFFER 1 /* Files are different */ +#define D_BINARY 2 /* Binary files are different */ +#define D_MISMATCH1 3 /* path1 was a dir, path2 a file */ +#define D_MISMATCH2 4 /* path1 was a file, path2 a dir */ +#define D_SKIPPED1 5 /* path1 was a special file */ +#define D_SKIPPED2 6 /* path2 was a special file */ + +struct excludes { + char *pattern; + struct excludes *next; +}; + +extern int lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag; +extern int diff_format, diff_context, status, ignore_file_case; +extern int tabsize; +extern char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; +extern char *group_format; +extern struct stat stb1, stb2; +extern struct excludes *excludes_list; +extern regex_t ignore_re; + +char *splice(char *, char *); +int diffreg(char *, char *, int, int); +int easprintf(char **, const char *, ...); +void *emalloc(size_t); +void *erealloc(void *, size_t); +void diffdir(char *, char *, int); +void print_only(const char *, size_t, const char *); +void print_status(int, char *, char *, const char *); diff --git a/usr.bin/diff/diffdir.c b/usr.bin/diff/diffdir.c new file mode 100644 index 00000000000..a022bffc115 --- /dev/null +++ b/usr.bin/diff/diffdir.c @@ -0,0 +1,239 @@ +/* $OpenBSD: diffdir.c,v 1.45 2015/10/05 20:15:00 millert Exp $ */ + +/* + * Copyright (c) 2003, 2010 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diff.h" + +static int selectfile(const struct dirent *); +static void diffit(struct dirent *, char *, size_t, char *, size_t, int); + +#define d_status d_type /* we need to store status for -l */ + +/* + * Diff directory traversal. Will be called recursively if -r was specified. + */ +void +diffdir(char *p1, char *p2, int flags) +{ + struct dirent *dent1, **dp1, **edp1, **dirp1 = NULL; + struct dirent *dent2, **dp2, **edp2, **dirp2 = NULL; + size_t dirlen1, dirlen2; + char path1[PATH_MAX], path2[PATH_MAX]; + int pos; + + edp1 = edp2 = NULL; + + dirlen1 = strlcpy(path1, *p1 ? p1 : ".", sizeof(path1)); + if (dirlen1 >= sizeof(path1) - 1) { + warnc(ENAMETOOLONG, "%s", p1); + status = 2; + return; + } + if (path1[dirlen1 - 1] != '/') { + path1[dirlen1++] = '/'; + path1[dirlen1] = '\0'; + } + dirlen2 = strlcpy(path2, *p2 ? p2 : ".", sizeof(path2)); + if (dirlen2 >= sizeof(path2) - 1) { + warnc(ENAMETOOLONG, "%s", p2); + status = 2; + return; + } + if (path2[dirlen2 - 1] != '/') { + path2[dirlen2++] = '/'; + path2[dirlen2] = '\0'; + } + + /* + * Get a list of entries in each directory, skipping "excluded" files + * and sorting alphabetically. + */ + pos = scandir(path1, &dirp1, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && (Nflag || Pflag)) { + pos = 0; + } else { + warn("%s", path1); + goto closem; + } + } + dp1 = dirp1; + edp1 = dirp1 + pos; + + pos = scandir(path2, &dirp2, selectfile, alphasort); + if (pos == -1) { + if (errno == ENOENT && Nflag) { + pos = 0; + } else { + warn("%s", path2); + goto closem; + } + } + dp2 = dirp2; + edp2 = dirp2 + pos; + + /* + * If we were given a starting point, find it. + */ + if (start != NULL) { + while (dp1 != edp1 && strcmp((*dp1)->d_name, start) < 0) + dp1++; + while (dp2 != edp2 && strcmp((*dp2)->d_name, start) < 0) + dp2++; + } + + /* + * Iterate through the two directory lists, diffing as we go. + */ + while (dp1 != edp1 || dp2 != edp2) { + dent1 = dp1 != edp1 ? *dp1 : NULL; + dent2 = dp2 != edp2 ? *dp2 : NULL; + + pos = dent1 == NULL ? 1 : dent2 == NULL ? -1 : + ignore_file_case ? strcasecmp(dent1->d_name, dent2->d_name) : + strcmp(dent1->d_name, dent2->d_name) ; + if (pos == 0) { + /* file exists in both dirs, diff it */ + diffit(dent1, path1, dirlen1, path2, dirlen2, flags); + dp1++; + dp2++; + } else if (pos < 0) { + /* file only in first dir, only diff if -N */ + if (Nflag) + diffit(dent1, path1, dirlen1, path2, dirlen2, + flags); + else { + print_only(path1, dirlen1, dent1->d_name); + status = 1; + } + dp1++; + } else { + /* file only in second dir, only diff if -N or -P */ + if (Nflag || Pflag) + diffit(dent2, path1, dirlen1, path2, dirlen2, + flags); + else { + print_only(path2, dirlen2, dent2->d_name); + status = 1; + } + dp2++; + } + } + +closem: + if (dirp1 != NULL) { + for (dp1 = dirp1; dp1 < edp1; dp1++) + free(*dp1); + free(dirp1); + } + if (dirp2 != NULL) { + for (dp2 = dirp2; dp2 < edp2; dp2++) + free(*dp2); + free(dirp2); + } +} + +/* + * Do the actual diff by calling either diffreg() or diffdir(). + */ +static void +diffit(struct dirent *dp, char *path1, size_t plen1, char *path2, size_t plen2, + int flags) +{ + flags |= D_HEADER; + strlcpy(path1 + plen1, dp->d_name, PATH_MAX - plen1); + if (stat(path1, &stb1) != 0) { + if (!(Nflag || Pflag) || errno != ENOENT) { + warn("%s", path1); + return; + } + flags |= D_EMPTY1; + memset(&stb1, 0, sizeof(stb1)); + } + + strlcpy(path2 + plen2, dp->d_name, PATH_MAX - plen2); + if (stat(path2, &stb2) != 0) { + if (!Nflag || errno != ENOENT) { + warn("%s", path2); + return; + } + flags |= D_EMPTY2; + memset(&stb2, 0, sizeof(stb2)); + stb2.st_mode = stb1.st_mode; + } + if (stb1.st_mode == 0) + stb1.st_mode = stb2.st_mode; + + if (S_ISDIR(stb1.st_mode) && S_ISDIR(stb2.st_mode)) { + if (rflag) + diffdir(path1, path2, flags); + else + printf("Common subdirectories: %s and %s\n", + path1, path2); + return; + } + if (!S_ISREG(stb1.st_mode) && !S_ISDIR(stb1.st_mode)) + dp->d_status = D_SKIPPED1; + else if (!S_ISREG(stb2.st_mode) && !S_ISDIR(stb2.st_mode)) + dp->d_status = D_SKIPPED2; + else + dp->d_status = diffreg(path1, path2, flags, 0); + print_status(dp->d_status, path1, path2, ""); +} + +/* + * Returns 1 if the directory entry should be included in the + * diff, else 0. Checks the excludes list. + */ +static int +selectfile(const struct dirent *dp) +{ + struct excludes *excl; + + if (dp->d_fileno == 0) + return (0); + + /* always skip "." and ".." */ + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) + return (0); + + /* check excludes list */ + for (excl = excludes_list; excl != NULL; excl = excl->next) + if (fnmatch(excl->pattern, dp->d_name, FNM_PATHNAME) == 0) + return (0); + + return (1); +} diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c new file mode 100644 index 00000000000..525cb4ec946 --- /dev/null +++ b/usr.bin/diff/diffreg.c @@ -0,0 +1,1628 @@ +/* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ + +/* + * Copyright (C) Caldera International Inc. 2001-2002. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code and documentation must retain the above + * copyright notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * 4. Neither the name of Caldera International, Inc. nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, + * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diff.h" +#include "xmalloc.h" + +#define _PATH_PR "/usr/bin/pr" + +/* + * diff - compare two files. + */ + +/* + * Uses an algorithm due to Harold Stone, which finds + * a pair of longest identical subsequences in the two + * files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. + * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0 y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. + * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * that the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel + * + * With J in hand, the matches there recorded are + * check'ed against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. + */ + +struct cand { + int x; + int y; + int pred; +}; + +static struct line { + int serial; + int value; +} *file[2]; + +/* + * The following struct is used to record change information when + * doing a "context" or "unified" diff. (see routine "change" to + * understand the highly mnemonic field names) + */ +struct context_vec { + int a; /* start line in old file */ + int b; /* end line in old file */ + int c; /* start line in new file */ + int d; /* end line in new file */ +}; + +#define diff_output printf +static FILE *opentemp(const char *); +static void output(char *, FILE *, char *, FILE *, int); +static void check(FILE *, FILE *, int); +static void range(int, int, const char *); +static void uni_range(int, int); +static void dump_context_vec(FILE *, FILE *, int); +static void dump_unified_vec(FILE *, FILE *, int); +static void prepare(int, FILE *, size_t, int); +static void prune(void); +static void equiv(struct line *, int, struct line *, int, int *); +static void unravel(int); +static void unsort(struct line *, int, int *); +static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); +static void sort(struct line *, int); +static void print_header(const char *, const char *); +static int ignoreline(char *); +static int asciifile(FILE *); +static int fetch(long *, int, int, FILE *, int, int, int); +static int newcand(int, int, int); +static int search(int *, int, int); +static int skipline(FILE *); +static int isqrt(int); +static int stone(int *, int, int *, int *, int); +static int readhash(FILE *, int); +static int files_differ(FILE *, FILE *, int); +static char *match_function(const long *, int, FILE *); +static char *preadline(int, size_t, off_t); + +static int *J; /* will be overlaid on class */ +static int *class; /* will be overlaid on file[0] */ +static int *klist; /* will be overlaid on file[0] after class */ +static int *member; /* will be overlaid on file[1] */ +static int clen; +static int inifdef; /* whether or not we are in a #ifdef block */ +static int len[2]; +static int pref, suff; /* length of prefix and suffix */ +static int slen[2]; +static int anychange; +static long *ixnew; /* will be overlaid on file[1] */ +static long *ixold; /* will be overlaid on klist */ +static struct cand *clist; /* merely a free storage pot for candidates */ +static int clistlen; /* the length of clist */ +static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ +static int (*chrtran)(int); /* translation table for case-folding */ +static struct context_vec *context_vec_start; +static struct context_vec *context_vec_end; +static struct context_vec *context_vec_ptr; + +#define FUNCTION_CONTEXT_SIZE 55 +static char lastbuf[FUNCTION_CONTEXT_SIZE]; +static int lastline; +static int lastmatchline; + +static int +clow2low(int c) +{ + + return (c); +} + +static int +cup2low(int c) +{ + + return tolower(c); +} + +int +diffreg(char *file1, char *file2, int flags, int capsicum) +{ + FILE *f1, *f2; + int i, rval; + int ostdout = -1; + int pr_pd, kq; + struct kevent *e; + cap_rights_t rights_ro; + + e = NULL; + kq = -1; + f1 = f2 = NULL; + rval = D_SAME; + anychange = 0; + lastline = 0; + lastmatchline = 0; + context_vec_ptr = context_vec_start - 1; + if (flags & D_IGNORECASE) + chrtran = cup2low; + else + chrtran = clow2low; + if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) + return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); + if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) + goto closem; + + if (flags & D_EMPTY1) + f1 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb1.st_mode)) { + if ((f1 = opentemp(file1)) == NULL || + fstat(fileno(f1), &stb1) < 0) { + warn("%s", file1); + status |= 2; + goto closem; + } + } else if (strcmp(file1, "-") == 0) + f1 = stdin; + else + f1 = fopen(file1, "r"); + } + if (f1 == NULL) { + warn("%s", file1); + status |= 2; + goto closem; + } + + if (flags & D_EMPTY2) + f2 = fopen(_PATH_DEVNULL, "r"); + else { + if (!S_ISREG(stb2.st_mode)) { + if ((f2 = opentemp(file2)) == NULL || + fstat(fileno(f2), &stb2) < 0) { + warn("%s", file2); + status |= 2; + goto closem; + } + } else if (strcmp(file2, "-") == 0) + f2 = stdin; + else + f2 = fopen(file2, "r"); + } + if (f2 == NULL) { + warn("%s", file2); + status |= 2; + goto closem; + } + + if (lflag) { + /* redirect stdout to pr */ + int pfd[2]; + pid_t pid; + char *header; + + xasprintf(&header, "%s %s %s", diffargs, file1, file2); + signal(SIGPIPE, SIG_IGN); + fflush(stdout); + rewind(stdout); + pipe(pfd); + switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { + case -1: + status |= 2; + free(header); + err(2, "No more processes"); + case 0: + /* child */ + if (pfd[0] != STDIN_FILENO) { + dup2(pfd[0], STDIN_FILENO); + close(pfd[0]); + } + close(pfd[1]); + execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); + _exit(127); + default: + + /* parent */ + if (pfd[1] != STDOUT_FILENO) { + ostdout = dup(STDOUT_FILENO); + dup2(pfd[1], STDOUT_FILENO); + close(pfd[1]); + } + close(pfd[0]); + rewind(stdout); + free(header); + kq = kqueue(); + if (kq == -1) + err(2, "kqueue"); + e = xmalloc(sizeof(struct kevent)); + EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, + NULL); + if (kevent(kq, e, 1, NULL, 0, NULL) == -1) + err(2, "kevent"); + } + } + + if (capsicum) { + cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); + if (cap_rights_limit(fileno(f1), &rights_ro) < 0 + && errno != ENOSYS) + err(2, "unable to limit rights on: %s", file1); + if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && + errno != ENOSYS) + err(2, "unable to limit rights on: %s", file2); + if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { + /* stding has already been limited */ + if (caph_limit_stderr() == -1) + err(2, "unable to limit stderr"); + if (caph_limit_stdout() == -1) + err(2, "unable to limit stdout"); + } else if (caph_limit_stdio() == -1) + err(2, "unable to limit stdio"); + + caph_cache_catpages(); + caph_cache_tzdata(); + if (cap_enter() < 0 && errno != ENOSYS) + err(2, "unable to enter capability mode"); + } + + switch (files_differ(f1, f2, flags)) { + case 0: + goto closem; + case 1: + break; + default: + /* error */ + status |= 2; + goto closem; + } + + if ((flags & D_FORCEASCII) == 0 && + (!asciifile(f1) || !asciifile(f2))) { + rval = D_BINARY; + status |= 1; + goto closem; + } + prepare(0, f1, stb1.st_size, flags); + prepare(1, f2, stb2.st_size, flags); + + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = xreallocarray(member, slen[1] + 2, sizeof(*member)); + + class = (int *)file[0]; + unsort(sfile[0], slen[0], class); + class = xreallocarray(class, slen[0] + 2, sizeof(*class)); + + klist = xcalloc(slen[0] + 2, sizeof(*klist)); + clen = 0; + clistlen = 100; + clist = xcalloc(clistlen, sizeof(*clist)); + i = stone(class, slen[0], member, klist, flags); + free(member); + free(class); + + J = xreallocarray(J, len[0] + 2, sizeof(*J)); + unravel(klist[i]); + free(clist); + free(klist); + + ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); + ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); + check(f1, f2, flags); + output(file1, f1, file2, f2, flags); + if (ostdout != -1 && e != NULL) { + /* close the pipe to pr and restore stdout */ + int wstatus; + + fflush(stdout); + if (ostdout != STDOUT_FILENO) { + close(STDOUT_FILENO); + dup2(ostdout, STDOUT_FILENO); + close(ostdout); + } + if (kevent(kq, NULL, 0, e, 1, NULL) == -1) + err(2, "kevent"); + wstatus = e[0].data; + close(kq); + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) + errx(2, "pr exited abnormally"); + else if (WIFSIGNALED(wstatus)) + errx(2, "pr killed by signal %d", + WTERMSIG(wstatus)); + } + +closem: + if (anychange) { + status |= 1; + if (rval == D_SAME) + rval = D_DIFFER; + } + if (f1 != NULL) + fclose(f1); + if (f2 != NULL) + fclose(f2); + + return (rval); +} + +/* + * Check to see if the given files differ. + * Returns 0 if they are the same, 1 if different, and -1 on error. + * XXX - could use code from cmp(1) [faster] + */ +static int +files_differ(FILE *f1, FILE *f2, int flags) +{ + char buf1[BUFSIZ], buf2[BUFSIZ]; + size_t i, j; + + if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || + (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) + return (1); + for (;;) { + i = fread(buf1, 1, sizeof(buf1), f1); + j = fread(buf2, 1, sizeof(buf2), f2); + if ((!i && ferror(f1)) || (!j && ferror(f2))) + return (-1); + if (i != j) + return (1); + if (i == 0) + return (0); + if (memcmp(buf1, buf2, i) != 0) + return (1); + } +} + +static FILE * +opentemp(const char *f) +{ + char buf[BUFSIZ], tempfile[PATH_MAX]; + ssize_t nread; + int ifd, ofd; + + if (strcmp(f, "-") == 0) + ifd = STDIN_FILENO; + else if ((ifd = open(f, O_RDONLY, 0644)) < 0) + return (NULL); + + (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); + + if ((ofd = mkstemp(tempfile)) < 0) { + close(ifd); + return (NULL); + } + unlink(tempfile); + while ((nread = read(ifd, buf, BUFSIZ)) > 0) { + if (write(ofd, buf, nread) != nread) { + close(ifd); + close(ofd); + return (NULL); + } + } + close(ifd); + lseek(ofd, (off_t)0, SEEK_SET); + return (fdopen(ofd, "r")); +} + +char * +splice(char *dir, char *path) +{ + char *tail, *buf; + size_t dirlen; + + dirlen = strlen(dir); + while (dirlen != 0 && dir[dirlen - 1] == '/') + dirlen--; + if ((tail = strrchr(path, '/')) == NULL) + tail = path; + else + tail++; + xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); + return (buf); +} + +static void +prepare(int i, FILE *fd, size_t filesize, int flags) +{ + struct line *p; + int h; + size_t sz, j; + + rewind(fd); + + sz = MIN(filesize, SIZE_MAX) / 25; + if (sz < 100) + sz = 100; + + p = xcalloc(sz + 3, sizeof(*p)); + for (j = 0; (h = readhash(fd, flags));) { + if (j == sz) { + sz = sz * 3 / 2; + p = xreallocarray(p, sz + 3, sizeof(*p)); + } + p[++j].value = h; + } + len[i] = j; + file[i] = p; +} + +static void +prune(void) +{ + int i, j; + + for (pref = 0; pref < len[0] && pref < len[1] && + file[0][pref + 1].value == file[1][pref + 1].value; + pref++) + ; + for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && + file[0][len[0] - suff].value == file[1][len[1] - suff].value; + suff++) + ; + for (j = 0; j < 2; j++) { + sfile[j] = file[j] + pref; + slen[j] = len[j] - pref - suff; + for (i = 0; i <= slen[j]; i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while (i <= n && j <= m) { + if (a[i].value < b[j].value) + a[i++].value = 0; + else if (a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while (i <= n) + a[i++].value = 0; + b[m + 1].value = 0; + j = 0; + while (++j <= m) { + c[j] = -b[j].serial; + while (b[j + 1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +/* Code taken from ping.c */ +static int +isqrt(int n) +{ + int y, x = 1; + + if (n == 0) + return (0); + + do { /* newton was a stinker */ + y = x; + x = n / x; + x += y; + x /= 2; + } while ((x - y) > 1 || (x - y) < -1); + + return (x); +} + +static int +stone(int *a, int n, int *b, int *c, int flags) +{ + int i, k, y, j, l; + int oldc, tc, oldl, sq; + u_int numtries, bound; + + if (flags & D_MINIMAL) + bound = UINT_MAX; + else { + sq = isqrt(n); + bound = MAX(256, sq); + } + + k = 0; + c[0] = newcand(0, 0, 0); + for (i = 1; i <= n; i++) { + j = a[i]; + if (j == 0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + numtries = 0; + do { + if (y <= clist[oldc].y) + continue; + l = search(c, k, y); + if (l != oldl + 1) + oldc = c[l - 1]; + if (l <= k) { + if (clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i, y, oldc); + oldc = tc; + oldl = l; + numtries++; + } else { + c[l] = newcand(i, y, oldc); + k++; + break; + } + } while ((y = b[++j]) > 0 && numtries < bound); + } + return (k); +} + +static int +newcand(int x, int y, int pred) +{ + struct cand *q; + + if (clen == clistlen) { + clistlen = clistlen * 11 / 10; + clist = xreallocarray(clist, clistlen, sizeof(*clist)); + } + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return (clen++); +} + +static int +search(int *c, int k, int y) +{ + int i, j, l, t; + + if (clist[c[k]].y < y) /* quick look for typical case */ + return (k + 1); + i = 0; + j = k + 1; + for (;;) { + l = (i + j) / 2; + if (l <= i) + break; + t = clist[c[l]].y; + if (t > y) + j = l; + else if (t < y) + i = l; + else + return (l); + } + return (l + 1); +} + +static void +unravel(int p) +{ + struct cand *q; + int i; + + for (i = 0; i <= len[0]; i++) + J[i] = i <= pref ? i : + i > len[0] - suff ? i + len[1] - len[0] : 0; + for (q = clist + p; q->y != 0; q = clist + q->pred) + J[q->x + pref] = q->y + pref; +} + +/* + * Check does double duty: + * 1. ferret out any fortuitous correspondences due + * to confounding by hashing (which result in "jackpot") + * 2. collect random access indexes to the two files + */ +static void +check(FILE *f1, FILE *f2, int flags) +{ + int i, j, jackpot, c, d; + long ctold, ctnew; + + rewind(f1); + rewind(f2); + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for (i = 1; i <= len[0]; i++) { + if (J[i] == 0) { + ixold[i] = ctold += skipline(f1); + continue; + } + while (j < J[i]) { + ixnew[j] = ctnew += skipline(f2); + j++; + } + if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { + for (;;) { + c = getc(f1); + d = getc(f2); + /* + * GNU diff ignores a missing newline + * in one file for -b or -w. + */ + if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { + if (c == EOF && d == '\n') { + ctnew++; + break; + } else if (c == '\n' && d == EOF) { + ctold++; + break; + } + } + ctold++; + ctnew++; + if (flags & D_STRIPCR) { + if (c == '\r') { + if ((c = getc(f1)) == '\n') { + ctnew++; + break; + } + } + if (d == '\r') { + if ((d = getc(f2)) == '\n') { + ctold++; + break; + } + } + } + if ((flags & D_FOLDBLANKS) && isspace(c) && + isspace(d)) { + do { + if (c == '\n') + break; + ctold++; + } while (isspace(c = getc(f1))); + do { + if (d == '\n') + break; + ctnew++; + } while (isspace(d = getc(f2))); + } else if ((flags & D_IGNOREBLANKS)) { + while (isspace(c) && c != '\n') { + c = getc(f1); + ctold++; + } + while (isspace(d) && d != '\n') { + d = getc(f2); + ctnew++; + } + } + if (chrtran(c) != chrtran(d)) { + jackpot++; + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } else { + for (;;) { + ctold++; + ctnew++; + if ((c = getc(f1)) != (d = getc(f2))) { + /* jackpot++; */ + J[i] = 0; + if (c != '\n' && c != EOF) + ctold += skipline(f1); + if (d != '\n' && c != EOF) + ctnew += skipline(f2); + break; + } + if (c == '\n' || c == EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for (; j <= len[1]; j++) { + ixnew[j] = ctnew += skipline(f2); + } + /* + * if (jackpot) + * fprintf(stderr, "jackpot\n"); + */ +} + +/* shellsort CACM #201 */ +static void +sort(struct line *a, int n) +{ + struct line *ai, *aim, w; + int j, m = 0, k; + + if (n == 0) + return; + for (j = 1; j <= n; j *= 2) + m = 2 * j - 1; + for (m /= 2; m != 0; m /= 2) { + k = n - m; + for (j = 1; j <= k; j++) { + for (ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if (aim < ai) + break; /* wraparound */ + if (aim->value > ai[0].value || + (aim->value == ai[0].value && + aim->serial > ai[0].serial)) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + +static void +unsort(struct line *f, int l, int *b) +{ + int *a, i; + + a = xcalloc(l + 1, sizeof(*a)); + for (i = 1; i <= l; i++) + a[f[i].serial] = f[i].value; + for (i = 1; i <= l; i++) + b[i] = a[i]; + free(a); +} + +static int +skipline(FILE *f) +{ + int i, c; + + for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) + continue; + return (i); +} + +static void +output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) +{ + int m, i0, i1, j0, j1; + + rewind(f1); + rewind(f2); + m = len[0]; + J[0] = 0; + J[m + 1] = len[1] + 1; + if (diff_format != D_EDIT) { + for (i0 = 1; i0 <= m; i0 = i1 + 1) { + while (i0 <= m && J[i0] == J[i0 - 1] + 1) + i0++; + j0 = J[i0 - 1] + 1; + i1 = i0 - 1; + while (i1 < m && J[i1 + 1] == 0) + i1++; + j1 = J[i1 + 1] - 1; + J[i1] = j1; + change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); + } + } else { + for (i0 = m; i0 >= 1; i0 = i1 - 1) { + while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) + i0--; + j0 = J[i0 + 1] - 1; + i1 = i0 + 1; + while (i1 > 1 && J[i1 - 1] == 0) + i1--; + j1 = J[i1 - 1] + 1; + J[i1] = j1; + change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); + } + } + if (m == 0) + change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); + if (diff_format == D_IFDEF || diff_format == D_GFORMAT) { + for (;;) { +#define c i0 + if ((c = getc(f1)) == EOF) + return; + diff_output("%c", c); + } +#undef c + } + if (anychange != 0) { + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, flags); + else if (diff_format == D_UNIFIED) + dump_unified_vec(f1, f2, flags); + } +} + +static void +range(int a, int b, const char *separator) +{ + diff_output("%d", a > b ? b : a); + if (a < b) + diff_output("%s%d", separator, b); +} + +static void +uni_range(int a, int b) +{ + if (a < b) + diff_output("%d,%d", a, b - a + 1); + else if (a == b) + diff_output("%d", b); + else + diff_output("%d,0", b); +} + +static char * +preadline(int fd, size_t rlen, off_t off) +{ + char *line; + ssize_t nr; + + line = xmalloc(rlen + 1); + if ((nr = pread(fd, line, rlen, off)) < 0) + err(2, "preadline"); + if (nr > 0 && line[nr-1] == '\n') + nr--; + line[nr] = '\0'; + return (line); +} + +static int +ignoreline(char *line) +{ + int ret; + + ret = regexec(&ignore_re, line, 0, NULL, 0); + free(line); + return (ret == 0); /* if it matched, it should be ignored. */ +} + +/* + * Indicate that there is a difference between lines a and b of the from file + * to get to lines c to d of the to file. If a is greater then b then there + * are no lines in the from file involved and this means that there were + * lines appended (beginning at b). If c is greater than d then there are + * lines missing from the to file. + */ +static void +change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, + int *pflags) +{ + static size_t max_context = 64; + long curpos; + int i, nc, f; + const char *walk; + +restart: + if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) && + a > b && c > d) + return; + if (ignore_pats != NULL) { + char *line; + /* + * All lines in the change, insert, or delete must + * match an ignore pattern for the change to be + * ignored. + */ + if (a <= b) { /* Changes and deletes. */ + for (i = a; i <= b; i++) { + line = preadline(fileno(f1), + ixold[i] - ixold[i - 1], ixold[i - 1]); + if (!ignoreline(line)) + goto proceed; + } + } + if (a > b || c <= d) { /* Changes and inserts. */ + for (i = c; i <= d; i++) { + line = preadline(fileno(f2), + ixnew[i] - ixnew[i - 1], ixnew[i - 1]); + if (!ignoreline(line)) + goto proceed; + } + } + return; + } +proceed: + if (*pflags & D_HEADER && diff_format != D_BRIEF) { + diff_output("%s %s %s\n", diffargs, file1, file2); + *pflags &= ~D_HEADER; + } + if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { + /* + * Allocate change records as needed. + */ + if (context_vec_ptr == context_vec_end - 1) { + ptrdiff_t offset = context_vec_ptr - context_vec_start; + max_context <<= 1; + context_vec_start = xreallocarray(context_vec_start, + max_context, sizeof(*context_vec_start)); + context_vec_end = context_vec_start + max_context; + context_vec_ptr = context_vec_start + offset; + } + if (anychange == 0) { + /* + * Print the context/unidiff header first time through. + */ + print_header(file1, file2); + anychange = 1; + } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && + c > context_vec_ptr->d + (2 * diff_context) + 1) { + /* + * If this change is more than 'diff_context' lines from the + * previous change, dump the record and reset it. + */ + if (diff_format == D_CONTEXT) + dump_context_vec(f1, f2, *pflags); + else + dump_unified_vec(f1, f2, *pflags); + } + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + } + if (anychange == 0) + anychange = 1; + switch (diff_format) { + case D_BRIEF: + return; + case D_NORMAL: + case D_EDIT: + range(a, b, ","); + diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); + if (diff_format == D_NORMAL) + range(c, d, ","); + diff_output("\n"); + break; + case D_REVERSE: + diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); + range(a, b, " "); + diff_output("\n"); + break; + case D_NREVERSE: + if (a > b) + diff_output("a%d %d\n", b, d - c + 1); + else { + diff_output("d%d %d\n", a, b - a + 1); + if (!(c > d)) + /* add changed lines */ + diff_output("a%d %d\n", b, d - c + 1); + } + break; + } + if (diff_format == D_GFORMAT) { + curpos = ftell(f1); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = ixold[a > b ? b : a - 1] - curpos; + for (i = 0; i < nc; i++) + diff_output("%c", getc(f1)); + for (walk = group_format; *walk != '\0'; walk++) { + if (*walk == '%') { + walk++; + switch (*walk) { + case '<': + fetch(ixold, a, b, f1, '<', 1, *pflags); + break; + case '>': + fetch(ixnew, c, d, f2, '>', 0, *pflags); + break; + default: + diff_output("%%%c", *walk); + break; + } + continue; + } + diff_output("%c", *walk); + } + } + if (diff_format == D_NORMAL || diff_format == D_IFDEF) { + fetch(ixold, a, b, f1, '<', 1, *pflags); + if (a <= b && c <= d && diff_format == D_NORMAL) + diff_output("---\n"); + } + f = 0; + if (diff_format != D_GFORMAT) + f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); + if (f != 0 && diff_format == D_EDIT) { + /* + * A non-zero return value for D_EDIT indicates that the + * last line printed was a bare dot (".") that has been + * escaped as ".." to prevent ed(1) from misinterpreting + * it. We have to add a substitute command to change this + * back and restart where we left off. + */ + diff_output(".\n"); + diff_output("%ds/.//\n", a + f - 1); + b = a + f - 1; + a = b + 1; + c += f; + goto restart; + } + if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) + diff_output(".\n"); + if (inifdef) { + diff_output("#endif /* %s */\n", ifdefname); + inifdef = 0; + } +} + +static int +fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) +{ + int i, j, c, lastc, col, nc; + int newcol; + + /* + * When doing #ifdef's, copy down to current line + * if this is the first file, so that stuff makes it to output. + */ + if ((diff_format == D_IFDEF) && oldfile) { + long curpos = ftell(lb); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = f[a > b ? b : a - 1] - curpos; + for (i = 0; i < nc; i++) + diff_output("%c", getc(lb)); + } + if (a > b) + return (0); + if (diff_format == D_IFDEF) { + if (inifdef) { + diff_output("#else /* %s%s */\n", + oldfile == 1 ? "!" : "", ifdefname); + } else { + if (oldfile) + diff_output("#ifndef %s\n", ifdefname); + else + diff_output("#ifdef %s\n", ifdefname); + } + inifdef = 1 + oldfile; + } + for (i = a; i <= b; i++) { + fseek(lb, f[i - 1], SEEK_SET); + nc = f[i] - f[i - 1]; + if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && + ch != '\0') { + diff_output("%c", ch); + if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT + || diff_format == D_UNIFIED)) + diff_output("\t"); + else if (diff_format != D_UNIFIED) + diff_output(" "); + } + col = 0; + for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { + if ((c = getc(lb)) == EOF) { + if (diff_format == D_EDIT || diff_format == D_REVERSE || + diff_format == D_NREVERSE) + warnx("No newline at end of file"); + else + diff_output("\n\\ No newline at end of " + "file\n"); + return (0); + } + if (c == '\t' && (flags & D_EXPANDTABS)) { + newcol = ((col/tabsize)+1)*tabsize; + do { + diff_output(" "); + } while (++col < newcol); + } else { + if (diff_format == D_EDIT && j == 1 && c == '\n' + && lastc == '.') { + /* + * Don't print a bare "." line + * since that will confuse ed(1). + * Print ".." instead and return, + * giving the caller an offset + * from which to restart. + */ + diff_output(".\n"); + return (i - a + 1); + } + diff_output("%c", c); + col++; + } + } + } + return (0); +} + +/* + * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. + */ +static int +readhash(FILE *f, int flags) +{ + int i, t, space; + int sum; + + sum = 1; + space = 0; + if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { + if (flags & D_IGNORECASE) + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (flags & D_STRIPCR && t == '\r') { + t = getc(f); + if (t == '\n') + break; + ungetc(t, f); + } + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + chrtran(t); + } + else + for (i = 0; (t = getc(f)) != '\n'; i++) { + if (flags & D_STRIPCR && t == '\r') { + t = getc(f); + if (t == '\n') + break; + ungetc(t, f); + } + if (t == EOF) { + if (i == 0) + return (0); + break; + } + sum = sum * 127 + t; + } + } else { + for (i = 0;;) { + switch (t = getc(f)) { + case '\r': + case '\t': + case '\v': + case '\f': + case ' ': + space++; + continue; + default: + if (space && (flags & D_IGNOREBLANKS) == 0) { + i++; + space = 0; + } + sum = sum * 127 + chrtran(t); + i++; + continue; + case EOF: + if (i == 0) + return (0); + /* FALLTHROUGH */ + case '\n': + break; + } + break; + } + } + /* + * There is a remote possibility that we end up with a zero sum. + * Zero is used as an EOF marker, so return 1 instead. + */ + return (sum == 0 ? 1 : sum); +} + +static int +asciifile(FILE *f) +{ + unsigned char buf[BUFSIZ]; + size_t cnt; + + if (f == NULL) + return (1); + + rewind(f); + cnt = fread(buf, 1, sizeof(buf), f); + return (memchr(buf, '\0', cnt) == NULL); +} + +#define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) + +static char * +match_function(const long *f, int pos, FILE *fp) +{ + unsigned char buf[FUNCTION_CONTEXT_SIZE]; + size_t nc; + int last = lastline; + const char *state = NULL; + + lastline = pos; + while (pos > last) { + fseek(fp, f[pos - 1], SEEK_SET); + nc = f[pos] - f[pos - 1]; + if (nc >= sizeof(buf)) + nc = sizeof(buf) - 1; + nc = fread(buf, 1, nc, fp); + if (nc > 0) { + buf[nc] = '\0'; + buf[strcspn(buf, "\n")] = '\0'; + if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { + if (begins_with(buf, "private:")) { + if (!state) + state = " (private)"; + } else if (begins_with(buf, "protected:")) { + if (!state) + state = " (protected)"; + } else if (begins_with(buf, "public:")) { + if (!state) + state = " (public)"; + } else { + strlcpy(lastbuf, buf, sizeof lastbuf); + if (state) + strlcat(lastbuf, state, + sizeof lastbuf); + lastmatchline = pos; + return lastbuf; + } + } + } + pos--; + } + return lastmatchline > 0 ? lastbuf : NULL; +} + +/* dump accumulated "context" diff changes */ +static void +dump_context_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd, do_output; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAX(1, cvp->a - diff_context); + upb = MIN(len[0], context_vec_ptr->b + diff_context); + lowc = MAX(1, cvp->c - diff_context); + upd = MIN(len[1], context_vec_ptr->d + diff_context); + + diff_output("***************"); + if ((flags & D_PROTOTYPE)) { + f = match_function(ixold, lowa-1, f1); + if (f != NULL) + diff_output(" %s", f); + } + diff_output("\n*** "); + range(lowa, upb, ","); + diff_output(" ****\n"); + + /* + * Output changes to the "old" file. The first loop suppresses + * output if there were no changes to the "old" file (we'll see + * the "old" lines as context in the "new" list). + */ + do_output = 0; + for (; cvp <= context_vec_ptr; cvp++) + if (cvp->a <= cvp->b) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'a') + fetch(ixold, lowa, b, f1, ' ', 0, flags); + else { + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, + ch == 'c' ? '!' : '-', 0, flags); + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b + 1, upb, f1, ' ', 0, flags); + } + /* output changes to the "new" file */ + diff_output("--- "); + range(lowc, upd, ","); + diff_output(" ----\n"); + + do_output = 0; + for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) + if (cvp->c <= cvp->d) { + cvp = context_vec_start; + do_output++; + break; + } + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'd') + fetch(ixnew, lowc, d, f2, ' ', 0, flags); + else { + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, + ch == 'c' ? '!' : '+', 0, flags); + } + lowc = d + 1; + cvp++; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + } + context_vec_ptr = context_vec_start - 1; +} + +/* dump accumulated "unified" diff changes */ +static void +dump_unified_vec(FILE *f1, FILE *f2, int flags) +{ + struct context_vec *cvp = context_vec_start; + int lowa, upb, lowc, upd; + int a, b, c, d; + char ch, *f; + + if (context_vec_start > context_vec_ptr) + return; + + b = d = 0; /* gcc */ + lowa = MAX(1, cvp->a - diff_context); + upb = MIN(len[0], context_vec_ptr->b + diff_context); + lowc = MAX(1, cvp->c - diff_context); + upd = MIN(len[1], context_vec_ptr->d + diff_context); + + diff_output("@@ -"); + uni_range(lowa, upb); + diff_output(" +"); + uni_range(lowc, upd); + diff_output(" @@"); + if ((flags & D_PROTOTYPE)) { + f = match_function(ixold, lowa-1, f1); + if (f != NULL) + diff_output(" %s", f); + } + diff_output("\n"); + + /* + * Output changes in "unified" diff format--the old and new lines + * are printed together. + */ + for (; cvp <= context_vec_ptr; cvp++) { + a = cvp->a; + b = cvp->b; + c = cvp->c; + d = cvp->d; + + /* + * c: both new and old changes + * d: only changes in the old file + * a: only changes in the new file + */ + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + switch (ch) { + case 'c': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + case 'd': + fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); + fetch(ixold, a, b, f1, '-', 0, flags); + break; + case 'a': + fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); + fetch(ixnew, c, d, f2, '+', 0, flags); + break; + } + lowa = b + 1; + lowc = d + 1; + } + fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); + + context_vec_ptr = context_vec_start - 1; +} + +static void +print_header(const char *file1, const char *file2) +{ + const char *time_format; + char buf1[256]; + char buf2[256]; + char end1[10]; + char end2[10]; + struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; + int nsec1 = stb1.st_mtim.tv_nsec; + int nsec2 = stb2.st_mtim.tv_nsec; + + time_format = "%Y-%m-%d %H:%M:%S"; + + if (cflag) + time_format = "%c"; + tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); + tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); + strftime(buf1, 256, time_format, tm_ptr1); + strftime(buf2, 256, time_format, tm_ptr2); + if (!cflag) { + strftime(end1, 10, "%z", tm_ptr1); + strftime(end2, 10, "%z", tm_ptr2); + sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); + sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); + } + if (label[0] != NULL) + diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", + label[0]); + else + diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", + file1, buf1); + if (label[1] != NULL) + diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", + label[1]); + else + diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", + file2, buf2); +} diff --git a/usr.bin/diff/tests/Makefile b/usr.bin/diff/tests/Makefile new file mode 100644 index 00000000000..62a0ab5c618 --- /dev/null +++ b/usr.bin/diff/tests/Makefile @@ -0,0 +1,39 @@ +# $FreeBSD$ + +PACKAGE= tests + +ATF_TESTS_SH= diff_test + +${PACKAGE}FILES+= \ + input1.in \ + input2.in \ + input_c1.in \ + input_c2.in \ + simple.out \ + simple_e.out \ + simple_n.out \ + simple_u.out \ + simple_i.out \ + simple_w.out \ + simple_b.out \ + simple_p.out \ + unified_p.out \ + unified_c9999.out \ + unified_9999.out \ + header.out \ + header_ns.out \ + ifdef.out \ + group-format.out + +NETBSD_ATF_TESTS_SH+= netbsd_diff_test + +${PACKAGE}FILES+= \ + d_mallocv1.in \ + d_mallocv2.in + +ATF_TESTS_SH_SED_netbsd_diff_test+= -e 's/t_diff/`basename $$0`/g' +ATF_TESTS_SH_SRC_netbsd_diff_test= t_diff.sh + +.include + +.include diff --git a/usr.bin/diff/tests/diff_test.sh b/usr.bin/diff/tests/diff_test.sh new file mode 100755 index 00000000000..9b97f19f729 --- /dev/null +++ b/usr.bin/diff/tests/diff_test.sh @@ -0,0 +1,153 @@ +# $FreeBSD$ + +atf_test_case simple +atf_test_case unified +atf_test_case header +atf_test_case header_ns +atf_test_case ifdef +atf_test_case group_format +atf_test_case side_by_side +atf_test_case brief_format + +simple_body() +{ + atf_check -o file:$(atf_get_srcdir)/simple.out -s eq:1 \ + diff "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_e.out -s eq:1 \ + diff -e "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_u.out -s eq:1 \ + diff -u -L input1 -L input2 "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_n.out -s eq:1 \ + diff -n "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check -o inline:"Files $(atf_get_srcdir)/input1.in and $(atf_get_srcdir)/input2.in differ\n" -s eq:1 \ + diff -q "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input2.in" + + atf_check \ + diff -q "$(atf_get_srcdir)/input1.in" "$(atf_get_srcdir)/input1.in" + + atf_check -o file:$(atf_get_srcdir)/simple_i.out -s eq:1 \ + diff -i "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_w.out -s eq:1 \ + diff -w "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_b.out -s eq:1 \ + diff -b "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + + atf_check -o file:$(atf_get_srcdir)/simple_p.out -s eq:1 \ + diff --label input_c1.in --label input_c2.in -p "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" +} + +unified_body() +{ + atf_check -o file:$(atf_get_srcdir)/unified_p.out -s eq:1 \ + diff -up -L input_c1.in -L input_c2.in "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + atf_check -o file:$(atf_get_srcdir)/unified_c9999.out -s eq:1 \ + diff -u -c9999 -L input_c1.in -L input_c2.in "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" + atf_check -o file:$(atf_get_srcdir)/unified_9999.out -s eq:1 \ + diff -u9999 -L input_c1.in -L input_c2.in "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" +} + +header_body() +{ + export TZ=UTC + : > empty + echo hello > hello + touch -d 2015-04-03T01:02:03 empty + touch -d 2016-12-22T11:22:33 hello + atf_check -o "file:$(atf_get_srcdir)/header.out" -s eq:1 \ + diff -u empty hello +} + +header_ns_body() +{ + export TZ=UTC + : > empty + echo hello > hello + touch -d 2015-04-03T01:02:03.123456789 empty + touch -d 2016-12-22T11:22:33.987654321 hello + atf_check -o "file:$(atf_get_srcdir)/header_ns.out" -s eq:1 \ + diff -u empty hello +} + +ifdef_body() +{ + atf_check -o file:$(atf_get_srcdir)/ifdef.out -s eq:1 \ + diff -D PLOP "$(atf_get_srcdir)/input_c1.in" \ + "$(atf_get_srcdir)/input_c2.in" +} + +group_format_body() +{ + atf_check -o file:$(atf_get_srcdir)/group-format.out -s eq:1 \ + diff --changed-group-format='<<<<<<< (local) +%<======= +%>>>>>>>> (stock) +' "$(atf_get_srcdir)/input_c1.in" "$(atf_get_srcdir)/input_c2.in" +} + +side_by_side_body() +{ + atf_expect_fail "--side-by-side not currently implemented (bug # 219933)" + + atf_check -o save:A printf "A\nB\nC\n" + atf_check -o save:B printf "D\nB\nE\n" + + exp_output="A[[:space:]]+|[[:space:]]+D\nB[[:space:]]+B\nC[[:space:]]+|[[:space:]]+E" + exp_output_suppressed="A[[:space:]]+|[[:space:]]+D\nC[[:space:]]+|[[:space:]]+E" + + atf_check -o match:"$exp_output" -s exit:1 \ + diff --side-by-side A B + atf_check -o match:"$exp_output" -s exit:1 \ + diff -y A B + atf_check -o match:"$exp_output_suppressed" -s exit:1 \ + diff -y --suppress-common-lines A B + atf_check -o match:"$exp_output_suppressed" -s exit:1 \ + diff -W 65 -y --suppress-common-lines A B +} + +brief_format_body() +{ + atf_check mkdir A B + + atf_check -x "echo 1 > A/test-file" + atf_check -x "echo 2 > B/test-file" + + atf_check cp -Rf A C + atf_check cp -Rf A D + + atf_check -x "echo 3 > D/another-test-file" + + atf_check \ + -s exit:1 \ + -o inline:"Files A/test-file and B/test-file differ\n" \ + diff -rq A B + + atf_check diff -rq A C + + atf_check \ + -s exit:1 \ + -o inline:"Only in D: another-test-file\n" \ + diff -rq A D + + atf_check \ + -s exit:1 \ + -o inline:"Files A/another-test-file and D/another-test-file differ\n" \ + diff -Nrq A D +} + +atf_init_test_cases() +{ + atf_add_test_case simple + atf_add_test_case unified + atf_add_test_case header + atf_add_test_case header_ns + atf_add_test_case ifdef + atf_add_test_case group_format + atf_add_test_case side_by_side + atf_add_test_case brief_format +} diff --git a/usr.bin/diff/tests/group-format.out b/usr.bin/diff/tests/group-format.out new file mode 100644 index 00000000000..4e1bf85fbba --- /dev/null +++ b/usr.bin/diff/tests/group-format.out @@ -0,0 +1,27 @@ +/* + * A comment + * +<<<<<<< (local) + * And another bla +======= + * And another bla +>>>>>>> (stock) + * +<<<<<<< (local) + * And yet another +======= + * and yet another +>>>>>>> (stock) + */ + +int +main(void) +{ +<<<<<<< (local) +======= + +>>>>>>> (stock) + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/header.out b/usr.bin/diff/tests/header.out new file mode 100644 index 00000000000..2e1665a30e6 --- /dev/null +++ b/usr.bin/diff/tests/header.out @@ -0,0 +1,4 @@ +--- empty 2015-04-03 01:02:03.000000000 +0000 ++++ hello 2016-12-22 11:22:33.000000000 +0000 +@@ -0,0 +1 @@ ++hello diff --git a/usr.bin/diff/tests/header_ns.out b/usr.bin/diff/tests/header_ns.out new file mode 100644 index 00000000000..b1316dfc12b --- /dev/null +++ b/usr.bin/diff/tests/header_ns.out @@ -0,0 +1,4 @@ +--- empty 2015-04-03 01:02:03.123456789 +0000 ++++ hello 2016-12-22 11:22:33.987654321 +0000 +@@ -0,0 +1 @@ ++hello diff --git a/usr.bin/diff/tests/ifdef.out b/usr.bin/diff/tests/ifdef.out new file mode 100644 index 00000000000..cc72cac0863 --- /dev/null +++ b/usr.bin/diff/tests/ifdef.out @@ -0,0 +1,26 @@ +/* + * A comment + * +#ifndef PLOP + * And another bla +#else /* PLOP */ + * And another bla +#endif /* PLOP */ + * +#ifndef PLOP + * And yet another +#else /* PLOP */ + * and yet another +#endif /* PLOP */ + */ + +int +main(void) +{ +#ifdef PLOP + +#endif /* PLOP */ + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/input1.in b/usr.bin/diff/tests/input1.in new file mode 100644 index 00000000000..3892e8400f8 --- /dev/null +++ b/usr.bin/diff/tests/input1.in @@ -0,0 +1,2 @@ +Simple input file designed +to be able to test diff diff --git a/usr.bin/diff/tests/input2.in b/usr.bin/diff/tests/input2.in new file mode 100644 index 00000000000..c38b487353a --- /dev/null +++ b/usr.bin/diff/tests/input2.in @@ -0,0 +1,3 @@ +Simple input file designed +and written +to be able to test diff utility diff --git a/usr.bin/diff/tests/input_c1.in b/usr.bin/diff/tests/input_c1.in new file mode 100644 index 00000000000..d39dfbdc511 --- /dev/null +++ b/usr.bin/diff/tests/input_c1.in @@ -0,0 +1,15 @@ +/* + * A comment + * + * And another bla + * + * And yet another + */ + +int +main(void) +{ + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/input_c2.in b/usr.bin/diff/tests/input_c2.in new file mode 100644 index 00000000000..933ec67dc17 --- /dev/null +++ b/usr.bin/diff/tests/input_c2.in @@ -0,0 +1,16 @@ +/* + * A comment + * + * And another bla + * + * and yet another + */ + +int +main(void) +{ + + printf("something"); + + return (0); +} diff --git a/usr.bin/diff/tests/simple.out b/usr.bin/diff/tests/simple.out new file mode 100644 index 00000000000..fcbcaa041e8 --- /dev/null +++ b/usr.bin/diff/tests/simple.out @@ -0,0 +1,5 @@ +2c2,3 +< to be able to test diff +--- +> and written +> to be able to test diff utility diff --git a/usr.bin/diff/tests/simple_b.out b/usr.bin/diff/tests/simple_b.out new file mode 100644 index 00000000000..704be9d621a --- /dev/null +++ b/usr.bin/diff/tests/simple_b.out @@ -0,0 +1,6 @@ +6c6 +< * And yet another +--- +> * and yet another +11a12 +> diff --git a/usr.bin/diff/tests/simple_e.out b/usr.bin/diff/tests/simple_e.out new file mode 100644 index 00000000000..0c7e2b5c752 --- /dev/null +++ b/usr.bin/diff/tests/simple_e.out @@ -0,0 +1,4 @@ +2c +and written +to be able to test diff utility +. diff --git a/usr.bin/diff/tests/simple_i.out b/usr.bin/diff/tests/simple_i.out new file mode 100644 index 00000000000..9edc1f98d72 --- /dev/null +++ b/usr.bin/diff/tests/simple_i.out @@ -0,0 +1,6 @@ +4c4 +< * And another bla +--- +> * And another bla +11a12 +> diff --git a/usr.bin/diff/tests/simple_n.out b/usr.bin/diff/tests/simple_n.out new file mode 100644 index 00000000000..33ca7090cf9 --- /dev/null +++ b/usr.bin/diff/tests/simple_n.out @@ -0,0 +1,4 @@ +d2 1 +a2 2 +and written +to be able to test diff utility diff --git a/usr.bin/diff/tests/simple_p.out b/usr.bin/diff/tests/simple_p.out new file mode 100644 index 00000000000..f5aebb0d119 --- /dev/null +++ b/usr.bin/diff/tests/simple_p.out @@ -0,0 +1,34 @@ +*** input_c1.in +--- input_c2.in +*************** +*** 1,14 **** + /* + * A comment + * +! * And another bla + * +! * And yet another + */ + + int + main(void) + { + printf("something"); + + return (0); +--- 1,15 ---- + /* + * A comment + * +! * And another bla + * +! * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); diff --git a/usr.bin/diff/tests/simple_u.out b/usr.bin/diff/tests/simple_u.out new file mode 100644 index 00000000000..f341987ebec --- /dev/null +++ b/usr.bin/diff/tests/simple_u.out @@ -0,0 +1,7 @@ +--- input1 ++++ input2 +@@ -1,2 +1,3 @@ + Simple input file designed +-to be able to test diff ++and written ++to be able to test diff utility diff --git a/usr.bin/diff/tests/simple_w.out b/usr.bin/diff/tests/simple_w.out new file mode 100644 index 00000000000..704be9d621a --- /dev/null +++ b/usr.bin/diff/tests/simple_w.out @@ -0,0 +1,6 @@ +6c6 +< * And yet another +--- +> * and yet another +11a12 +> diff --git a/usr.bin/diff/tests/unified_9999.out b/usr.bin/diff/tests/unified_9999.out new file mode 100644 index 00000000000..0f9303fbdc7 --- /dev/null +++ b/usr.bin/diff/tests/unified_9999.out @@ -0,0 +1,21 @@ +--- input_c1.in ++++ input_c2.in +@@ -1,15 +1,16 @@ + /* + * A comment + * +- * And another bla ++ * And another bla + * +- * And yet another ++ * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); + } diff --git a/usr.bin/diff/tests/unified_c9999.out b/usr.bin/diff/tests/unified_c9999.out new file mode 100644 index 00000000000..87f439463e5 --- /dev/null +++ b/usr.bin/diff/tests/unified_c9999.out @@ -0,0 +1,36 @@ +*** input_c1.in +--- input_c2.in +*************** +*** 1,15 **** + /* + * A comment + * +! * And another bla + * +! * And yet another + */ + + int + main(void) + { + printf("something"); + + return (0); + } +--- 1,16 ---- + /* + * A comment + * +! * And another bla + * +! * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); + } diff --git a/usr.bin/diff/tests/unified_p.out b/usr.bin/diff/tests/unified_p.out new file mode 100644 index 00000000000..938b07890fb --- /dev/null +++ b/usr.bin/diff/tests/unified_p.out @@ -0,0 +1,20 @@ +--- input_c1.in ++++ input_c2.in +@@ -1,14 +1,15 @@ + /* + * A comment + * +- * And another bla ++ * And another bla + * +- * And yet another ++ * and yet another + */ + + int + main(void) + { ++ + printf("something"); + + return (0); diff --git a/usr.bin/diff/xmalloc.c b/usr.bin/diff/xmalloc.c new file mode 100644 index 00000000000..7c83e452262 --- /dev/null +++ b/usr.bin/diff/xmalloc.c @@ -0,0 +1,88 @@ +/* $OpenBSD: xmalloc.c,v 1.9 2015/11/17 18:25:02 tobias Exp $ */ +/* + * Author: Tatu Ylonen + * Copyright (c) 1995 Tatu Ylonen , Espoo, Finland + * All rights reserved + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include "xmalloc.h" + +void * +xmalloc(size_t size) +{ + void *ptr; + + if (size == 0) + errx(2, "xmalloc: zero size"); + ptr = malloc(size); + if (ptr == NULL) + err(2, "xmalloc: allocating %zu bytes", size); + return ptr; +} + +void * +xcalloc(size_t nmemb, size_t size) +{ + void *ptr; + + ptr = calloc(nmemb, size); + if (ptr == NULL) + err(2, "xcalloc: allocating %zu * %zu bytes", nmemb, size); + return ptr; +} + +void * +xreallocarray(void *ptr, size_t nmemb, size_t size) +{ + void *new_ptr; + + new_ptr = reallocarray(ptr, nmemb, size); + if (new_ptr == NULL) + err(2, "xreallocarray: allocating %zu * %zu bytes", + nmemb, size); + return new_ptr; +} + +char * +xstrdup(const char *str) +{ + char *cp; + + if ((cp = strdup(str)) == NULL) + err(2, "xstrdup"); + return cp; +} + +int +xasprintf(char **ret, const char *fmt, ...) +{ + va_list ap; + int i; + + va_start(ap, fmt); + i = vasprintf(ret, fmt, ap); + va_end(ap); + + if (i < 0 || *ret == NULL) + err(2, "xasprintf"); + + return i; +} diff --git a/usr.bin/diff/xmalloc.h b/usr.bin/diff/xmalloc.h new file mode 100644 index 00000000000..4f4f9ffbaf1 --- /dev/null +++ b/usr.bin/diff/xmalloc.h @@ -0,0 +1,32 @@ +/* $OpenBSD: xmalloc.h,v 1.4 2015/11/12 16:30:30 mmcc Exp $ */ + +/* + * Author: Tatu Ylonen + * Copyright (c) 1995 Tatu Ylonen , Espoo, Finland + * All rights reserved + * Created: Mon Mar 20 22:09:17 1995 ylo + * + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + * + * $FreeBSD$ + */ + +#ifndef XMALLOC_H +#define XMALLOC_H + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xreallocarray(void *, size_t, size_t); +char *xstrdup(const char *); +int xasprintf(char **, const char *, ...) + __attribute__((__format__ (printf, 2, 3))) + __attribute__((__nonnull__ (2))); + +#endif /* XMALLOC_H */ -- 2.45.0