2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 static const char copyright[] =
32 "@(#) Copyright (c) 1980, 1993\n\
33 The Regents of the University of California. All rights reserved.\n";
38 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
46 * checknr: check an nroff/troff input file for matching macro calls.
47 * We also attempt to match size and font changes, but only the embedded
48 * kind. These must end in \s0 and \fP respectively. Maybe more
49 * sophistication later, but for now think of these restrictions as
50 * contributions to structured typesetting.
58 #define MAXSTK 100 /* Stack size */
59 #define MAXBR 100 /* Max number of bracket pairs known */
60 #define MAXCMDS 600 /* Max number of commands known */
62 static void addcmd(char *);
63 static void addmac(const char *);
64 static int binsrch(const char *);
65 static void checkknown(const char *);
66 static void chkcmd(const char *, const char *);
67 static void complain(int);
68 static int eq(const char *, const char *);
69 static void nomatch(const char *);
71 static void process(FILE *);
72 static void prop(int);
73 static void usage(void);
76 * The stack on which we remember what we've seen so far.
78 static struct stkstr {
79 int opno; /* number of opening bracket */
80 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
81 int parm; /* parm to size, font, etc */
82 int lno; /* line number */
87 * The kinds of opening and closing brackets.
93 /* A few bare bones troff commands */
95 {"sz", "sz"}, /* also \s */
97 {"ft", "ft"}, /* also \f */
111 /* the -ms package */
127 /* The -me package */
136 /* The -mdoc package */
149 /* Things needed by preprocessors */
158 * All commands known to nroff, plus macro packages.
159 * Used so we can complain about unrecognized commands.
161 static const char *knowncmds[MAXCMDS] = {
162 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
163 "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x",
164 "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C",
165 "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f",
166 "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL",
167 "AM", "AS", "AT", "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
168 "B", "B" , "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd",
169 "Bf", "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", "Cd",
170 "Cm", "D", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", "Dd",
171 "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX",
172 "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", "Ev", "FA", "FD", "FE", "FG",
173 "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl",
174 "Fn", "Fo", "Ft", "Fx", "H", "H" , "HC", "HD", "HM", "HO", "HU", "I", "I" ,
175 "ID", "IE", "IH", "IM", "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF",
176 "KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME",
177 "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", "Nm",
178 "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", "Os", "Ot", "Ox",
179 "P", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", "Pa", "Pc", "Pf", "Po",
180 "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", "Qo", "Qq", "R", "R" , "RA", "RC",
181 "RE", "RL", "RP", "RQ", "RS", "RT", "Re", "Rs", "S", "S" , "S0", "S2", "S3",
182 "SA", "SG", "SH", "SK", "SM", "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss",
183 "St", "Sx", "Sy", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP",
184 "TQ", "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
185 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", "Xr", "[",
186 "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "\\{", "\\}",
187 "]", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
188 "b", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc",
189 "ce", "cf", "ch", "chop", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "do",
190 "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "evc",
191 "ex", "fallback", "fc", "feature", "fi", "fl", "flig", "fo", "fp", "ft", "ftr",
192 "fz", "fzoom", "hc", "he", "hidechar", "hl", "hp", "ht", "hw", "hx", "hy",
193 "hylang", "i", "i" , "ie", "if", "ig", "in", "ip", "it", "ix", "kern",
194 "kernafter", "kernbefore", "kernpair", "lc", "lc_ctype", "lg", "lhang", "li",
195 "ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
196 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
197 "of", "oh", "os", "pa", "papersize", "pc", "pi", "pl", "pm", "pn", "po", "pp",
198 "ps", "q", "q" , "r", "r" , "rb", "rd", "re", "recursionlimit", "return",
199 "rhang", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "shift", "sk",
200 "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp",
201 "tr", "track", "u", "uf", "uh", "ul", "vs", "wh", "xflag", "xp", "yr",
205 static int lineno; /* current line number in input file */
206 static const char *cfilename; /* name of current file */
207 static int nfiles; /* number of files to process */
208 static int fflag; /* -f: ignore \f */
209 static int sflag; /* -s: ignore \s */
210 static int ncmds; /* size of knowncmds */
211 static int slot; /* slot in knowncmds found by binsrch */
214 main(int argc, char **argv)
221 /* Figure out how many known commands there are */
222 while (knowncmds[ncmds])
224 while (argc > 1 && argv[1][0] == '-') {
227 /* -a: add pairs of macros */
229 i = strlen(argv[1]) - 2;
232 /* look for empty macro slots */
233 for (i=0; br[i].opbr; i++)
235 for (cp=argv[1]+3; cp[-1]; cp += 6) {
239 errx(1, "too many pairs");
240 if ((tmp = malloc(3)) == NULL)
244 if ((tmp = malloc(3)) == NULL)
246 strlcpy(tmp, cp+3, 3);
248 addmac(br[i].opbr); /* knows pairs are also known cmds */
254 /* -c: add known commands */
256 i = strlen(argv[1]) - 2;
259 for (cp=argv[1]+3; cp[-1]; cp += 3) {
260 if (cp[2] && cp[2] != '.')
268 /* -f: ignore font changes */
273 /* -s: ignore size changes */
286 for (i = 1; i < argc; i++) {
288 f = fopen(cfilename, "r");
290 warn("%s", cfilename);
307 "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
315 char mac[64]; /* The current macro or nroff command */
323 for (lineno = 1; getline(&line, &linecap, f) > 0; lineno++) {
324 if (line[0] == '.') {
326 * find and isolate the macro/command name.
328 strncpy(mac, line+1, 4);
329 if (isspace(mac[0])) {
331 printf("Empty command\n");
332 } else if (isspace(mac[1])) {
334 } else if (isspace(mac[2])) {
336 } else if (mac[0] != '\\' || mac[1] != '\"') {
338 printf("Command too long\n");
342 * Is it a known command?
356 * At this point we process the line looking
359 for (i = 0; line[i]; i++)
360 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
361 if (!sflag && line[++i] == 's') {
368 while (isdigit(line[++i]))
369 n = 10 * n + line[i] - '0';
373 stk[stktop].opno == SZ) {
377 printf("unmatched \\s0\n");
380 stk[++stktop].opno = SZ;
382 stk[stktop].parm = n;
383 stk[stktop].lno = lineno;
385 } else if (!fflag && line[i] == 'f') {
389 stk[stktop].opno == FT) {
393 printf("unmatched \\fP\n");
396 stk[++stktop].opno = FT;
398 stk[stktop].parm = n;
399 stk[stktop].lno = lineno;
406 * We've hit the end and look at all this stuff that hasn't been
407 * matched yet! Complain, complain.
409 for (i = stktop; i >= 0; i--) {
418 printf("Unmatched ");
427 printf(".%s", br[stk[i].opno].opbr);
428 else switch(stk[i].opno) {
430 printf("\\s%c%d", stk[i].pl, stk[i].parm);
433 printf("\\f%c", stk[i].parm);
436 printf("Bug: stk[%d].opno = %d = .%s, .%s",
437 i, stk[i].opno, br[stk[i].opno].opbr,
438 br[stk[i].opno].clbr);
443 chkcmd(const char *line __unused, const char *mac)
448 * Check to see if it matches top of stack.
450 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
451 stktop--; /* OK. Pop & forget */
453 /* No. Maybe it's an opener */
454 for (i=0; br[i].opbr; i++) {
455 if (eq(mac, br[i].opbr)) {
456 /* Found. Push it. */
458 stk[stktop].opno = i;
460 stk[stktop].parm = 0;
461 stk[stktop].lno = lineno;
465 * Maybe it's an unmatched closer.
466 * NOTE: this depends on the fact
467 * that none of the closers can be
470 if (eq(mac, br[i].clbr)) {
479 nomatch(const char *mac)
484 * Look for a match further down on the stack.
485 * If we find one, it suggests that the stuff in
486 * between is supposed to match itself.
488 for (j=stktop; j>=0; j--)
489 if (eq(mac,br[stk[j].opno].clbr)) {
490 /* Found. Make a good diagnostic. */
493 * Check for special case \fx..\fR and don't
496 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
497 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
502 * We have two unmatched frobs. Chances are
503 * they were intended to match, so we mention
508 printf(" does not match %d: ", stk[j+2].lno);
511 } else for (i=j+1; i <= stktop; i++) {
517 /* Didn't find one. Throw this away. */
519 printf("Unmatched .%s\n", mac);
522 /* eq: are two strings equal? */
524 eq(const char *s1, const char *s2)
526 return (strcmp(s1, s2) == 0);
529 /* print the first part of an error message, given the line number */
534 printf("%s: ", cfilename);
535 printf("%d: ", linen);
539 checkknown(const char *mac)
544 if (binsrch(mac) >= 0)
546 if (mac[0] == '\\' && mac[1] == '"') /* comments */
550 printf("Unknown command: .%s\n", mac);
554 * We have a .de xx line in "line". Add xx to the list of known commands.
561 /* grab the macro being defined */
563 while (isspace(*mac))
567 printf("illegal define: %s\n", line);
571 if (isspace(mac[1]) || mac[1] == '\\')
573 if (ncmds >= MAXCMDS) {
574 printf("Only %d known commands allowed\n", MAXCMDS);
581 * Add mac to the list. We should really have some kind of tree
582 * structure here but this is a quick-and-dirty job and I just don't
583 * have time to mess with it. (I wonder if this will come back to haunt
584 * me someday?) Anyway, I claim that .de is fairly rare in user
585 * nroff programs, and the register loop below is pretty fast.
588 addmac(const char *mac)
590 const char **src, **dest, **loc;
592 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
594 printf("binsrch(%s) -> already in table\n", mac);
598 /* binsrch sets slot as a side effect */
600 printf("binsrch(%s) -> %d\n", mac, slot);
602 loc = &knowncmds[slot];
603 src = &knowncmds[ncmds-1];
607 if ((*loc = strdup(mac)) == NULL)
611 printf("after: %s %s %s %s %s, %d cmds\n",
612 knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
613 knowncmds[slot+1], knowncmds[slot+2], ncmds);
618 * Do a binary search in knowncmds for mac. If found, return the index.
619 * If not, return -1; slot is set to the place it would have gone.
622 binsrch(const char *mac)
624 const char *p; /* pointer to current cmd in list */
625 int d; /* difference if any */
626 int mid; /* mid point in binary search */
627 int top, bot; /* boundaries of bin search, inclusive */
644 slot = bot; /* place it would have gone */