2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 static const char copyright[] =
32 "@(#) Copyright (c) 1980, 1993\n\
33 The Regents of the University of California. All rights reserved.\n";
38 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
46 * checknr: check an nroff/troff input file for matching macro calls.
47 * We also attempt to match size and font changes, but only the embedded
48 * kind. These must end in \s0 and \fP, respectively. Maybe more sophistication
49 * later, but for now think of these restrictions as contributions to
50 * structured typesetting.
59 #define MAXSTK 100 /* Stack size */
60 #define MAXBR 100 /* Max number of bracket pairs known */
61 #define MAXCMDS 600 /* Max number of commands known */
63 static void addcmd(char *);
64 static void addmac(const char *);
65 static int binsrch(const char *);
66 static void checkknown(const char *);
67 static void chkcmd(const char *, const char *);
68 static void complain(int);
69 static int eq(const char *, const char *);
70 static void nomatch(const char *);
72 static void process(FILE *);
73 static void prop(int);
74 static void usage(void);
77 * The stack on which we remember what we've seen so far.
79 static struct stkstr {
80 int opno; /* number of opening bracket */
81 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
82 int parm; /* parm to size, font, etc */
83 int lno; /* line number */
88 * The kinds of opening and closing brackets.
94 /* A few bare bones troff commands */
96 {"sz", "sz"}, /* also \s */
98 {"ft", "ft"}, /* also \f */
112 /* the -ms package */
128 /* The -me package */
137 /* The -mdoc package */
150 /* Things needed by preprocessors */
159 * All commands known to nroff, plus macro packages.
160 * Used so we can complain about unrecognized commands.
162 static const char *knowncmds[MAXCMDS] = {
163 "$c", "$f", "$h", "$p", "$s", "%A", "%B", "%C", "%D", "%I", "%J", "%N", "%O",
164 "%P", "%Q", "%R", "%T", "%V", "(b", "(c", "(d", "(f", "(l", "(q", "(t", "(x",
165 "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++", "+c", "1C",
166 "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M", "@c", "@e", "@f",
167 "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB", "AE", "AF", "AI", "AL",
168 "AM", "AS", "AT", "AU", "AX", "Ac", "Ad", "An", "Ao", "Ap", "Aq", "Ar", "At",
169 "B", "B" , "B1", "B2", "BD", "BE", "BG", "BL", "BS", "BT", "BX", "Bc", "Bd",
170 "Bf", "Bk", "Bl", "Bo", "Bq", "Bsx", "Bx", "C1", "C2", "CD", "CM", "CT", "Cd",
171 "Cm", "D", "D" , "D1", "DA", "DE", "DF", "DL", "DS", "DT", "Db", "Dc", "Dd",
172 "Dl", "Do", "Dq", "Dt", "Dv", "EC", "EF", "EG", "EH", "EM", "EN", "EQ", "EX",
173 "Ec", "Ed", "Ef", "Ek", "El", "Em", "Eo", "Er", "Ev", "FA", "FD", "FE", "FG",
174 "FJ", "FK", "FL", "FN", "FO", "FQ", "FS", "FV", "FX", "Fa", "Fc", "Fd", "Fl",
175 "Fn", "Fo", "Ft", "Fx", "H", "H" , "HC", "HD", "HM", "HO", "HU", "I", "I" ,
176 "ID", "IE", "IH", "IM", "IP", "IX", "IZ", "Ic", "In", "It", "KD", "KE", "KF",
177 "KQ", "KS", "LB", "LC", "LD", "LE", "LG", "LI", "LP", "Lb", "Li", "MC", "ME",
178 "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH", "NL", "NP", "NS", "Nd", "Nm",
179 "No", "Ns", "Nx", "OF", "OH", "OK", "OP", "Oc", "Oo", "Op", "Os", "Ot", "Ox",
180 "P", "P" , "P1", "PF", "PH", "PP", "PT", "PX", "PY", "Pa", "Pc", "Pf", "Po",
181 "Pp", "Pq", "QE", "QP", "QS", "Qc", "Ql", "Qo", "Qq", "R", "R" , "RA", "RC",
182 "RE", "RL", "RP", "RQ", "RS", "RT", "Re", "Rs", "S", "S" , "S0", "S2", "S3",
183 "SA", "SG", "SH", "SK", "SM", "SP", "SY", "Sc", "Sh", "Sm", "So", "Sq", "Ss",
184 "St", "Sx", "Sy", "T&", "TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP",
185 "TQ", "TR", "TS", "TX", "Tn", "UL", "US", "UX", "Ud", "Ux", "VL", "Va", "Vt",
186 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "Xc", "Xo", "Xr", "[",
187 "[" , "[-", "[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "\\{", "\\}",
188 "]", "]" , "]-", "]<", "]>", "][", "ab", "ac", "ad", "af", "am", "ar", "as",
189 "b", "b" , "ba", "bc", "bd", "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc",
190 "ce", "cf", "ch", "chop", "cs", "ct", "cu", "da", "de", "di", "dl", "dn", "do",
191 "ds", "dt", "dw", "dy", "ec", "ef", "eh", "el", "em", "eo", "ep", "ev", "evc",
192 "ex", "fallback", "fc", "feature", "fi", "fl", "flig", "fo", "fp", "ft", "ftr",
193 "fz", "fzoom", "hc", "he", "hidechar", "hl", "hp", "ht", "hw", "hx", "hy",
194 "hylang", "i", "i" , "ie", "if", "ig", "in", "ip", "it", "ix", "kern",
195 "kernafter", "kernbefore", "kernpair", "lc", "lc_ctype", "lg", "lhang", "li",
196 "ll", "ln", "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo",
197 "n1", "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
198 "of", "oh", "os", "pa", "papersize", "pc", "pi", "pl", "pm", "pn", "po", "pp",
199 "ps", "q", "q" , "r", "r" , "rb", "rd", "re", "recursionlimit", "return",
200 "rhang", "rm", "rn", "ro", "rr", "rs", "rt", "sb", "sc", "sh", "shift", "sk",
201 "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th", "ti", "tl", "tm", "tp",
202 "tr", "track", "u", "uf", "uh", "ul", "vs", "wh", "xflag", "xp", "yr",
206 static int lineno; /* current line number in input file */
207 static const char *cfilename; /* name of current file */
208 static int nfiles; /* number of files to process */
209 static int fflag; /* -f: ignore \f */
210 static int sflag; /* -s: ignore \s */
211 static int ncmds; /* size of knowncmds */
212 static int slot; /* slot in knowncmds found by binsrch */
215 main(int argc, char **argv)
222 /* Figure out how many known commands there are */
223 while (knowncmds[ncmds])
225 while (argc > 1 && argv[1][0] == '-') {
228 /* -a: add pairs of macros */
230 i = strlen(argv[1]) - 2;
233 /* look for empty macro slots */
234 for (i=0; br[i].opbr; i++)
236 for (cp=argv[1]+3; cp[-1]; cp += 6) {
240 errx(1, "too many pairs");
241 if ((tmp = malloc(3)) == NULL)
245 if ((tmp = malloc(3)) == NULL)
247 strlcpy(tmp, cp+3, 3);
249 addmac(br[i].opbr); /* knows pairs are also known cmds */
255 /* -c: add known commands */
257 i = strlen(argv[1]) - 2;
260 for (cp=argv[1]+3; cp[-1]; cp += 3) {
261 if (cp[2] && cp[2] != '.')
269 /* -f: ignore font changes */
274 /* -s: ignore size changes */
287 for (i = 1; i < argc; i++) {
289 f = fopen(cfilename, "r");
291 warn("%s", cfilename);
308 "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
316 char mac[64]; /* The current macro or nroff command */
324 for (lineno = 1; getline(&line, &linecap, f) > 0; lineno++) {
325 if (line[0] == '.') {
327 * find and isolate the macro/command name.
329 strncpy(mac, line+1, 4);
330 if (isspace(mac[0])) {
332 printf("Empty command\n");
333 } else if (isspace(mac[1])) {
335 } else if (isspace(mac[2])) {
337 } else if (mac[0] != '\\' || mac[1] != '\"') {
339 printf("Command too long\n");
343 * Is it a known command?
357 * At this point we process the line looking
360 for (i = 0; line[i]; i++)
361 if (line[i] == '\\' && (i == 0 || line[i-1] != '\\')) {
362 if (!sflag && line[++i] == 's') {
369 while (isdigit(line[++i]))
370 n = 10 * n + line[i] - '0';
374 stk[stktop].opno == SZ) {
378 printf("unmatched \\s0\n");
381 stk[++stktop].opno = SZ;
383 stk[stktop].parm = n;
384 stk[stktop].lno = lineno;
386 } else if (!fflag && line[i] == 'f') {
390 stk[stktop].opno == FT) {
394 printf("unmatched \\fP\n");
397 stk[++stktop].opno = FT;
399 stk[stktop].parm = n;
400 stk[stktop].lno = lineno;
407 * We've hit the end and look at all this stuff that hasn't been
408 * matched yet! Complain, complain.
410 for (i = stktop; i >= 0; i--) {
419 printf("Unmatched ");
428 printf(".%s", br[stk[i].opno].opbr);
429 else switch(stk[i].opno) {
431 printf("\\s%c%d", stk[i].pl, stk[i].parm);
434 printf("\\f%c", stk[i].parm);
437 printf("Bug: stk[%d].opno = %d = .%s, .%s",
438 i, stk[i].opno, br[stk[i].opno].opbr,
439 br[stk[i].opno].clbr);
444 chkcmd(const char *line __unused, const char *mac)
449 * Check to see if it matches top of stack.
451 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
452 stktop--; /* OK. Pop & forget */
454 /* No. Maybe it's an opener */
455 for (i=0; br[i].opbr; i++) {
456 if (eq(mac, br[i].opbr)) {
457 /* Found. Push it. */
459 stk[stktop].opno = i;
461 stk[stktop].parm = 0;
462 stk[stktop].lno = lineno;
466 * Maybe it's an unmatched closer.
467 * NOTE: this depends on the fact
468 * that none of the closers can be
471 if (eq(mac, br[i].clbr)) {
480 nomatch(const char *mac)
485 * Look for a match further down on the stack.
486 * If we find one, it suggests that the stuff in
487 * between is supposed to match itself.
489 for (j=stktop; j>=0; j--)
490 if (eq(mac,br[stk[j].opno].clbr)) {
491 /* Found. Make a good diagnostic. */
494 * Check for special case \fx..\fR and don't
497 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
498 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
503 * We have two unmatched frobs. Chances are
504 * they were intended to match, so we mention
509 printf(" does not match %d: ", stk[j+2].lno);
512 } else for (i=j+1; i <= stktop; i++) {
518 /* Didn't find one. Throw this away. */
520 printf("Unmatched .%s\n", mac);
523 /* eq: are two strings equal? */
525 eq(const char *s1, const char *s2)
527 return (strcmp(s1, s2) == 0);
530 /* print the first part of an error message, given the line number */
535 printf("%s: ", cfilename);
536 printf("%d: ", linen);
540 checkknown(const char *mac)
545 if (binsrch(mac) >= 0)
547 if (mac[0] == '\\' && mac[1] == '"') /* comments */
551 printf("Unknown command: .%s\n", mac);
555 * We have a .de xx line in "line". Add xx to the list of known commands.
562 /* grab the macro being defined */
564 while (isspace(*mac))
568 printf("illegal define: %s\n", line);
572 if (isspace(mac[1]) || mac[1] == '\\')
574 if (ncmds >= MAXCMDS) {
575 printf("Only %d known commands allowed\n", MAXCMDS);
582 * Add mac to the list. We should really have some kind of tree
583 * structure here but this is a quick-and-dirty job and I just don't
584 * have time to mess with it. (I wonder if this will come back to haunt
585 * me someday?) Anyway, I claim that .de is fairly rare in user
586 * nroff programs, and the register loop below is pretty fast.
589 addmac(const char *mac)
591 const char **src, **dest, **loc;
593 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
595 printf("binsrch(%s) -> already in table\n", mac);
599 /* binsrch sets slot as a side effect */
601 printf("binsrch(%s) -> %d\n", mac, slot);
603 loc = &knowncmds[slot];
604 src = &knowncmds[ncmds-1];
608 if ((*loc = strdup(mac)) == NULL)
612 printf("after: %s %s %s %s %s, %d cmds\n",
613 knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot],
614 knowncmds[slot+1], knowncmds[slot+2], ncmds);
619 * Do a binary search in knowncmds for mac. If found, return the index.
620 * If not, return -1; slot is set to the place mac would have gone.
623 binsrch(const char *mac)
625 const char *p; /* pointer to current cmd in list */
626 int d; /* difference if any */
627 int mid; /* mid point in binary search */
628 int top, bot; /* boundaries of bin search, inclusive */
645 slot = bot; /* place it would have gone */