2 * Copyright (c) 1980, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 static const char copyright[] =
32 "@(#) Copyright (c) 1980, 1993\n\
33 The Regents of the University of California. All rights reserved.\n";
38 static char sccsid[] = "@(#)checknr.c 8.1 (Berkeley) 6/6/93";
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
46 * checknr: check an nroff/troff input file for matching macro calls.
47 * We also attempt to match size and font changes, but only the embedded
48 * kind. These must end in \s0 and \fP respectively. Maybe more sophistication
49 * later but for now think of these restrictions as contributions to
50 * structured typesetting.
58 #define MAXSTK 100 /* Stack size */
59 #define MAXBR 100 /* Max number of bracket pairs known */
60 #define MAXCMDS 500 /* Max number of commands known */
62 static void addcmd(char *);
63 static void addmac(const char *);
64 static int binsrch(const char *);
65 static void checkknown(const char *);
66 static void chkcmd(const char *, const char *);
67 static void complain(int);
68 static int eq(const char *, const char *);
69 static void nomatch(const char *);
71 static void process(FILE *);
72 static void prop(int);
73 static void usage(void);
76 * The stack on which we remember what we've seen so far.
78 static struct stkstr {
79 int opno; /* number of opening bracket */
80 int pl; /* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
81 int parm; /* parm to size, font, etc */
82 int lno; /* line number */
87 * The kinds of opening and closing brackets.
93 /* A few bare bones troff commands */
95 {"sz", "sz"}, /* also \s */
97 {"ft", "ft"}, /* also \f */
111 /* the -ms package */
127 /* The -me package */
136 /* Things needed by preprocessors */
145 * All commands known to nroff, plus macro packages.
146 * Used so we can complain about unrecognized commands.
148 static const char *knowncmds[MAXCMDS] = {
149 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
150 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
151 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
152 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
153 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
154 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
155 "D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
156 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
157 "FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
158 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
159 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
160 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
161 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
162 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
163 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
164 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
165 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
166 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
167 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
168 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
169 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
170 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
171 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
172 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
173 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
174 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
175 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
176 "q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
177 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
178 "ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
182 static int lineno; /* current line number in input file */
183 static const char *cfilename; /* name of current file */
184 static int nfiles; /* number of files to process */
185 static int fflag; /* -f: ignore \f */
186 static int sflag; /* -s: ignore \s */
187 static int ncmds; /* size of knowncmds */
188 static int slot; /* slot in knowncmds found by binsrch */
191 main(int argc, char **argv)
198 /* Figure out how many known commands there are */
199 while (knowncmds[ncmds])
201 while (argc > 1 && argv[1][0] == '-') {
204 /* -a: add pairs of macros */
206 i = strlen(argv[1]) - 2;
209 /* look for empty macro slots */
210 for (i=0; br[i].opbr; i++)
212 for (cp=argv[1]+3; cp[-1]; cp += 6) {
213 br[i].opbr = strncpy(malloc(3), cp, 2);
214 br[i].clbr = strncpy(malloc(3), cp+3, 2);
215 addmac(br[i].opbr); /* knows pairs are also known cmds */
221 /* -c: add known commands */
223 i = strlen(argv[1]) - 2;
226 for (cp=argv[1]+3; cp[-1]; cp += 3) {
227 if (cp[2] && cp[2] != '.')
235 /* -f: ignore font changes */
240 /* -s: ignore size changes */
253 for (i=1; i<argc; i++) {
255 f = fopen(cfilename, "r");
257 warn("%s", cfilename);
274 "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n");
282 char mac[5]; /* The current macro or nroff command */
284 static char line[256]; /* the current line */
287 for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
288 if (line[0] == '.') {
290 * find and isolate the macro/command name.
292 strncpy(mac, line+1, 4);
293 if (isspace(mac[0])) {
295 printf("Empty command\n");
296 } else if (isspace(mac[1])) {
298 } else if (isspace(mac[2])) {
300 } else if (mac[0] != '\\' || mac[1] != '\"') {
302 printf("Command too long\n");
306 * Is it a known command?
320 * At this point we process the line looking
323 for (i=0; line[i]; i++)
324 if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
325 if (!sflag && line[++i]=='s') {
332 while (isdigit(line[++i]))
333 n = 10 * n + line[i] - '0';
336 if (stk[stktop].opno == SZ) {
340 printf("unmatched \\s0\n");
343 stk[++stktop].opno = SZ;
345 stk[stktop].parm = n;
346 stk[stktop].lno = lineno;
348 } else if (!fflag && line[i]=='f') {
351 if (stk[stktop].opno == FT) {
355 printf("unmatched \\fP\n");
358 stk[++stktop].opno = FT;
360 stk[stktop].parm = n;
361 stk[stktop].lno = lineno;
367 * We've hit the end and look at all this stuff that hasn't been
368 * matched yet! Complain, complain.
370 for (i=stktop; i>=0; i--) {
379 printf("Unmatched ");
388 printf(".%s", br[stk[i].opno].opbr);
389 else switch(stk[i].opno) {
391 printf("\\s%c%d", stk[i].pl, stk[i].parm);
394 printf("\\f%c", stk[i].parm);
397 printf("Bug: stk[%d].opno = %d = .%s, .%s",
398 i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
403 chkcmd(const char *line __unused, const char *mac)
408 * Check to see if it matches top of stack.
410 if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
411 stktop--; /* OK. Pop & forget */
413 /* No. Maybe it's an opener */
414 for (i=0; br[i].opbr; i++) {
415 if (eq(mac, br[i].opbr)) {
416 /* Found. Push it. */
418 stk[stktop].opno = i;
420 stk[stktop].parm = 0;
421 stk[stktop].lno = lineno;
425 * Maybe it's an unmatched closer.
426 * NOTE: this depends on the fact
427 * that none of the closers can be
430 if (eq(mac, br[i].clbr)) {
439 nomatch(const char *mac)
444 * Look for a match further down on the stack.
445 * If we find one, it suggests that the stuff in
446 * between is supposed to match itself.
448 for (j=stktop; j>=0; j--)
449 if (eq(mac,br[stk[j].opno].clbr)) {
450 /* Found. Make a good diagnostic. */
453 * Check for special case \fx..\fR and don't
456 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
457 && stk[j+2].opno==FT && stk[j+2].parm=='R') {
462 * We have two unmatched frobs. Chances are
463 * they were intended to match, so we mention
468 printf(" does not match %d: ", stk[j+2].lno);
471 } else for (i=j+1; i <= stktop; i++) {
477 /* Didn't find one. Throw this away. */
479 printf("Unmatched .%s\n", mac);
482 /* eq: are two strings equal? */
484 eq(const char *s1, const char *s2)
486 return (strcmp(s1, s2) == 0);
489 /* print the first part of an error message, given the line number */
494 printf("%s: ", cfilename);
495 printf("%d: ", linen);
499 checkknown(const char *mac)
504 if (binsrch(mac) >= 0)
506 if (mac[0] == '\\' && mac[1] == '"') /* comments */
510 printf("Unknown command: .%s\n", mac);
514 * We have a .de xx line in "line". Add xx to the list of known commands.
521 /* grab the macro being defined */
523 while (isspace(*mac))
527 printf("illegal define: %s\n", line);
531 if (isspace(mac[1]) || mac[1] == '\\')
533 if (ncmds >= MAXCMDS) {
534 printf("Only %d known commands allowed\n", MAXCMDS);
541 * Add mac to the list. We should really have some kind of tree
542 * structure here but this is a quick-and-dirty job and I just don't
543 * have time to mess with it. (I wonder if this will come back to haunt
544 * me someday?) Anyway, I claim that .de is fairly rare in user
545 * nroff programs, and the register loop below is pretty fast.
548 addmac(const char *mac)
550 const char **src, **dest, **loc;
552 if (binsrch(mac) >= 0){ /* it's OK to redefine something */
554 printf("binsrch(%s) -> already in table\n", mac);
558 /* binsrch sets slot as a side effect */
560 printf("binsrch(%s) -> %d\n", mac, slot);
562 loc = &knowncmds[slot];
563 src = &knowncmds[ncmds-1];
567 *loc = strcpy(malloc(3), mac);
570 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
575 * Do a binary search in knowncmds for mac.
576 * If found, return the index. If not, return -1 and set slot to where it would go.
579 binsrch(const char *mac)
581 const char *p; /* pointer to current cmd in list */
582 int d; /* difference if any */
583 int mid; /* mid point in binary search */
584 int top, bot; /* boundaries of bin search, inclusive */
601 slot = bot; /* place it would have gone */