]> CyberLeo.Net >> Repos - FreeBSD/releng/9.2.git/blob - usr.bin/checknr/checknr.c
- Copy stable/9 to releng/9.2 as part of the 9.2-RELEASE cycle.
[FreeBSD/releng/9.2.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #ifndef lint
31 static const char copyright[] =
32 "@(#) Copyright (c) 1980, 1993\n\
33         The Regents of the University of California.  All rights reserved.\n";
34 #endif /* not lint */
35
36 #if 0
37 #ifndef lint
38 static char sccsid[] = "@(#)checknr.c   8.1 (Berkeley) 6/6/93";
39 #endif /* not lint */
40 #endif
41
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44
45 /*
46  * checknr: check an nroff/troff input file for matching macro calls.
47  * we also attempt to match size and font changes, but only the embedded
48  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
49  * later but for now think of these restrictions as contributions to
50  * structured typesetting.
51  */
52 #include <err.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <ctype.h>
57
58 #define MAXSTK  100     /* Stack size */
59 #define MAXBR   100     /* Max number of bracket pairs known */
60 #define MAXCMDS 500     /* Max number of commands known */
61
/*
 * Forward declarations for the routines defined later in this file.
 */
void addcmd(char *);                    /* record the macro named on a .de line */
void addmac(const char *);              /* insert a name into knowncmds[] */
int binsrch(const char *);              /* look a name up in knowncmds[] */
void checkknown(const char *);          /* report a command that is not known */
void chkcmd(const char *, const char *); /* match a macro against the stack */
void complain(int);                     /* report one unmatched stack entry */
int eq(const char *, const char *);     /* string equality test */
void nomatch(const char *);             /* handle a closer that fails to match */
void pe(int);                           /* print the "file: line: " prefix */
void process(FILE *);                   /* check one input stream */
void prop(int);                         /* print one stack entry */
static void usage(void);                /* print usage and exit */
74
/*
 * The stack on which we remember what we've seen so far.
 * Each entry is one opener (a macro listed in br[], or an embedded \s or
 * \f escape) that has not been closed yet.  process() resets stktop to -1
 * at the start of every input stream, so -1 means "stack empty".
 */
struct stkstr {
        int opno;       /* number of opening bracket (index into br[]) */
        int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
        int parm;       /* parm to size, font, etc */
        int lno;        /* line number */
} stk[MAXSTK];
int stktop;             /* index of the top entry of stk[], -1 when empty */
85
/*
 * The kinds of opening and closing brackets.
 * The list ends at the first entry whose opbr is null; the -a option in
 * main() appends user-supplied pairs starting at that entry.  SZ and FT
 * are defined next to their entries because the \s and \f handling refers
 * to those two slots by index.
 */
struct brstr {
        const char *opbr;       /* name of the opening macro */
        const char *clbr;       /* name of the macro that closes it */
} br[MAXBR] = {
        /* A few bare bones troff commands */
#define SZ      0
        {"sz",  "sz"},  /* also \s */
#define FT      1
        {"ft",  "ft"},  /* also \f */
        /* the -mm package */
        {"AL",  "LE"},
        {"AS",  "AE"},
        {"BL",  "LE"},
        {"BS",  "BE"},
        {"DF",  "DE"},
        {"DL",  "LE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ML",  "LE"},
        {"NS",  "NE"},
        {"RL",  "LE"},
        {"VL",  "LE"},
        /* the -ms package */
        {"AB",  "AE"},
        {"BD",  "DE"},
        {"CD",  "DE"},
        {"DS",  "DE"},
        {"FS",  "FE"},
        {"ID",  "DE"},
        {"KF",  "KE"},
        {"KS",  "KE"},
        {"LD",  "DE"},
        {"LG",  "NL"},
        {"QS",  "QE"},
        {"RS",  "RE"},
        {"SM",  "NL"},
        {"XA",  "XE"},
        {"XS",  "XE"},
        /* The -me package */
        {"(b",  ")b"},
        {"(c",  ")c"},
        {"(d",  ")d"},
        {"(f",  ")f"},
        {"(l",  ")l"},
        {"(q",  ")q"},
        {"(x",  ")x"},
        {"(z",  ")z"},
        /* Things needed by preprocessors */
        {"EQ",  "EN"},
        {"TS",  "TE"},
        /* Refer */
        {"[",   "]"},
        {0,     0}      /* end-of-list marker */
};
143
/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 *
 * The entries must stay sorted by character code: binsrch() does a binary
 * search over this table comparing the first two characters of each name,
 * and addmac() inserts new names at the position binsrch() reports.  The
 * initial list ends with a null pointer, which main() uses to count it.
 */
const char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};
181
/* Run-wide state shared by the routines below. */
int     lineno;         /* current line number in input file */
const char *cfilename;  /* name of current file ("stdin" when reading stdin) */
int     nfiles;         /* number of files to process */
int     fflag;          /* -f: ignore \f */
int     sflag;          /* -s: ignore \s */
int     ncmds;          /* size of knowncmds (entries in use) */
int     slot;           /* slot in knowncmds found by binsrch (set on a miss) */
189
190 int
191 main(int argc, char **argv)
192 {
193         FILE *f;
194         int i;
195         char *cp;
196         char b1[4];
197
198         /* Figure out how many known commands there are */
199         while (knowncmds[ncmds])
200                 ncmds++;
201         while (argc > 1 && argv[1][0] == '-') {
202                 switch(argv[1][1]) {
203
204                 /* -a: add pairs of macros */
205                 case 'a':
206                         i = strlen(argv[1]) - 2;
207                         if (i % 6 != 0)
208                                 usage();
209                         /* look for empty macro slots */
210                         for (i=0; br[i].opbr; i++)
211                                 ;
212                         for (cp=argv[1]+3; cp[-1]; cp += 6) {
213                                 br[i].opbr = strncpy(malloc(3), cp, 2);
214                                 br[i].clbr = strncpy(malloc(3), cp+3, 2);
215                                 addmac(br[i].opbr);     /* knows pairs are also known cmds */
216                                 addmac(br[i].clbr);
217                                 i++;
218                         }
219                         break;
220
221                 /* -c: add known commands */
222                 case 'c':
223                         i = strlen(argv[1]) - 2;
224                         if (i % 3 != 0)
225                                 usage();
226                         for (cp=argv[1]+3; cp[-1]; cp += 3) {
227                                 if (cp[2] && cp[2] != '.')
228                                         usage();
229                                 strncpy(b1, cp, 2);
230                                 b1[2] = '\0';
231                                 addmac(b1);
232                         }
233                         break;
234
235                 /* -f: ignore font changes */
236                 case 'f':
237                         fflag = 1;
238                         break;
239
240                 /* -s: ignore size changes */
241                 case 's':
242                         sflag = 1;
243                         break;
244                 default:
245                         usage();
246                 }
247                 argc--; argv++;
248         }
249
250         nfiles = argc - 1;
251
252         if (nfiles > 0) {
253                 for (i=1; i<argc; i++) {
254                         cfilename = argv[i];
255                         f = fopen(cfilename, "r");
256                         if (f == NULL)
257                                 warn("%s", cfilename);
258                         else {
259                                 process(f);
260                                 fclose(f);
261                         }
262                 }
263         } else {
264                 cfilename = "stdin";
265                 process(stdin);
266         }
267         exit(0);
268 }
269
/*
 * Write the command-line synopsis to standard error and terminate the
 * program with exit status 1.
 */
static void
usage(void)
{
        static const char synopsis[] =
        "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

        fputs(synopsis, stderr);
        exit(1);
}
277
278 void
279 process(FILE *f)
280 {
281         int i, n;
282         char mac[5];    /* The current macro or nroff command */
283         int pl;
284         static char line[256];  /* the current line */
285
286         stktop = -1;
287         for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
288                 if (line[0] == '.') {
289                         /*
290                          * find and isolate the macro/command name.
291                          */
292                         strncpy(mac, line+1, 4);
293                         if (isspace(mac[0])) {
294                                 pe(lineno);
295                                 printf("Empty command\n");
296                         } else if (isspace(mac[1])) {
297                                 mac[1] = 0;
298                         } else if (isspace(mac[2])) {
299                                 mac[2] = 0;
300                         } else if (mac[0] != '\\' || mac[1] != '\"') {
301                                 pe(lineno);
302                                 printf("Command too long\n");
303                         }
304
305                         /*
306                          * Is it a known command?
307                          */
308                         checkknown(mac);
309
310                         /*
311                          * Should we add it?
312                          */
313                         if (eq(mac, "de"))
314                                 addcmd(line);
315
316                         chkcmd(line, mac);
317                 }
318
319                 /*
320                  * At this point we process the line looking
321                  * for \s and \f.
322                  */
323                 for (i=0; line[i]; i++)
324                         if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
325                                 if (!sflag && line[++i]=='s') {
326                                         pl = line[++i];
327                                         if (isdigit(pl)) {
328                                                 n = pl - '0';
329                                                 pl = ' ';
330                                         } else
331                                                 n = 0;
332                                         while (isdigit(line[++i]))
333                                                 n = 10 * n + line[i] - '0';
334                                         i--;
335                                         if (n == 0) {
336                                                 if (stk[stktop].opno == SZ) {
337                                                         stktop--;
338                                                 } else {
339                                                         pe(lineno);
340                                                         printf("unmatched \\s0\n");
341                                                 }
342                                         } else {
343                                                 stk[++stktop].opno = SZ;
344                                                 stk[stktop].pl = pl;
345                                                 stk[stktop].parm = n;
346                                                 stk[stktop].lno = lineno;
347                                         }
348                                 } else if (!fflag && line[i]=='f') {
349                                         n = line[++i];
350                                         if (n == 'P') {
351                                                 if (stk[stktop].opno == FT) {
352                                                         stktop--;
353                                                 } else {
354                                                         pe(lineno);
355                                                         printf("unmatched \\fP\n");
356                                                 }
357                                         } else {
358                                                 stk[++stktop].opno = FT;
359                                                 stk[stktop].pl = 1;
360                                                 stk[stktop].parm = n;
361                                                 stk[stktop].lno = lineno;
362                                         }
363                                 }
364                         }
365         }
366         /*
367          * We've hit the end and look at all this stuff that hasn't been
368          * matched yet!  Complain, complain.
369          */
370         for (i=stktop; i>=0; i--) {
371                 complain(i);
372         }
373 }
374
375 void
376 complain(int i)
377 {
378         pe(stk[i].lno);
379         printf("Unmatched ");
380         prop(i);
381         printf("\n");
382 }
383
384 void
385 prop(int i)
386 {
387         if (stk[i].pl == 0)
388                 printf(".%s", br[stk[i].opno].opbr);
389         else switch(stk[i].opno) {
390         case SZ:
391                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
392                 break;
393         case FT:
394                 printf("\\f%c", stk[i].parm);
395                 break;
396         default:
397                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
398                         i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
399         }
400 }
401
402 void
403 chkcmd(const char *line __unused, const char *mac)
404 {
405         int i;
406
407         /*
408          * Check to see if it matches top of stack.
409          */
410         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
411                 stktop--;       /* OK. Pop & forget */
412         else {
413                 /* No. Maybe it's an opener */
414                 for (i=0; br[i].opbr; i++) {
415                         if (eq(mac, br[i].opbr)) {
416                                 /* Found. Push it. */
417                                 stktop++;
418                                 stk[stktop].opno = i;
419                                 stk[stktop].pl = 0;
420                                 stk[stktop].parm = 0;
421                                 stk[stktop].lno = lineno;
422                                 break;
423                         }
424                         /*
425                          * Maybe it's an unmatched closer.
426                          * NOTE: this depends on the fact
427                          * that none of the closers can be
428                          * openers too.
429                          */
430                         if (eq(mac, br[i].clbr)) {
431                                 nomatch(mac);
432                                 break;
433                         }
434                 }
435         }
436 }
437
438 void
439 nomatch(const char *mac)
440 {
441         int i, j;
442
443         /*
444          * Look for a match further down on stack
445          * If we find one, it suggests that the stuff in
446          * between is supposed to match itself.
447          */
448         for (j=stktop; j>=0; j--)
449                 if (eq(mac,br[stk[j].opno].clbr)) {
450                         /* Found.  Make a good diagnostic. */
451                         if (j == stktop-2) {
452                                 /*
453                                  * Check for special case \fx..\fR and don't
454                                  * complain.
455                                  */
456                                 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
457                                  && stk[j+2].opno==FT && stk[j+2].parm=='R') {
458                                         stktop = j -1;
459                                         return;
460                                 }
461                                 /*
462                                  * We have two unmatched frobs.  Chances are
463                                  * they were intended to match, so we mention
464                                  * them together.
465                                  */
466                                 pe(stk[j+1].lno);
467                                 prop(j+1);
468                                 printf(" does not match %d: ", stk[j+2].lno);
469                                 prop(j+2);
470                                 printf("\n");
471                         } else for (i=j+1; i <= stktop; i++) {
472                                 complain(i);
473                         }
474                         stktop = j-1;
475                         return;
476                 }
477         /* Didn't find one.  Throw this away. */
478         pe(lineno);
479         printf("Unmatched .%s\n", mac);
480 }
481
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(const char *s1, const char *s2)
{
        int differ;

        differ = strcmp(s1, s2);
        if (differ != 0)
                return (0);
        return (1);
}
488
489 /* print the first part of an error message, given the line number */
490 void
491 pe(int linen)
492 {
493         if (nfiles > 1)
494                 printf("%s: ", cfilename);
495         printf("%d: ", linen);
496 }
497
498 void
499 checkknown(const char *mac)
500 {
501
502         if (eq(mac, "."))
503                 return;
504         if (binsrch(mac) >= 0)
505                 return;
506         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
507                 return;
508
509         pe(lineno);
510         printf("Unknown command: .%s\n", mac);
511 }
512
513 /*
514  * We have a .de xx line in "line".  Add xx to the list of known commands.
515  */
516 void
517 addcmd(char *line)
518 {
519         char *mac;
520
521         /* grab the macro being defined */
522         mac = line+4;
523         while (isspace(*mac))
524                 mac++;
525         if (*mac == 0) {
526                 pe(lineno);
527                 printf("illegal define: %s\n", line);
528                 return;
529         }
530         mac[2] = 0;
531         if (isspace(mac[1]) || mac[1] == '\\')
532                 mac[1] = 0;
533         if (ncmds >= MAXCMDS) {
534                 printf("Only %d known commands allowed\n", MAXCMDS);
535                 exit(1);
536         }
537         addmac(mac);
538 }
539
540 /*
541  * Add mac to the list.  We should really have some kind of tree
542  * structure here but this is a quick-and-dirty job and I just don't
543  * have time to mess with it.  (I wonder if this will come back to haunt
544  * me someday?)  Anyway, I claim that .de is fairly rare in user
545  * nroff programs, and the register loop below is pretty fast.
546  */
547 void
548 addmac(const char *mac)
549 {
550         const char **src, **dest, **loc;
551
552         if (binsrch(mac) >= 0){ /* it's OK to redefine something */
553 #ifdef DEBUG
554                 printf("binsrch(%s) -> already in table\n", mac);
555 #endif
556                 return;
557         }
558         /* binsrch sets slot as a side effect */
559 #ifdef DEBUG
560 printf("binsrch(%s) -> %d\n", mac, slot);
561 #endif
562         loc = &knowncmds[slot];
563         src = &knowncmds[ncmds-1];
564         dest = src+1;
565         while (dest > loc)
566                 *dest-- = *src--;
567         *loc = strcpy(malloc(3), mac);
568         ncmds++;
569 #ifdef DEBUG
570 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
571 #endif
572 }
573
574 /*
575  * Do a binary search in knowncmds for mac.
576  * If found, return the index.  If not, return -1.
577  */
578 int
579 binsrch(const char *mac)
580 {
581         const char *p;  /* pointer to current cmd in list */
582         int d;          /* difference if any */
583         int mid;        /* mid point in binary search */
584         int top, bot;   /* boundaries of bin search, inclusive */
585
586         top = ncmds-1;
587         bot = 0;
588         while (top >= bot) {
589                 mid = (top+bot)/2;
590                 p = knowncmds[mid];
591                 d = p[0] - mac[0];
592                 if (d == 0)
593                         d = p[1] - mac[1];
594                 if (d == 0)
595                         return mid;
596                 if (d < 0)
597                         bot = mid + 1;
598                 else
599                         top = mid - 1;
600         }
601         slot = bot;     /* place it would have gone */
602         return -1;
603 }