]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - usr.bin/checknr/checknr.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / usr.bin / checknr / checknr.c
1 /*
2  * Copyright (c) 1980, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *      This product includes software developed by the University of
16  *      California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33
34 #ifndef lint
35 static const char copyright[] =
36 "@(#) Copyright (c) 1980, 1993\n\
37         The Regents of the University of California.  All rights reserved.\n";
38 #endif /* not lint */
39
40 #if 0
41 #ifndef lint
42 static char sccsid[] = "@(#)checknr.c   8.1 (Berkeley) 6/6/93";
43 #endif /* not lint */
44 #endif
45
46 #include <sys/cdefs.h>
47 __FBSDID("$FreeBSD$");
48
49 /*
50  * checknr: check an nroff/troff input file for matching macro calls.
51  * we also attempt to match size and font changes, but only the embedded
52  * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
53  * later but for now think of these restrictions as contributions to
54  * structured typesetting.
55  */
56 #include <err.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <ctype.h>
61
62 #define MAXSTK  100     /* Stack size */
63 #define MAXBR   100     /* Max number of bracket pairs known */
64 #define MAXCMDS 500     /* Max number of commands known */
65
66 void addcmd(char *);
67 void addmac(const char *);
68 int binsrch(const char *);
69 void checkknown(const char *);
70 void chkcmd(const char *, const char *);
71 void complain(int);
72 int eq(const char *, const char *);
73 void nomatch(const char *);
74 void pe(int);
75 void process(FILE *);
76 void prop(int);
77 static void usage(void);
78
79 /*
80  * The stack on which we remember what we've seen so far.
    * Each entry describes one construct that has been opened and not yet
    * closed: a macro opener from br[], a \s size change, or a \f font change.
81  */
82 struct stkstr {
83         int opno;       /* index into br[] of the opening bracket (SZ/FT for \s, \f) */
84         int pl;         /* '+', '-', ' ' for \s, 1 for \f, 0 for a macro opener */
85         int parm;       /* parm to size, font, etc; 0 for a macro opener */
86         int lno;        /* line number the thing came in */
87 } stk[MAXSTK];
88 int stktop;     /* index of the top entry in stk; process() resets it to -1 (empty) */
89
90 /*
91  * The kinds of opening and closing brackets.
    * The table ends at the first entry whose opbr is 0; main() appends the
    * pairs given with -a at that position.  SZ and FT are the indexes of the
    * first two entries, which are also used for \s and \f escapes.
92  */
93 struct brstr {
94         const char *opbr;       /* name of the opening macro */
95         const char *clbr;       /* name of the macro that closes it */
96 } br[MAXBR] = {
97         /* A few bare bones troff commands */
98 #define SZ      0
99         {"sz",  "sz"},  /* also \s */
100 #define FT      1
101         {"ft",  "ft"},  /* also \f */
102         /* the -mm package */
103         {"AL",  "LE"},
104         {"AS",  "AE"},
105         {"BL",  "LE"},
106         {"BS",  "BE"},
107         {"DF",  "DE"},
108         {"DL",  "LE"},
109         {"DS",  "DE"},
110         {"FS",  "FE"},
111         {"ML",  "LE"},
112         {"NS",  "NE"},
113         {"RL",  "LE"},
114         {"VL",  "LE"},
115         /* the -ms package */
116         {"AB",  "AE"},
117         {"BD",  "DE"},
118         {"CD",  "DE"},
119         {"DS",  "DE"},
120         {"FS",  "FE"},
121         {"ID",  "DE"},
122         {"KF",  "KE"},
123         {"KS",  "KE"},
124         {"LD",  "DE"},
125         {"LG",  "NL"},
126         {"QS",  "QE"},
127         {"RS",  "RE"},
128         {"SM",  "NL"},
129         {"XA",  "XE"},
130         {"XS",  "XE"},
131         /* The -me package */
132         {"(b",  ")b"},
133         {"(c",  ")c"},
134         {"(d",  ")d"},
135         {"(f",  ")f"},
136         {"(l",  ")l"},
137         {"(q",  ")q"},
138         {"(x",  ")x"},
139         {"(z",  ")z"},
140         /* Things needed by preprocessors */
141         {"EQ",  "EN"},
142         {"TS",  "TE"},
143         /* Refer */
144         {"[",   "]"},
145         {0,     0}      /* end-of-table marker */
146 };
147
148 /*
149  * All commands known to nroff, plus macro packages.
150  * Used so we can complain about unrecognized commands.
     *
     * The entries must stay sorted in ascending order of their first two
     * characters, because binsrch() does a binary search on exactly those
     * two characters.  The trailing 0 lets main() count the initial
     * entries into ncmds; addmac() inserts new names in sorted position.
151  */
152 const char *knowncmds[MAXCMDS] = {
153 "$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
154 "(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
155 "+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
156 "@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
157 "AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B",  "B1", "B2",
158 "BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
159 "D",  "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
160 "EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
161 "FQ", "FS", "FV", "FX", "H",  "HC", "HD", "HM", "HO", "HU", "I",  "ID",
162 "IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
163 "LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
164 "MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
165 "P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R",  "RA",
166 "RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S",  "S0", "S2", "S3", "SA",
167 "SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
168 "TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
169 "WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[",  "[-", "[0",
170 "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]",  "]-", "]<", "]>",
171 "][", "ab", "ac", "ad", "af", "am", "ar", "as", "b",  "ba", "bc", "bd",
172 "bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
173 "ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
174 "ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
175 "fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
176 "ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
177 "lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
178 "n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
179 "of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
180 "q",  "r",  "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
181 "sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
182 "ti", "tl", "tm", "tp", "tr", "u",  "uf", "uh", "ul", "vs", "wh", "xp",
183 "yr", 0
184 };
185
186 int     lineno;         /* current line number in input file */
187 const char *cfilename;  /* name of current file ("stdin" when reading standard input) */
188 int     nfiles;         /* number of files to process; pe() prints the file name when > 1 */
189 int     fflag;          /* -f: ignore \f */
190 int     sflag;          /* -s: ignore \s */
191 int     ncmds;          /* number of entries currently in knowncmds */
192 int     slot;           /* insertion index in knowncmds set by binsrch when the search fails */
193
194 int
195 main(int argc, char **argv)
196 {
197         FILE *f;
198         int i;
199         char *cp;
200         char b1[4];
201
202         /* Figure out how many known commands there are */
203         while (knowncmds[ncmds])
204                 ncmds++;
205         while (argc > 1 && argv[1][0] == '-') {
206                 switch(argv[1][1]) {
207
208                 /* -a: add pairs of macros */
209                 case 'a':
210                         i = strlen(argv[1]) - 2;
211                         if (i % 6 != 0)
212                                 usage();
213                         /* look for empty macro slots */
214                         for (i=0; br[i].opbr; i++)
215                                 ;
216                         for (cp=argv[1]+3; cp[-1]; cp += 6) {
217                                 br[i].opbr = strncpy(malloc(3), cp, 2);
218                                 br[i].clbr = strncpy(malloc(3), cp+3, 2);
219                                 addmac(br[i].opbr);     /* knows pairs are also known cmds */
220                                 addmac(br[i].clbr);
221                                 i++;
222                         }
223                         break;
224
225                 /* -c: add known commands */
226                 case 'c':
227                         i = strlen(argv[1]) - 2;
228                         if (i % 3 != 0)
229                                 usage();
230                         for (cp=argv[1]+3; cp[-1]; cp += 3) {
231                                 if (cp[2] && cp[2] != '.')
232                                         usage();
233                                 strncpy(b1, cp, 2);
234                                 b1[2] = '\0';
235                                 addmac(b1);
236                         }
237                         break;
238
239                 /* -f: ignore font changes */
240                 case 'f':
241                         fflag = 1;
242                         break;
243
244                 /* -s: ignore size changes */
245                 case 's':
246                         sflag = 1;
247                         break;
248                 default:
249                         usage();
250                 }
251                 argc--; argv++;
252         }
253
254         nfiles = argc - 1;
255
256         if (nfiles > 0) {
257                 for (i=1; i<argc; i++) {
258                         cfilename = argv[i];
259                         f = fopen(cfilename, "r");
260                         if (f == NULL)
261                                 warn("%s", cfilename);
262                         else {
263                                 process(f);
264                                 fclose(f);
265                         }
266                 }
267         } else {
268                 cfilename = "stdin";
269                 process(stdin);
270         }
271         exit(0);
272 }
273
/*
 * Print the command synopsis on standard error and terminate the
 * program with exit status 1.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: checknr [-a.xx.yy.xx.yy...] [-c.xx.xx.xx...] [-s] [-f] file\n";

	fputs(synopsis, stderr);
	exit(1);
}
281
282 void
283 process(FILE *f)
284 {
285         int i, n;
286         char mac[5];    /* The current macro or nroff command */
287         int pl;
288         static char line[256];  /* the current line */
289
290         stktop = -1;
291         for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
292                 if (line[0] == '.') {
293                         /*
294                          * find and isolate the macro/command name.
295                          */
296                         strncpy(mac, line+1, 4);
297                         if (isspace(mac[0])) {
298                                 pe(lineno);
299                                 printf("Empty command\n");
300                         } else if (isspace(mac[1])) {
301                                 mac[1] = 0;
302                         } else if (isspace(mac[2])) {
303                                 mac[2] = 0;
304                         } else if (mac[0] != '\\' || mac[1] != '\"') {
305                                 pe(lineno);
306                                 printf("Command too long\n");
307                         }
308
309                         /*
310                          * Is it a known command?
311                          */
312                         checkknown(mac);
313
314                         /*
315                          * Should we add it?
316                          */
317                         if (eq(mac, "de"))
318                                 addcmd(line);
319
320                         chkcmd(line, mac);
321                 }
322
323                 /*
324                  * At this point we process the line looking
325                  * for \s and \f.
326                  */
327                 for (i=0; line[i]; i++)
328                         if (line[i]=='\\' && (i==0 || line[i-1]!='\\')) {
329                                 if (!sflag && line[++i]=='s') {
330                                         pl = line[++i];
331                                         if (isdigit(pl)) {
332                                                 n = pl - '0';
333                                                 pl = ' ';
334                                         } else
335                                                 n = 0;
336                                         while (isdigit(line[++i]))
337                                                 n = 10 * n + line[i] - '0';
338                                         i--;
339                                         if (n == 0) {
340                                                 if (stk[stktop].opno == SZ) {
341                                                         stktop--;
342                                                 } else {
343                                                         pe(lineno);
344                                                         printf("unmatched \\s0\n");
345                                                 }
346                                         } else {
347                                                 stk[++stktop].opno = SZ;
348                                                 stk[stktop].pl = pl;
349                                                 stk[stktop].parm = n;
350                                                 stk[stktop].lno = lineno;
351                                         }
352                                 } else if (!fflag && line[i]=='f') {
353                                         n = line[++i];
354                                         if (n == 'P') {
355                                                 if (stk[stktop].opno == FT) {
356                                                         stktop--;
357                                                 } else {
358                                                         pe(lineno);
359                                                         printf("unmatched \\fP\n");
360                                                 }
361                                         } else {
362                                                 stk[++stktop].opno = FT;
363                                                 stk[stktop].pl = 1;
364                                                 stk[stktop].parm = n;
365                                                 stk[stktop].lno = lineno;
366                                         }
367                                 }
368                         }
369         }
370         /*
371          * We've hit the end and look at all this stuff that hasn't been
372          * matched yet!  Complain, complain.
373          */
374         for (i=stktop; i>=0; i--) {
375                 complain(i);
376         }
377 }
378
379 void
380 complain(int i)
381 {
382         pe(stk[i].lno);
383         printf("Unmatched ");
384         prop(i);
385         printf("\n");
386 }
387
388 void
389 prop(int i)
390 {
391         if (stk[i].pl == 0)
392                 printf(".%s", br[stk[i].opno].opbr);
393         else switch(stk[i].opno) {
394         case SZ:
395                 printf("\\s%c%d", stk[i].pl, stk[i].parm);
396                 break;
397         case FT:
398                 printf("\\f%c", stk[i].parm);
399                 break;
400         default:
401                 printf("Bug: stk[%d].opno = %d = .%s, .%s",
402                         i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
403         }
404 }
405
406 void
407 chkcmd(const char *line __unused, const char *mac)
408 {
409         int i;
410
411         /*
412          * Check to see if it matches top of stack.
413          */
414         if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
415                 stktop--;       /* OK. Pop & forget */
416         else {
417                 /* No. Maybe it's an opener */
418                 for (i=0; br[i].opbr; i++) {
419                         if (eq(mac, br[i].opbr)) {
420                                 /* Found. Push it. */
421                                 stktop++;
422                                 stk[stktop].opno = i;
423                                 stk[stktop].pl = 0;
424                                 stk[stktop].parm = 0;
425                                 stk[stktop].lno = lineno;
426                                 break;
427                         }
428                         /*
429                          * Maybe it's an unmatched closer.
430                          * NOTE: this depends on the fact
431                          * that none of the closers can be
432                          * openers too.
433                          */
434                         if (eq(mac, br[i].clbr)) {
435                                 nomatch(mac);
436                                 break;
437                         }
438                 }
439         }
440 }
441
442 void
443 nomatch(const char *mac)
444 {
445         int i, j;
446
447         /*
448          * Look for a match further down on stack
449          * If we find one, it suggests that the stuff in
450          * between is supposed to match itself.
451          */
452         for (j=stktop; j>=0; j--)
453                 if (eq(mac,br[stk[j].opno].clbr)) {
454                         /* Found.  Make a good diagnostic. */
455                         if (j == stktop-2) {
456                                 /*
457                                  * Check for special case \fx..\fR and don't
458                                  * complain.
459                                  */
460                                 if (stk[j+1].opno==FT && stk[j+1].parm!='R'
461                                  && stk[j+2].opno==FT && stk[j+2].parm=='R') {
462                                         stktop = j -1;
463                                         return;
464                                 }
465                                 /*
466                                  * We have two unmatched frobs.  Chances are
467                                  * they were intended to match, so we mention
468                                  * them together.
469                                  */
470                                 pe(stk[j+1].lno);
471                                 prop(j+1);
472                                 printf(" does not match %d: ", stk[j+2].lno);
473                                 prop(j+2);
474                                 printf("\n");
475                         } else for (i=j+1; i <= stktop; i++) {
476                                 complain(i);
477                         }
478                         stktop = j-1;
479                         return;
480                 }
481         /* Didn't find one.  Throw this away. */
482         pe(lineno);
483         printf("Unmatched .%s\n", mac);
484 }
485
/* eq: return 1 when s1 and s2 hold the same string, 0 otherwise */
int
eq(const char *s1, const char *s2)
{
	if (strcmp(s1, s2) != 0)
		return 0;
	return 1;
}
492
493 /* print the first part of an error message, given the line number */
494 void
495 pe(int linen)
496 {
497         if (nfiles > 1)
498                 printf("%s: ", cfilename);
499         printf("%d: ", linen);
500 }
501
502 void
503 checkknown(const char *mac)
504 {
505
506         if (eq(mac, "."))
507                 return;
508         if (binsrch(mac) >= 0)
509                 return;
510         if (mac[0] == '\\' && mac[1] == '"')    /* comments */
511                 return;
512
513         pe(lineno);
514         printf("Unknown command: .%s\n", mac);
515 }
516
517 /*
518  * We have a .de xx line in "line".  Add xx to the list of known commands.
519  */
520 void
521 addcmd(char *line)
522 {
523         char *mac;
524
525         /* grab the macro being defined */
526         mac = line+4;
527         while (isspace(*mac))
528                 mac++;
529         if (*mac == 0) {
530                 pe(lineno);
531                 printf("illegal define: %s\n", line);
532                 return;
533         }
534         mac[2] = 0;
535         if (isspace(mac[1]) || mac[1] == '\\')
536                 mac[1] = 0;
537         if (ncmds >= MAXCMDS) {
538                 printf("Only %d known commands allowed\n", MAXCMDS);
539                 exit(1);
540         }
541         addmac(mac);
542 }
543
544 /*
545  * Add mac to the list.  We should really have some kind of tree
546  * structure here but this is a quick-and-dirty job and I just don't
547  * have time to mess with it.  (I wonder if this will come back to haunt
548  * me someday?)  Anyway, I claim that .de is fairly rare in user
549  * nroff programs, and the register loop below is pretty fast.
550  */
551 void
552 addmac(const char *mac)
553 {
554         const char **src, **dest, **loc;
555
556         if (binsrch(mac) >= 0){ /* it's OK to redefine something */
557 #ifdef DEBUG
558                 printf("binsrch(%s) -> already in table\n", mac);
559 #endif
560                 return;
561         }
562         /* binsrch sets slot as a side effect */
563 #ifdef DEBUG
564 printf("binsrch(%s) -> %d\n", mac, slot);
565 #endif
566         loc = &knowncmds[slot];
567         src = &knowncmds[ncmds-1];
568         dest = src+1;
569         while (dest > loc)
570                 *dest-- = *src--;
571         *loc = strcpy(malloc(3), mac);
572         ncmds++;
573 #ifdef DEBUG
574 printf("after: %s %s %s %s %s, %d cmds\n", knowncmds[slot-2], knowncmds[slot-1], knowncmds[slot], knowncmds[slot+1], knowncmds[slot+2], ncmds);
575 #endif
576 }
577
578 /*
579  * Do a binary search in knowncmds for mac.
580  * If found, return the index.  If not, return -1.
581  */
582 int
583 binsrch(const char *mac)
584 {
585         const char *p;  /* pointer to current cmd in list */
586         int d;          /* difference if any */
587         int mid;        /* mid point in binary search */
588         int top, bot;   /* boundaries of bin search, inclusive */
589
590         top = ncmds-1;
591         bot = 0;
592         while (top >= bot) {
593                 mid = (top+bot)/2;
594                 p = knowncmds[mid];
595                 d = p[0] - mac[0];
596                 if (d == 0)
597                         d = p[1] - mac[1];
598                 if (d == 0)
599                         return mid;
600                 if (d < 0)
601                         bot = mid + 1;
602                 else
603                         top = mid - 1;
604         }
605         slot = bot;     /* place it would have gone */
606         return -1;
607 }