2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2002 - 2015 Tony Finch <dot@dotat.at>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * unifdef - remove ifdef'ed lines
31 * This code was derived from software contributed to Berkeley by Dave Yost.
32 * It was rewritten to support ANSI C by Tony Finch. The original version
33 * of unifdef carried the 4-clause BSD copyright licence. None of its code
34 * remains in this version (though some of the names remain) so it now
35 * carries a more liberal licence.
38 * provide an option which will append the name of the
39 * appropriate symbol after #else's and #endif's
40 * provide an option which will check symbols after
41 * #else's and #endif's to see that they match their
42 * corresponding #ifdef or #ifndef
44 * These require better buffer handling, which would also make
45 * it possible to handle all "dodgy" directives correctly.
48 #include <sys/param.h>
62 static const char copyright[] =
63 "@(#) $Version: unifdef-2.11 $\n"
65 "@(#) $Author: Tony Finch (dot@dotat.at) $\n"
66 "@(#) $URL: https://dotat.at/prog/unifdef $\n"
69 /* types of input lines: */
71 LT_TRUEI, /* a true #if with ignore flag */
72 LT_FALSEI, /* a false #if with ignore flag */
73 LT_IF, /* an unknown #if */
74 LT_TRUE, /* a true #if */
75 LT_FALSE, /* a false #if */
76 LT_ELIF, /* an unknown #elif */
77 LT_ELTRUE, /* a true #elif */
78 LT_ELFALSE, /* a false #elif */
80 LT_ENDIF, /* #endif */
81 LT_DODGY, /* flag: directive is not on one line */
82 LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
83 LT_PLAIN, /* ordinary line */
84 LT_EOF, /* end of file */
85 LT_ERROR, /* unevaluable #if */
89 static char const * const linetype_name[] = {
90 "TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
91 "ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
92 "DODGY TRUEI", "DODGY FALSEI",
93 "DODGY IF", "DODGY TRUE", "DODGY FALSE",
94 "DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
95 "DODGY ELSE", "DODGY ENDIF",
96 "PLAIN", "EOF", "ERROR"
/*
 * Linetype arithmetic helpers.
 *
 * linetype_if2elif(lt) maps an #if line type (LT_IF, LT_TRUE, LT_FALSE)
 * to the matching #elif line type (LT_ELIF, LT_ELTRUE, LT_ELFALSE) by
 * shifting it by the distance between LT_IF and LT_ELIF.
 *
 * linetype_2dodgy(lt) maps a directive line type to its "dodgy"
 * counterpart by adding LT_DODGY.
 *
 * The argument is parenthesized so that any expression (for example a
 * conditional expression) can be passed safely.
 */
#define linetype_if2elif(lt) ((Linetype)((lt) - LT_IF + LT_ELIF))
#define linetype_2dodgy(lt) ((Linetype)((lt) + LT_DODGY))
102 /* state of #if processing */
105 IS_FALSE_PREFIX, /* false #if followed by false #elifs */
106 IS_TRUE_PREFIX, /* first non-false #(el)if is true */
107 IS_PASS_MIDDLE, /* first non-false #(el)if is unknown */
108 IS_FALSE_MIDDLE, /* a false #elif after a pass state */
109 IS_TRUE_MIDDLE, /* a true #elif after a pass state */
110 IS_PASS_ELSE, /* an else after a pass state */
111 IS_FALSE_ELSE, /* an else after a true state */
112 IS_TRUE_ELSE, /* an else after only false states */
113 IS_FALSE_TRAILER, /* #elifs after a true are false */
117 static char const * const ifstate_name[] = {
118 "OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
119 "PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
120 "PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
124 /* state of comment parser */
126 NO_COMMENT = false, /* outside a comment */
127 C_COMMENT, /* in a comment like this one */
128 CXX_COMMENT, /* between // and end of line */
129 STARTING_COMMENT, /* just after slash-backslash-newline */
130 FINISHING_COMMENT, /* star-backslash-newline in a C comment */
131 CHAR_LITERAL, /* inside '' */
132 STRING_LITERAL /* inside "" */
135 static char const * const comment_name[] = {
136 "NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
139 /* state of preprocessor line parser */
141 LS_START, /* only space and comments on this line */
142 LS_HASH, /* only space, comments, and a hash */
143 LS_DIRTY /* this line can't be a preprocessor line */
146 static char const * const linestate_name[] = {
147 "START", "HASH", "DIRTY"
151 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
153 #define MAXDEPTH 64 /* maximum #if nesting */
154 #define MAXLINE 4096 /* maximum length of line */
157 * Sometimes when editing a keyword the replacement text is longer, so
158 * we leave some space at the end of the tline buffer to accommodate this.
163 * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
167 RB_ENTRY(macro) entry;
170 bool ignore; /* -iDsym or -iUsym */
174 macro_cmp(struct macro *a, struct macro *b)
176 return (strcmp(a->name, b->name));
179 static RB_HEAD(MACROMAP, macro) macro_tree = RB_INITIALIZER(¯o_tree);
180 RB_GENERATE_STATIC(MACROMAP, macro, entry, macro_cmp);
186 static bool compblank; /* -B: compress blank lines */
187 static bool lnblank; /* -b: blank deleted lines */
188 static bool complement; /* -c: do the complement */
189 static bool debugging; /* -d: debugging reports */
190 static bool inplace; /* -m: modify in place */
191 static bool iocccok; /* -e: fewer IOCCC errors */
192 static bool strictlogic; /* -K: keep ambiguous #ifs */
193 static bool killconsts; /* -k: eval constant #ifs */
194 static bool lnnum; /* -n: add #line directives */
195 static bool symlist; /* -s: output symbol list */
196 static bool symdepth; /* -S: output symbol depth */
197 static bool text; /* -t: this is a text file */
199 static FILE *input; /* input file pointer */
200 static const char *filename; /* input file name */
201 static int linenum; /* current line number */
202 static const char *linefile; /* file name for #line */
203 static FILE *output; /* output file pointer */
204 static const char *ofilename; /* output file name */
205 static const char *backext; /* backup extension */
206 static char *tempname; /* avoid splatting input */
208 static char tline[MAXLINE+EDITSLOP];/* input buffer plus space */
209 static char *keyword; /* used for editing #elif's */
212 * When processing a file, the output's newline style will match the
213 * input's, and unifdef correctly handles CRLF or LF endings whatever
214 * the platform's native style. The stdio streams are opened in binary
215 * mode to accommodate platforms whose native newline style is CRLF.
216 * When the output isn't a processed input file (when it is error /
217 * debug / diagnostic messages) then unifdef uses native line endings.
220 static const char *newline; /* input file format */
221 static const char newline_unix[] = "\n";
222 static const char newline_crlf[] = "\r\n";
224 static Comment_state incomment; /* comment parser state */
225 static Line_state linestate; /* #if line parser state */
226 static Ifstate ifstate[MAXDEPTH]; /* #if processor state */
227 static bool ignoring[MAXDEPTH]; /* ignore comments state */
228 static int stifline[MAXDEPTH]; /* start of current #if */
229 static int depth; /* current #if nesting */
230 static int delcount; /* count of deleted lines */
231 static unsigned blankcount; /* count of blank lines */
232 static unsigned blankmax; /* maximum recent blankcount */
233 static bool constexpr; /* constant #if expression */
234 static bool zerosyms; /* to format symdepth output */
235 static bool firstsym; /* ditto */
237 static int exitmode; /* exit status mode */
238 static int exitstat; /* program exit status */
239 static bool altered; /* was this file modified? */
241 static void addsym1(bool, bool, char *);
242 static void addsym2(bool, const char *, const char *);
243 static char *astrcat(const char *, const char *);
244 static void cleantemp(void);
245 static void closeio(void);
246 static void debug(const char *, ...);
247 static void debugsym(const char *, const struct macro *);
248 static bool defundef(void);
249 static void defundefile(const char *);
250 static void done(void);
251 static void error(const char *);
252 static struct macro *findsym(const char **);
253 static void flushline(bool);
254 static void hashline(void);
255 static void help(void);
256 static Linetype ifeval(const char **);
257 static void ignoreoff(void);
258 static void ignoreon(void);
259 static void indirectsym(void);
260 static void keywordedit(const char *);
261 static const char *matchsym(const char *, const char *);
262 static void nest(void);
263 static Linetype parseline(void);
264 static void process(void);
265 static void processinout(const char *, const char *);
266 static const char *skipargs(const char *);
267 static const char *skipcomment(const char *);
268 static const char *skiphash(void);
269 static const char *skipline(const char *);
270 static const char *skipsym(const char *);
271 static void state(Ifstate);
272 static void unnest(void);
273 static void usage(void);
274 static void version(void);
275 static const char *xstrdup(const char *, const char *);
/*
 * True if c cannot be part of an identifier, i.e. it is neither an
 * alphanumeric character nor an underscore.  The argument is cast to
 * unsigned char before being handed to isalnum() and is parenthesized
 * so that arbitrary expressions may be passed.  Note that c is
 * evaluated twice, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
280 mktempmode(char *tmp, int mode)
284 mode &= (S_IRWXU|S_IRWXG|S_IRWXO);
287 err(2, "can't create %s", tmp);
288 rc = fchmod(fd, mode);
290 err(2, "can't fchmod %s mode=0o%o", tmp, mode);
291 return (fdopen(fd, "wb"));
298 main(int argc, char *argv[])
302 while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)
304 case 'i': /* treat stuff controlled by these symbols as text */
306 * For strict backwards-compatibility the U or D
307 * should be immediately after the -i but it doesn't
308 * matter much if we relax that requirement.
312 addsym1(true, true, optarg);
314 addsym1(true, false, optarg);
318 case 'D': /* define a symbol */
319 addsym1(false, true, optarg);
321 case 'U': /* undef a symbol */
322 addsym1(false, false, optarg);
324 case 'I': /* no-op for compatibility with cpp */
326 case 'b': /* blank deleted lines instead of omitting them */
327 case 'l': /* backwards compatibility */
330 case 'B': /* compress blank lines around removed section */
333 case 'c': /* treat -D as -U and vice versa */
339 case 'e': /* fewer errors from dodgy lines */
342 case 'f': /* definitions file */
348 case 'K': /* keep ambiguous #ifs */
351 case 'k': /* process constant #ifs */
354 case 'm': /* modify in place */
357 case 'M': /* modify in place and keep backup */
359 if (strlen(optarg) > 0)
362 case 'n': /* add #line directive after deleted lines */
365 case 'o': /* output to a file */
368 case 's': /* only output list of symbols that control #ifs */
371 case 'S': /* list symbols with their nesting depth */
372 symlist = symdepth = true;
374 case 't': /* don't parse C comments */
381 exitmode = atoi(optarg);
382 if(exitmode < 0 || exitmode > 2)
390 if (compblank && lnblank)
391 errx(2, "-B and -b are mutually exclusive");
392 if (symlist && (ofilename != NULL || inplace || argc > 1))
393 errx(2, "-s only works with one input file");
394 if (argc > 1 && ofilename != NULL)
395 errx(2, "-o cannot be used with multiple input files");
396 if (argc > 1 && !inplace)
397 errx(2, "multiple input files require -m or -M");
398 if (argc == 0 && inplace)
399 errx(2, "-m requires an input file");
402 if (argc == 1 && !inplace && ofilename == NULL)
407 if (ofilename != NULL)
408 processinout(*argv, ofilename);
409 else while (argc-- > 0) {
410 processinout(*argv, *argv);
414 case(0): exit(exitstat);
415 case(1): exit(!exitstat);
417 default: abort(); /* bug */
425 processinout(const char *ifn, const char *ofn)
429 if (ifn == NULL || strcmp(ifn, "-") == 0) {
430 filename = "[stdin]";
436 input = fopen(ifn, "rb");
438 err(2, "can't open %s", ifn);
440 if (strcmp(ofn, "-") == 0) {
445 if (stat(ofn, &st) < 0) {
446 output = fopen(ofn, "wb");
448 err(2, "can't create %s", ofn);
453 tempname = astrcat(ofn, ".XXXXXX");
454 output = mktempmode(tempname, st.st_mode);
456 err(2, "can't create %s", tempname);
460 if (backext != NULL) {
461 char *backname = astrcat(ofn, backext);
462 if (rename(ofn, backname) < 0)
463 err(2, "can't rename \"%s\" to \"%s\"", ofn, backname);
466 /* leave file unmodified if unifdef made no changes */
467 if (!altered && backext == NULL) {
468 if (remove(tempname) < 0)
469 warn("can't remove \"%s\"", tempname);
470 } else if (rename(tempname, ofn) < 0)
471 err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn);
477 * For cleaning up if there is an error.
482 if (tempname != NULL)
487 * Self-identification functions.
493 const char *c = copyright;
508 "usage: unifdef [-bBcdehKkmnsStV] [-x{012}] [-Mext] [-opath] \\\n"
509 " [-[i]Dsym[=val]] [-[i]Usym] [-fpath] ... [file] ...\n");
524 " -Dsym=val define preprocessor symbol with given value\n"
525 " -Dsym define preprocessor symbol with value 1\n"
526 " -Usym preprocessor symbol is undefined\n"
527 " -iDsym=val \\ ignore C strings and comments\n"
528 " -iDsym ) in sections controlled by these\n"
529 " -iUsym / preprocessor symbols\n"
530 " -fpath file containing #define and #undef directives\n"
531 " -b blank lines instead of deleting them\n"
532 " -B compress blank lines around deleted section\n"
533 " -c complement (invert) keep vs. delete\n"
534 " -d debugging mode\n"
535 " -e ignore multiline preprocessor directives\n"
537 " -Ipath extra include file path (ignored)\n"
538 " -K disable && and || short-circuiting\n"
539 " -k process constant #if expressions\n"
540 " -Mext modify in place and keep backups\n"
541 " -m modify input files in place\n"
542 " -n add #line directives to output\n"
543 " -opath output file name\n"
544 " -S list #if control symbols with nesting\n"
545 " -s list #if control symbols\n"
546 " -t ignore C strings and comments\n"
547 " -V print version\n"
548 " -x{012} exit status mode\n"
554 * A state transition function alters the global #if processing state
555 * in a particular way. The table below is indexed by the current
556 * processing state and the type of the current line.
558 * Nesting is handled by keeping a stack of states; some transition
559 * functions increase or decrease the depth. They also maintain the
560 * ignore state on a stack. In some complicated cases they have to
561 * alter the preprocessor directive, as follows.
563 * When we have processed a group that starts off with a known-false
564 * #if/#elif sequence (which has therefore been deleted) followed by a
565 * #elif that we don't understand and therefore must keep, we edit the
566 * latter into a #if to keep the nesting correct. We use memcpy() to
 * overwrite the 4 byte token "elif" with "if  " (two trailing spaces) without a '\0' byte.
569 * When we find a true #elif in a group, the following block will
570 * always be kept and the rest of the sequence after the next #elif or
571 * #else will be discarded. We edit the #elif into a #else and the
572 * following directive to #endif since this has the desired behaviour.
574 * "Dodgy" directives are split across multiple lines, the most common
575 * example being a multi-line comment hanging off the right of the
576 * directive. We can handle them correctly only if there is no change
577 * from printing to dropping (or vice versa) caused by that directive.
578 * If the directive is the first of a group we have a choice between
579 * failing with an error, or passing it through unchanged instead of
580 * evaluating it. The latter is not the default to avoid questions from
581 * users about unifdef unexpectedly leaving behind preprocessor directives.
583 typedef void state_fn(void);
/* error transitions: hand a fixed diagnostic to error() */
static void
Eelif(void)
{
	error("Inappropriate #elif");
}

static void
Eelse(void)
{
	error("Inappropriate #else");
}

static void
Eendif(void)
{
	error("Inappropriate #endif");
}

static void
Eeof(void)
{
	error("Premature EOF");
}

static void
Eioccc(void)
{
	error("Obfuscated preprocessor control line");
}
/* plain lines: flush the current line, asking to keep it or not */
static void
print(void)
{
	flushline(true);
}

static void
drop(void)
{
	flushline(false);
}
594 /* output lacks group's start line */
595 static void Strue (void) { drop(); ignoreoff(); state(IS_TRUE_PREFIX); }
596 static void Sfalse(void) { drop(); ignoreoff(); state(IS_FALSE_PREFIX); }
597 static void Selse (void) { drop(); state(IS_TRUE_ELSE); }
598 /* print/pass this block */
599 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
600 static void Pelse (void) { print(); state(IS_PASS_ELSE); }
601 static void Pendif(void) { print(); unnest(); }
602 /* discard this block */
603 static void Dfalse(void) { drop(); ignoreoff(); state(IS_FALSE_TRAILER); }
604 static void Delif (void) { drop(); ignoreoff(); state(IS_FALSE_MIDDLE); }
605 static void Delse (void) { drop(); state(IS_FALSE_ELSE); }
606 static void Dendif(void) { drop(); unnest(); }
/* first line of a group: push a nesting level, then act on the line */
static void
Fdrop(void)
{
	nest();
	Dfalse();
}

static void
Fpass(void)
{
	nest();
	Pelif();
}

static void
Ftrue(void)
{
	nest();
	Strue();
}

static void
Ffalse(void)
{
	nest();
	Sfalse();
}
612 /* variable pedantry for obfuscated lines */
613 static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
614 static void Oif (void) { if (!iocccok) Eioccc(); Fpass(); }
615 static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
/* as the F* transitions, but also turn on ignoring for this block */
static void
Idrop(void)
{
	Fdrop();
	ignoreon();
}

static void
Itrue(void)
{
	Ftrue();
	ignoreon();
}

static void
Ifalse(void)
{
	Ffalse();
	ignoreon();
}
620 /* modify this line */
621 static void Mpass (void) { memcpy(keyword, "if ", 4); Pelif(); }
622 static void Mtrue (void) { keywordedit("else"); state(IS_TRUE_MIDDLE); }
623 static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
624 static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
626 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
628 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
629 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eendif,
630 print, done, abort },
631 /* IS_FALSE_PREFIX */
632 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
633 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
636 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
637 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
638 print, Eeof, abort },
640 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
641 Oiffy, Oiffy, Fpass, Oif, Oif, Pelif, Oelif, Oelif, Pelse, Pendif,
642 print, Eeof, abort },
643 /* IS_FALSE_MIDDLE */
644 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
645 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
648 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
649 Oiffy, Oiffy, Fpass, Oif, Oif, Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
650 print, Eeof, abort },
652 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
653 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Pendif,
654 print, Eeof, abort },
656 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
657 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
660 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
661 Oiffy, Oiffy, Fpass, Oif, Oif, Eelif, Eelif, Eelif, Eelse, Eioccc,
662 print, Eeof, abort },
663 /* IS_FALSE_TRAILER */
664 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
665 Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
667 /*TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF
668 TRUEI FALSEI IF TRUE FALSE ELIF ELTRUE ELFALSE ELSE ENDIF (DODGY)
673 * State machine utility functions
680 ignoring[depth] = ignoring[depth-1];
685 ignoring[depth] = true;
688 keywordedit(const char *replacement)
690 snprintf(keyword, tline + sizeof(tline) - keyword,
691 "%s%s", replacement, newline);
698 if (depth > MAXDEPTH-1)
700 if (depth == MAXDEPTH-1)
701 error("Too many levels of nesting");
703 stifline[depth] = linenum;
719 * The last state transition function. When this is called,
720 * lineval == LT_EOF, so the process() loop will terminate.
726 error("EOF in comment");
731 * Write a line to the output or not, according to command line options.
732 * If writing fails, closeio() will print the error and exit.
739 if (keep ^ complement) {
740 bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
741 if (blankline && compblank && blankcount != blankmax) {
745 if (lnnum && delcount > 0)
747 if (fputs(tline, output) == EOF)
750 blankmax = blankcount = blankline ? blankcount + 1 : 0;
753 if (lnblank && fputs(newline, output) == EOF)
759 if (debugging && fflush(output) == EOF)
764 * Format of #line directives depends on whether we know the input filename.
771 if (linefile == NULL)
772 e = fprintf(output, "#line %d%s", linenum, newline);
774 e = fprintf(output, "#line %d \"%s\"%s",
775 linenum, linefile, newline);
781 * Flush the output and handle errors.
786 /* Tidy up after findsym(). */
787 if (symdepth && !zerosyms)
789 if (output != NULL && (ferror(output) || fclose(output) == EOF))
790 err(2, "%s: can't write to output", filename);
795 * The driver for the state machine.
800 Linetype lineval = LT_PLAIN;
801 /* When compressing blank lines, act as if the file
802 is preceded by a large number of blank lines. */
803 blankmax = blankcount = 1000;
808 while (lineval != LT_EOF) {
809 lineval = parseline();
810 trans_table[ifstate[depth]][lineval]();
811 debug("process line %d %s -> %s depth %d",
812 linenum, linetype_name[lineval],
813 ifstate_name[ifstate[depth]], depth);
819 * Parse a line and determine its type. We keep the preprocessor line
820 * parser state between calls in the global variable linestate, with
821 * help from skipcomment().
827 struct macro *cursym;
829 Comment_state wascomment;
831 wascomment = incomment;
835 if (newline == NULL) {
836 if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
837 newline = newline_crlf;
839 newline = newline_unix;
845 keyword = tline + (cp - tline);
846 if ((cp = matchsym("ifdef", keyword)) != NULL ||
847 (cp = matchsym("ifndef", keyword)) != NULL) {
848 cp = skipcomment(cp);
849 if ((cursym = findsym(&cp)) == NULL)
852 retval = (keyword[2] == 'n')
853 ? LT_FALSE : LT_TRUE;
854 if (cursym->value == NULL)
855 retval = (retval == LT_TRUE)
856 ? LT_FALSE : LT_TRUE;
858 retval = (retval == LT_TRUE)
859 ? LT_TRUEI : LT_FALSEI;
861 } else if ((cp = matchsym("if", keyword)) != NULL)
862 retval = ifeval(&cp);
863 else if ((cp = matchsym("elif", keyword)) != NULL)
864 retval = linetype_if2elif(ifeval(&cp));
865 else if ((cp = matchsym("else", keyword)) != NULL)
867 else if ((cp = matchsym("endif", keyword)) != NULL)
870 cp = skipsym(keyword);
871 /* no way can we deal with a continuation inside a keyword */
872 if (strncmp(cp, "\\\r\n", 3) == 0 ||
873 strncmp(cp, "\\\n", 2) == 0)
879 cp = skipcomment(cp);
882 if (retval == LT_TRUE || retval == LT_FALSE ||
883 retval == LT_TRUEI || retval == LT_FALSEI)
885 if (retval == LT_ELTRUE || retval == LT_ELFALSE)
888 /* the following can happen if the last line of the file lacks a
889 newline or if there is too much whitespace in a directive */
890 if (linestate == LS_HASH) {
891 long len = cp - tline;
892 if (fgets(tline + len, MAXLINE - len, input) == NULL) {
894 err(2, "can't read %s", filename);
895 /* append the missing newline at eof */
896 strcpy(tline + len, newline);
897 cp += strlen(newline);
898 linestate = LS_START;
900 linestate = LS_DIRTY;
903 if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) {
904 retval = linetype_2dodgy(retval);
905 linestate = LS_DIRTY;
908 debug("parser line %d state %s comment %s line", linenum,
909 comment_name[incomment], linestate_name[linestate]);
914 * These are the binary operators that are supported by the expression
917 static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) {
918 if(at == LT_IF || bt == LT_IF) return (LT_IF);
919 return (*p = v, v ? LT_TRUE : LT_FALSE);
921 static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) {
922 return op_strict(p, a < b, at, bt);
924 static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) {
925 return op_strict(p, a > b, at, bt);
927 static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) {
928 return op_strict(p, a <= b, at, bt);
930 static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) {
931 return op_strict(p, a >= b, at, bt);
933 static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) {
934 return op_strict(p, a == b, at, bt);
936 static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) {
937 return op_strict(p, a != b, at, bt);
939 static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) {
940 if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
941 return (*p = 1, LT_TRUE);
942 return op_strict(p, a || b, at, bt);
944 static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) {
945 if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
946 return (*p = 0, LT_FALSE);
947 return op_strict(p, a && b, at, bt);
949 static Linetype op_blsh(long *p, Linetype at, long a, Linetype bt, long b) {
950 return op_strict(p, a << b, at, bt);
952 static Linetype op_brsh(long *p, Linetype at, long a, Linetype bt, long b) {
953 return op_strict(p, a >> b, at, bt);
955 static Linetype op_add(long *p, Linetype at, long a, Linetype bt, long b) {
956 return op_strict(p, a + b, at, bt);
958 static Linetype op_sub(long *p, Linetype at, long a, Linetype bt, long b) {
959 return op_strict(p, a - b, at, bt);
961 static Linetype op_mul(long *p, Linetype at, long a, Linetype bt, long b) {
962 return op_strict(p, a * b, at, bt);
964 static Linetype op_div(long *p, Linetype at, long a, Linetype bt, long b) {
966 debug("eval division by zero");
969 return op_strict(p, a / b, at, bt);
971 static Linetype op_mod(long *p, Linetype at, long a, Linetype bt, long b) {
972 return op_strict(p, a % b, at, bt);
974 static Linetype op_bor(long *p, Linetype at, long a, Linetype bt, long b) {
975 return op_strict(p, a | b, at, bt);
977 static Linetype op_bxor(long *p, Linetype at, long a, Linetype bt, long b) {
978 return op_strict(p, a ^ b, at, bt);
980 static Linetype op_band(long *p, Linetype at, long a, Linetype bt, long b) {
981 return op_strict(p, a & b, at, bt);
985 * An evaluation function takes three arguments, as follows: (1) a pointer to
986 * an element of the precedence table which lists the operators at the current
987 * level of precedence; (2) a pointer to an integer which will receive the
988 * value of the expression; and (3) a pointer to a char* that points to the
989 * expression to be evaluated and that is updated to the end of the expression
990 * when evaluation is complete. The function returns LT_FALSE if the value of
991 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
992 * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
996 typedef Linetype eval_fn(const struct ops *, long *, const char **);
998 static eval_fn eval_table, eval_unary;
1001 * The precedence table. Expressions involving binary operators are evaluated
1002 * in a table-driven way by eval_table. When it evaluates a subexpression it
1003 * calls the inner function with its first argument pointing to the next
1004 * element of the table. Innermost expressions have special non-table-driven
1007 * The stop characters help with lexical analysis: an operator is not
1008 * recognized if it is followed by one of the stop characters because
1009 * that would make it a different operator.
1013 Linetype (*fn)(long *, Linetype, long, Linetype, long);
1020 static const struct ops eval_ops[] = {
1021 { eval_table, { { "||", op_or, NULL } } },
1022 { eval_table, { { "&&", op_and, NULL } } },
1023 { eval_table, { { "|", op_bor, "|" } } },
1024 { eval_table, { { "^", op_bxor, NULL } } },
1025 { eval_table, { { "&", op_band, "&" } } },
1026 { eval_table, { { "==", op_eq, NULL },
1027 { "!=", op_ne, NULL } } },
1028 { eval_table, { { "<=", op_le, NULL },
1029 { ">=", op_ge, NULL },
1030 { "<", op_lt, "<=" },
1031 { ">", op_gt, ">=" } } },
1032 { eval_table, { { "<<", op_blsh, NULL },
1033 { ">>", op_brsh, NULL } } },
1034 { eval_table, { { "+", op_add, NULL },
1035 { "-", op_sub, NULL } } },
1036 { eval_unary, { { "*", op_mul, NULL },
1037 { "/", op_div, NULL },
1038 { "%", op_mod, NULL } } },
1041 /* Current operator precedence level */
1042 static long prec(const struct ops *ops)
1044 return (ops - eval_ops);
1048 * Function for evaluating the innermost parts of expressions,
1049 * viz. !expr (expr) number defined(symbol) symbol
1050 * We reset the constexpr flag in the last two cases.
1053 eval_unary(const struct ops *ops, long *valp, const char **cpp)
1061 cp = skipcomment(*cpp);
1063 debug("eval%d !", prec(ops));
1065 lt = eval_unary(ops, valp, &cp);
1070 lt = *valp ? LT_TRUE : LT_FALSE;
1072 } else if (*cp == '~') {
1073 debug("eval%d ~", prec(ops));
1075 lt = eval_unary(ops, valp, &cp);
1080 lt = *valp ? LT_TRUE : LT_FALSE;
1082 } else if (*cp == '-') {
1083 debug("eval%d -", prec(ops));
1085 lt = eval_unary(ops, valp, &cp);
1090 lt = *valp ? LT_TRUE : LT_FALSE;
1092 } else if (*cp == '(') {
1094 debug("eval%d (", prec(ops));
1095 lt = eval_table(eval_ops, valp, &cp);
1098 cp = skipcomment(cp);
1101 } else if (isdigit((unsigned char)*cp)) {
1102 debug("eval%d number", prec(ops));
1103 *valp = strtol(cp, &ep, 0);
1106 lt = *valp ? LT_TRUE : LT_FALSE;
1108 } else if (matchsym("defined", cp) != NULL) {
1109 cp = skipcomment(cp+7);
1111 cp = skipcomment(cp+1);
1117 cp = skipcomment(cp);
1118 if (defparen && *cp++ != ')') {
1119 debug("eval%d defined missing ')'", prec(ops));
1123 debug("eval%d defined unknown", prec(ops));
1126 debug("eval%d defined %s", prec(ops), sym->name);
1127 *valp = (sym->value != NULL);
1128 lt = *valp ? LT_TRUE : LT_FALSE;
1131 } else if (!endsym(*cp)) {
1132 debug("eval%d symbol", prec(ops));
1137 } else if (sym->value == NULL) {
1141 *valp = strtol(sym->value, &ep, 0);
1142 if (*ep != '\0' || ep == sym->value)
1144 lt = *valp ? LT_TRUE : LT_FALSE;
1149 debug("eval%d bad expr", prec(ops));
1154 debug("eval%d = %d", prec(ops), *valp);
1159 * Table-driven evaluation of binary operators.
1162 eval_table(const struct ops *ops, long *valp, const char **cpp)
1164 const struct op *op;
1169 debug("eval%d", prec(ops));
1171 lt = ops->inner(ops+1, valp, &cp);
1175 cp = skipcomment(cp);
1176 for (op = ops->op; op->str != NULL; op++) {
1177 if (strncmp(cp, op->str, strlen(op->str)) == 0) {
1178 /* assume only one-char operators have stop chars */
1179 if (op->stop != NULL && cp[1] != '\0' &&
1180 strchr(op->stop, cp[1]) != NULL)
1186 if (op->str == NULL)
1188 cp += strlen(op->str);
1189 debug("eval%d %s", prec(ops), op->str);
1190 rt = ops->inner(ops+1, &val, &cp);
1193 lt = op->fn(valp, lt, *valp, rt, val);
1197 debug("eval%d = %d", prec(ops), *valp);
1198 debug("eval%d lt = %s", prec(ops), linetype_name[lt]);
1203 * Evaluate the expression on a #if or #elif line. If we can work out
1204 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
1205 * return just a generic LT_IF.
1208 ifeval(const char **cpp)
1213 debug("eval %s", *cpp);
1214 constexpr = killconsts ? false : true;
1215 ret = eval_table(eval_ops, &val, cpp);
1216 debug("eval = %d", val);
1217 return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
1221 * Read a line and examine its initial part to determine if it is a
1222 * preprocessor directive. Returns NULL on EOF, or a pointer to a
1223 * preprocessor directive name, or a pointer to the zero byte at the
1232 if (fgets(tline, MAXLINE, input) == NULL) {
1234 err(2, "can't read %s", filename);
1238 cp = skipcomment(tline);
1239 if (linestate == LS_START && *cp == '#') {
1240 linestate = LS_HASH;
1241 return (skipcomment(cp + 1));
1242 } else if (*cp == '\0') {
1245 return (skipline(cp));
1250 * Mark a line dirty and consume the rest of it, keeping track of the
1254 skipline(const char *cp)
1258 linestate = LS_DIRTY;
1259 while (*cp != '\0') {
1260 cp = skipcomment(pcp = cp);
1268 * Skip over comments, strings, and character literals and stop at the
1269 * next character position that is not whitespace. Between calls we keep
1270 * the comment state in the global variable incomment, and we also adjust
1271 * the global variable linestate when we see a newline.
1272 * XXX: doesn't cope with the buffer splitting inside a state transition.
1275 skipcomment(const char *cp)
1277 if (text || ignoring[depth]) {
1278 for (; isspace((unsigned char)*cp); cp++)
1280 linestate = LS_START;
1284 /* don't reset to LS_START after a line continuation */
1285 if (strncmp(cp, "\\\r\n", 3) == 0)
1287 else if (strncmp(cp, "\\\n", 2) == 0)
1289 else switch (incomment) {
1291 if (strncmp(cp, "/\\\r\n", 4) == 0) {
1292 incomment = STARTING_COMMENT;
1294 } else if (strncmp(cp, "/\\\n", 3) == 0) {
1295 incomment = STARTING_COMMENT;
1297 } else if (strncmp(cp, "/*", 2) == 0) {
1298 incomment = C_COMMENT;
1300 } else if (strncmp(cp, "//", 2) == 0) {
1301 incomment = CXX_COMMENT;
1303 } else if (strncmp(cp, "\'", 1) == 0) {
1304 incomment = CHAR_LITERAL;
1305 linestate = LS_DIRTY;
1307 } else if (strncmp(cp, "\"", 1) == 0) {
1308 incomment = STRING_LITERAL;
1309 linestate = LS_DIRTY;
1311 } else if (strncmp(cp, "\n", 1) == 0) {
1312 linestate = LS_START;
1314 } else if (strchr(" \r\t", *cp) != NULL) {
1320 if (strncmp(cp, "\n", 1) == 0) {
1321 incomment = NO_COMMENT;
1322 linestate = LS_START;
1327 case STRING_LITERAL:
1328 if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1329 (incomment == STRING_LITERAL && cp[0] == '\"')) {
1330 incomment = NO_COMMENT;
1332 } else if (cp[0] == '\\') {
1337 } else if (strncmp(cp, "\n", 1) == 0) {
1338 if (incomment == CHAR_LITERAL)
1339 error("Unterminated char literal");
1341 error("Unterminated string literal");
1346 if (strncmp(cp, "*\\\r\n", 4) == 0) {
1347 incomment = FINISHING_COMMENT;
1349 } else if (strncmp(cp, "*\\\n", 3) == 0) {
1350 incomment = FINISHING_COMMENT;
1352 } else if (strncmp(cp, "*/", 2) == 0) {
1353 incomment = NO_COMMENT;
1358 case STARTING_COMMENT:
1360 incomment = C_COMMENT;
1362 } else if (*cp == '/') {
1363 incomment = CXX_COMMENT;
1366 incomment = NO_COMMENT;
1367 linestate = LS_DIRTY;
1370 case FINISHING_COMMENT:
1372 incomment = NO_COMMENT;
1375 incomment = C_COMMENT;
1384 * Skip macro arguments.
1387 skipargs(const char *cp)
1389 const char *ocp = cp;
1391 cp = skipcomment(cp);
1399 cp = skipcomment(cp+1);
1400 } while (level != 0 && *cp != '\0');
1404 /* Rewind and re-detect the syntax error later. */
1409 * Skip over an identifier.
1412 skipsym(const char *cp)
1414 while (!endsym(*cp))
1420 * Skip whitespace and take a copy of any following identifier.
1423 getsym(const char **cpp)
1425 const char *cp = *cpp, *sym;
1427 cp = skipcomment(cp);
1428 cp = skipsym(sym = cp);
1432 return (xstrdup(sym, cp));
1436 * Check that s (a symbol) matches the start of t, and that the
1437 * following character in t is not a symbol character. Returns a
1438 * pointer to the following character in t if there is a match,
1442 matchsym(const char *s, const char *t)
1444 while (*s != '\0' && *t != '\0')
1449 if (*s == '\0' && endsym(*t))
1456 * Look for the symbol in the symbol table. If it is found, we return
1457 * a pointer to its struct macro, else we return NULL.
1459 static struct macro *
1460 findsym(const char **strp)
1464 struct macro key, *res;
1467 *strp = skipsym(str);
1471 if (symdepth && firstsym)
1472 printf("%s%3d", zerosyms ? "" : "\n", depth);
1473 firstsym = zerosyms = false;
1475 symdepth ? " " : "",
1476 (int)(*strp-str), str,
1477 symdepth ? "" : "\n");
1478 /* we don't care about the value of the symbol */
1483 * 'str' just points into the current mid-parse input and is not
1484 * nul-terminated. We know the length of the symbol, *strp - str, but
1485 * need to provide a nul-terminated lookup key for RB_FIND's comparison
1486 * function. Create one here.
1488 strkey = malloc(*strp - str + 1);
1489 memcpy(strkey, str, *strp - str);
1490 strkey[*strp - str] = 0;
1493 res = RB_FIND(MACROMAP, &macro_tree, &key);
1495 debugsym("findsym", res);
1502 * Resolve indirect symbol values to their final definitions.
1509 struct macro *sym, *ind;
1513 RB_FOREACH(sym, MACROMAP, &macro_tree) {
1514 if (sym->value == NULL)
1518 if (ind == NULL || ind == sym ||
1520 ind->value == NULL ||
1521 ind->value == sym->value)
1523 debugsym("indir...", sym);
1524 sym->value = ind->value;
1525 debugsym("...ectsym", sym);
1532 * Add a symbol to the symbol table, specified with the format sym=val
1535 addsym1(bool ignorethis, bool definethis, char *symval)
1537 const char *sym, *val;
1541 if (definethis && *val == '=') {
1542 symval[val - sym] = '\0';
1544 } else if (*val == '\0') {
1545 val = definethis ? "1" : NULL;
1549 addsym2(ignorethis, sym, val);
1553 * Add a symbol to the symbol table.
1556 addsym2(bool ignorethis, const char *symname, const char *val)
1558 const char *cp = symname;
1559 struct macro *sym, *r;
1563 sym = calloc(1, sizeof(*sym));
1564 sym->ignore = ignorethis;
1565 sym->name = symname;
1567 r = RB_INSERT(MACROMAP, &macro_tree, sym);
1570 debugsym("addsym", sym);
1574 debugsym(const char *why, const struct macro *sym)
1576 debug("%s %s%c%s", why, sym->name,
1577 sym->value ? '=' : ' ',
1578 sym->value ? sym->value : "undef");
1582 * Add symbols to the symbol table from a file containing
1583 * #define and #undef preprocessor directives.
1586 defundefile(const char *fn)
1589 input = fopen(fn, "rb");
1591 err(2, "can't open %s", fn);
1596 err(2, "can't read %s", filename);
1600 error("EOF in comment");
1604 * Read and process one #define or #undef directive
1609 const char *cp, *kw, *sym, *val, *end;
1616 /* strip trailing whitespace, and do a fairly rough check to
1617 avoid unsupported multi-line preprocessor directives */
1618 end = cp + strlen(cp);
1619 while (end > tline && strchr(" \t\n\r", end[-1]) != NULL)
1621 if (end > tline && end[-1] == '\\')
1625 if ((cp = matchsym("define", kw)) != NULL) {
1628 error("Missing macro name in #define");
1632 cp = skipcomment(cp);
1633 val = (cp < end) ? xstrdup(cp, end) : "";
1636 addsym2(false, sym, val);
1637 } else if ((cp = matchsym("undef", kw)) != NULL) {
1640 error("Missing macro name in #undef");
1641 cp = skipcomment(cp);
1643 addsym2(false, sym, NULL);
1645 error("Unrecognized preprocessor directive");
1649 debug("parser line %d state %s comment %s line", linenum,
1650 comment_name[incomment], linestate_name[linestate]);
1655 * Concatenate two strings into new memory, checking for failure.
1658 astrcat(const char *s1, const char *s2)
1664 len = snprintf(NULL, 0, "%s%s", s1, s2);
1667 size = (size_t)len + 1;
1668 s = (char *)malloc(size);
1671 snprintf(s, size, "%s%s", s1, s2);
1676 * Duplicate a segment of a string, checking for failure.
1679 xstrdup(const char *start, const char *end)
1684 if (end < start) abort(); /* bug */
1685 n = (size_t)(end - start) + 1;
1689 snprintf(s, n, "%s", start);
1697 debug(const char *msg, ...)
1709 error(const char *msg)
1712 warnx("%s: %d: %s", filename, linenum, msg);
1714 warnx("%s: %d: %s (#if line %d depth %d)",
1715 filename, linenum, msg, stifline[depth], depth);
1717 errx(2, "Output may be truncated");