Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (c) 2002 - 2011 Tony Finch <dot@dotat.at>
0003  *
0004  * Redistribution and use in source and binary forms, with or without
0005  * modification, are permitted provided that the following conditions
0006  * are met:
0007  * 1. Redistributions of source code must retain the above copyright
0008  *    notice, this list of conditions and the following disclaimer.
0009  * 2. Redistributions in binary form must reproduce the above copyright
0010  *    notice, this list of conditions and the following disclaimer in the
0011  *    documentation and/or other materials provided with the distribution.
0012  *
0013  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
0014  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0015  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0016  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
0017  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
0018  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
0019  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
0020  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
0021  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
0022  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
0023  * SUCH DAMAGE.
0024  */
0025 
0026 /*
0027  * unifdef - remove ifdef'ed lines
0028  *
0029  * This code was derived from software contributed to Berkeley by Dave Yost.
0030  * It was rewritten to support ANSI C by Tony Finch. The original version
0031  * of unifdef carried the 4-clause BSD copyright licence. None of its code
0032  * remains in this version (though some of the names remain) so it now
0033  * carries a more liberal licence.
0034  *
0035  *  Wishlist:
0036  *      provide an option which will append the name of the
0037  *        appropriate symbol after #else's and #endif's
0038  *      provide an option which will check symbols after
0039  *        #else's and #endif's to see that they match their
0040  *        corresponding #ifdef or #ifndef
0041  *
0042  *   These require better buffer handling, which would also make
0043  *   it possible to handle all "dodgy" directives correctly.
0044  */
0045 
0046 #include <sys/types.h>
0047 #include <sys/stat.h>
0048 
0049 #include <ctype.h>
0050 #include <err.h>
0051 #include <errno.h>
0052 #include <stdarg.h>
0053 #include <stdbool.h>
0054 #include <stdio.h>
0055 #include <stdlib.h>
0056 #include <string.h>
0057 #include <unistd.h>
0058 
/*
 * Identification text for the program. version() walks this string and
 * prints whatever sits between each pair of '$' characters.
 */
const char copyright[] =
    "@(#) $Version: unifdef-2.5 $\n"
    "@(#) $Author: Tony Finch (dot@dotat.at) $\n"
    "@(#) $URL: http://dotat.at/prog/unifdef $\n";
0064 
/*
 * Classification of an input line, as returned by parseline().
 *
 * The first ten values are the directive kinds. Adding LT_DODGY to one of
 * them yields the matching "dodgy" kind (a directive that is not on one
 * line), so the dodgy kinds occupy LT_DODGY .. LT_DODGY_LAST.
 */
typedef enum {
    LT_TRUEI,                               /* true #if, ignore flag set */
    LT_FALSEI,                              /* false #if, ignore flag set */
    LT_IF,                                  /* #if of unknown value */
    LT_TRUE,                                /* true #if */
    LT_FALSE,                               /* false #if */
    LT_ELIF,                                /* #elif of unknown value */
    LT_ELTRUE,                              /* true #elif */
    LT_ELFALSE,                             /* false #elif */
    LT_ELSE,                                /* #else */
    LT_ENDIF,                               /* #endif */
    LT_DODGY,                               /* flag: directive spans lines */
    LT_DODGY_LAST = LT_DODGY + LT_ENDIF,    /* last of the dodgy kinds */
    LT_PLAIN,                               /* ordinary line */
    LT_EOF,                                 /* end of file */
    LT_ERROR,                               /* unevaluable #if */
    LT_COUNT                                /* number of line types */
} Linetype;

/* Printable name of each Linetype, indexed by the enum value. */
static char const * const linetype_name[] = {
    [LT_TRUEI]              = "TRUEI",
    [LT_FALSEI]             = "FALSEI",
    [LT_IF]                 = "IF",
    [LT_TRUE]               = "TRUE",
    [LT_FALSE]              = "FALSE",
    [LT_ELIF]               = "ELIF",
    [LT_ELTRUE]             = "ELTRUE",
    [LT_ELFALSE]            = "ELFALSE",
    [LT_ELSE]               = "ELSE",
    [LT_ENDIF]              = "ENDIF",
    [LT_DODGY + LT_TRUEI]   = "DODGY TRUEI",
    [LT_DODGY + LT_FALSEI]  = "DODGY FALSEI",
    [LT_DODGY + LT_IF]      = "DODGY IF",
    [LT_DODGY + LT_TRUE]    = "DODGY TRUE",
    [LT_DODGY + LT_FALSE]   = "DODGY FALSE",
    [LT_DODGY + LT_ELIF]    = "DODGY ELIF",
    [LT_DODGY + LT_ELTRUE]  = "DODGY ELTRUE",
    [LT_DODGY + LT_ELFALSE] = "DODGY ELFALSE",
    [LT_DODGY + LT_ELSE]    = "DODGY ELSE",
    [LT_DODGY + LT_ENDIF]   = "DODGY ENDIF",
    [LT_PLAIN]              = "PLAIN",
    [LT_EOF]                = "EOF",
    [LT_ERROR]              = "ERROR"
};
0094 
/*
 * State of #if processing at one nesting level. The state machine keeps
 * one of these per level in ifstate[].
 */
typedef enum {
    IS_OUTSIDE,
    IS_FALSE_PREFIX,        /* false #if followed by false #elifs */
    IS_TRUE_PREFIX,         /* first non-false #(el)if is true */
    IS_PASS_MIDDLE,         /* first non-false #(el)if is unknown */
    IS_FALSE_MIDDLE,        /* a false #elif after a pass state */
    IS_TRUE_MIDDLE,         /* a true #elif after a pass state */
    IS_PASS_ELSE,           /* an else after a pass state */
    IS_FALSE_ELSE,          /* an else after a true state */
    IS_TRUE_ELSE,           /* an else after only false states */
    IS_FALSE_TRAILER,       /* #elifs after a true are false */
    IS_COUNT                /* number of states */
} Ifstate;

/* Printable name of each Ifstate, indexed by the enum value. */
static char const * const ifstate_name[] = {
    [IS_OUTSIDE]       = "OUTSIDE",
    [IS_FALSE_PREFIX]  = "FALSE_PREFIX",
    [IS_TRUE_PREFIX]   = "TRUE_PREFIX",
    [IS_PASS_MIDDLE]   = "PASS_MIDDLE",
    [IS_FALSE_MIDDLE]  = "FALSE_MIDDLE",
    [IS_TRUE_MIDDLE]   = "TRUE_MIDDLE",
    [IS_PASS_ELSE]     = "PASS_ELSE",
    [IS_FALSE_ELSE]    = "FALSE_ELSE",
    [IS_TRUE_ELSE]     = "TRUE_ELSE",
    [IS_FALSE_TRAILER] = "FALSE_TRAILER"
};
0116 
/*
 * State of the comment parser. NO_COMMENT is deliberately equal to false
 * so the state can be tested directly as a truth value.
 */
typedef enum {
    NO_COMMENT = false,     /* outside a comment */
    C_COMMENT,              /* in a comment like this one */
    CXX_COMMENT,            /* between // and end of line */
    STARTING_COMMENT,       /* just after slash-backslash-newline */
    FINISHING_COMMENT,      /* star-backslash-newline in a C comment */
    CHAR_LITERAL,           /* inside '' */
    STRING_LITERAL          /* inside "" */
} Comment_state;

/* Printable name of each Comment_state, indexed by the enum value. */
static char const * const comment_name[] = {
    [NO_COMMENT]        = "NO",
    [C_COMMENT]         = "C",
    [CXX_COMMENT]       = "CXX",
    [STARTING_COMMENT]  = "STARTING",
    [FINISHING_COMMENT] = "FINISHING",
    [CHAR_LITERAL]      = "CHAR",
    [STRING_LITERAL]    = "STRING"
};
0131 
/*
 * State of the preprocessor line parser: how much of a possible directive
 * has been seen on the current line so far.
 */
typedef enum {
    LS_START,               /* only space and comments on this line */
    LS_HASH,                /* only space, comments, and a hash */
    LS_DIRTY                /* this line can't be a preprocessor line */
} Line_state;

/* Printable name of each Line_state, indexed by the enum value. */
static char const * const linestate_name[] = {
    [LS_START] = "START",
    [LS_HASH]  = "HASH",
    [LS_DIRTY] = "DIRTY"
};
0142 
0143 /*
0144  * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
0145  */
0146 #define MAXDEPTH        64          /* maximum #if nesting; sizes ifstate[], ignoring[] and stifline[] */
0147 #define MAXLINE         4096            /* maximum length of line; the size passed to fgets() when filling tline */
0148 #define MAXSYMS         4096            /* maximum number of symbols; sizes symname[], value[] and ignore[] */
0149 
0150 /*
0151  * Sometimes when editing a keyword the replacement text is longer, so
0152  * we leave some space at the end of the tline buffer to accommodate this.
0153  * (tline is declared with MAXLINE+EDITSLOP bytes; keywordedit() writes into it.)
0154  */
0154 #define EDITSLOP        10
0155 
0156 /*
0157  * For temporary filenames: the mkstemp() template used by main() when the
0158  * output file is the same file as the input.
0158  */
0159 #define TEMPLATE        "unifdef.XXXXXX"
0160 
0161 /*
0162  * Globals.
0163  */
0164 
/* Command-line option flags; each is set by the matching case in main(). */
0165 static bool             compblank;      /* -B: compress blank lines */
0166 static bool             lnblank;        /* -b: blank deleted lines (also set by -l) */
0167 static bool             complement;     /* -c: do the complement */
0168 static bool             debugging;      /* -d: debugging reports */
0169 static bool             iocccok;        /* -e: fewer IOCCC errors */
0170 static bool             strictlogic;        /* -K: keep ambiguous #ifs */
0171 static bool             killconsts;     /* -k: eval constant #ifs */
0172 static bool             lnnum;          /* -n: add #line directives */
0173 static bool             symlist;        /* -s: output symbol list (also set by -S) */
0174 static bool             symdepth;       /* -S: output symbol depth */
0175 static bool             text;           /* -t: this is a text file */
0176 
0177 static const char      *symname[MAXSYMS];   /* symbol name */
0178 static const char      *value[MAXSYMS];     /* -Dsym=value; parseline() treats NULL as "undefined" */
0179 static bool             ignore[MAXSYMS];    /* -iDsym or -iUsym */
0180 static int              nsyms;          /* number of symbols */
0181 
0182 static FILE            *input;          /* input file pointer */
0183 static const char      *filename;       /* input file name */
0184 static int              linenum;        /* current line number */
0185 static FILE            *output;         /* output file pointer */
0186 static const char      *ofilename;      /* output file name */
0187 static bool             overwriting;        /* output overwrites input */
0188 static char             tempname[FILENAME_MAX]; /* used when overwriting */
0189 
0190 static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
0191 static char            *keyword;        /* used for editing #elif's; points into tline */
0192 
0193 static const char      *newline;        /* input file format; NULL until parseline() has read the first line */
0194 static const char       newline_unix[] = "\n";
0195 static const char       newline_crlf[] = "\r\n";
0196 
0197 static Comment_state    incomment;      /* comment parser state */
0198 static Line_state       linestate;      /* #if line parser state */
0199 static Ifstate          ifstate[MAXDEPTH];  /* #if processor state */
0200 static bool             ignoring[MAXDEPTH]; /* ignore comments state */
0201 static int              stifline[MAXDEPTH]; /* start of current #if */
0202 static int              depth;          /* current #if nesting */
0203 static int              delcount;       /* count of deleted lines */
0204 static unsigned         blankcount;     /* count of blank lines */
0205 static unsigned         blankmax;       /* maximum recent blankcount */
0206 static bool             constexpr;      /* constant #if expression; NOTE(review): `constexpr` is a keyword in C23, so this name will not compile as C23 - rename file-wide */
0207 static bool             zerosyms = true;    /* to format symdepth output */
0208 static bool             firstsym;       /* ditto */
0209 
0210 static int              exitstat;       /* program exit status; flushline() sets it to 1 when a line is dropped */
0211 
/* Forward declarations of the file-local functions. */
0212 static void             addsym(bool, bool, char *);	/* main() passes (is -i option, is define rather than undef, option text) */
0213 static void             closeout(void);
0214 static void             debug(const char *, ...);	/* printf-style format plus arguments */
0215 static void             done(void);	/* ends by calling exit(exitstat) */
0216 static void             error(const char *);
0217 static int              findsym(const char *);	/* parseline() treats a negative result as "unknown symbol" */
0218 static void             flushline(bool);	/* true: keep the line, false: drop it */
0219 static Linetype         parseline(void);
0220 static Linetype         ifeval(const char **);
0221 static void             ignoreoff(void);
0222 static void             ignoreon(void);
0223 static void             keywordedit(const char *);
0224 static void             nest(void);	/* enter one more #if level */
0225 static void             process(void);	/* state machine driver; loops forever */
0226 static const char      *skipargs(const char *);
0227 static const char      *skipcomment(const char *);
0228 static const char      *skipsym(const char *);
0229 static void             state(Ifstate);	/* sets ifstate[depth] */
0230 static int              strlcmp(const char *, const char *, size_t);
0231 static void             unnest(void);	/* leave one #if level */
0232 static void             usage(void);	/* prints the synopsis and exits with status 2 */
0233 static void             version(void);	/* prints version text and exits with status 0 */
0234 
/*
 * True when character c cannot be part of an identifier, i.e. it is
 * neither alphanumeric nor an underscore. The argument is parenthesized
 * so that an expression may be passed safely; note that it is still
 * evaluated twice, so it must not have side effects.
 */
#define endsym(c) (!isalnum((unsigned char)(c)) && (c) != '_')
0236 
0237 /*
0238  * The main program.
0239  */
0240 int
0241 main(int argc, char *argv[])
0242 {
0243     int opt;
0244 
0245     while ((opt = getopt(argc, argv, "i:D:U:I:o:bBcdeKklnsStV")) != -1)
0246         switch (opt) {
0247         case 'i': /* treat stuff controlled by these symbols as text */
0248             /*
0249              * For strict backwards-compatibility the U or D
0250              * should be immediately after the -i but it doesn't
0251              * matter much if we relax that requirement.
0252              */
0253             opt = *optarg++;
0254             if (opt == 'D')
0255                 addsym(true, true, optarg);
0256             else if (opt == 'U')
0257                 addsym(true, false, optarg);
0258             else
0259                 usage();
0260             break;
0261         case 'D': /* define a symbol */
0262             addsym(false, true, optarg);
0263             break;
0264         case 'U': /* undef a symbol */
0265             addsym(false, false, optarg);
0266             break;
0267         case 'I': /* no-op for compatibility with cpp */
0268             break;
0269         case 'b': /* blank deleted lines instead of omitting them */
0270         case 'l': /* backwards compatibility */
0271             lnblank = true;
0272             break;
0273         case 'B': /* compress blank lines around removed section */
0274             compblank = true;
0275             break;
0276         case 'c': /* treat -D as -U and vice versa */
0277             complement = true;
0278             break;
0279         case 'd':
0280             debugging = true;
0281             break;
0282         case 'e': /* fewer errors from dodgy lines */
0283             iocccok = true;
0284             break;
0285         case 'K': /* keep ambiguous #ifs */
0286             strictlogic = true;
0287             break;
0288         case 'k': /* process constant #ifs */
0289             killconsts = true;
0290             break;
0291         case 'n': /* add #line directive after deleted lines */
0292             lnnum = true;
0293             break;
0294         case 'o': /* output to a file */
0295             ofilename = optarg;
0296             break;
0297         case 's': /* only output list of symbols that control #ifs */
0298             symlist = true;
0299             break;
0300         case 'S': /* list symbols with their nesting depth */
0301             symlist = symdepth = true;
0302             break;
0303         case 't': /* don't parse C comments */
0304             text = true;
0305             break;
0306         case 'V': /* print version */
0307             version();
0308         default:
0309             usage();
0310         }
0311     argc -= optind;
0312     argv += optind;
0313     if (compblank && lnblank)
0314         errx(2, "-B and -b are mutually exclusive");
0315     if (argc > 1) {
0316         errx(2, "can only do one file");
0317     } else if (argc == 1 && strcmp(*argv, "-") != 0) {
0318         filename = *argv;
0319         input = fopen(filename, "rb");
0320         if (input == NULL)
0321             err(2, "can't open %s", filename);
0322     } else {
0323         filename = "[stdin]";
0324         input = stdin;
0325     }
0326     if (ofilename == NULL) {
0327         ofilename = "[stdout]";
0328         output = stdout;
0329     } else {
0330         struct stat ist, ost;
0331         if (stat(ofilename, &ost) == 0 &&
0332             fstat(fileno(input), &ist) == 0)
0333             overwriting = (ist.st_dev == ost.st_dev
0334                     && ist.st_ino == ost.st_ino);
0335         if (overwriting) {
0336             const char *dirsep;
0337             int ofd;
0338 
0339             dirsep = strrchr(ofilename, '/');
0340             if (dirsep != NULL)
0341                 snprintf(tempname, sizeof(tempname),
0342                     "%.*s/" TEMPLATE,
0343                     (int)(dirsep - ofilename), ofilename);
0344             else
0345                 snprintf(tempname, sizeof(tempname),
0346                     TEMPLATE);
0347             ofd = mkstemp(tempname);
0348             if (ofd != -1)
0349                 output = fdopen(ofd, "wb+");
0350             if (output == NULL)
0351                 err(2, "can't create temporary file");
0352             fchmod(ofd, ist.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO));
0353         } else {
0354             output = fopen(ofilename, "wb");
0355             if (output == NULL)
0356                 err(2, "can't open %s", ofilename);
0357         }
0358     }
0359     process();
0360     abort(); /* bug */
0361 }
0362 
0363 static void
0364 version(void)
0365 {
0366     const char *c = copyright;
0367     for (;;) {
0368         while (*++c != '$')
0369             if (*c == '\0')
0370                 exit(0);
0371         while (*++c != '$')
0372             putc(*c, stderr);
0373         putc('\n', stderr);
0374     }
0375 }
0376 
/*
 * Print the command synopsis to stderr and exit with status 2.
 *
 * The synopsis lists -o, which main() accepts via its getopt() string.
 * (-l is accepted too, but only as a backwards-compatible alias of -b.)
 */
static void
usage(void)
{
    fprintf(stderr, "usage: unifdef [-bBcdeKknsStV] [-Ipath]"
        " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ..."
        " [-o outfile] [file]\n");
    exit(2);
}
0384 
0385 /*
0386  * A state transition function alters the global #if processing state
0387  * in a particular way. The table below is indexed by the current
0388  * processing state and the type of the current line.
0389  *
0390  * Nesting is handled by keeping a stack of states; some transition
0391  * functions increase or decrease the depth. They also maintain the
0392  * ignore state on a stack. In some complicated cases they have to
0393  * alter the preprocessor directive, as follows.
0394  *
0395  * When we have processed a group that starts off with a known-false
0396  * #if/#elif sequence (which has therefore been deleted) followed by a
0397  * #elif that we don't understand and therefore must keep, we edit the
0398  * latter into a #if to keep the nesting correct. We use memcpy() to
0399  * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
0400  *
0401  * When we find a true #elif in a group, the following block will
0402  * always be kept and the rest of the sequence after the next #elif or
0403  * #else will be discarded. We edit the #elif into a #else and the
0404  * following directive to #endif since this has the desired behaviour.
0405  *
0406  * "Dodgy" directives are split across multiple lines, the most common
0407  * example being a multi-line comment hanging off the right of the
0408  * directive. We can handle them correctly only if there is no change
0409  * from printing to dropping (or vice versa) caused by that directive.
0410  * If the directive is the first of a group we have a choice between
0411  * failing with an error, or passing it through unchanged instead of
0412  * evaluating it. The latter is not the default to avoid questions from
0413  * users about unifdef unexpectedly leaving behind preprocessor directives.
0414  */
0415 typedef void state_fn(void);
0416 
0417 /* report an error */
0418 static void Eelif (void) { error("Inappropriate #elif"); }
0419 static void Eelse (void) { error("Inappropriate #else"); }
0420 static void Eendif(void) { error("Inappropriate #endif"); }
0421 static void Eeof  (void) { error("Premature EOF"); }
0422 static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
0423 /* plain line handling */
0424 static void print (void) { flushline(true); }
0425 static void drop  (void) { flushline(false); }
0426 /* output lacks group's start line */
0427 static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
0428 static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
0429 static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
0430 /* print/pass this block */
0431 static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
0432 static void Pelse (void) { print();              state(IS_PASS_ELSE); }
0433 static void Pendif(void) { print(); unnest(); }
0434 /* discard this block */
0435 static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
0436 static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
0437 static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
0438 static void Dendif(void) { drop();  unnest(); }
0439 /* first line of group */
0440 static void Fdrop (void) { nest();  Dfalse(); }
0441 static void Fpass (void) { nest();  Pelif(); }
0442 static void Ftrue (void) { nest();  Strue(); }
0443 static void Ffalse(void) { nest();  Sfalse(); }
0444 /* variable pedantry for obfuscated lines */
0445 static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
0446 static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
0447 static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
0448 /* ignore comments in this block */
0449 static void Idrop (void) { Fdrop();  ignoreon(); }
0450 static void Itrue (void) { Ftrue();  ignoreon(); }
0451 static void Ifalse(void) { Ffalse(); ignoreon(); }
0452 /* modify this line */
0453 static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
0454 static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
0455 static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
0456 static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
0457 
/*
 * The state transition table. Rows are indexed by the current Ifstate and
 * columns by the Linetype of the line just parsed; process() calls the
 * selected function. Each row lists the 10 directive kinds, then the same 10
 * kinds in their "dodgy" form, then PLAIN, EOF and ERROR (see the column
 * legend at the bottom of the table).
 */
0458 static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
0459 /* IS_OUTSIDE */
0460 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
0461   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
0462   print, done,  abort },
0463 /* IS_FALSE_PREFIX: false #if followed by false #elifs */
0464 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
0465   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
0466   drop,  Eeof,  abort },
0467 /* IS_TRUE_PREFIX: first non-false #(el)if is true */
0468 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
0469   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
0470   print, Eeof,  abort },
0471 /* IS_PASS_MIDDLE: first non-false #(el)if is unknown */
0472 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
0473   Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
0474   print, Eeof,  abort },
0475 /* IS_FALSE_MIDDLE: a false #elif after a pass state */
0476 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
0477   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
0478   drop,  Eeof,  abort },
0479 /* IS_TRUE_MIDDLE: a true #elif after a pass state */
0480 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
0481   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
0482   print, Eeof,  abort },
0483 /* IS_PASS_ELSE: an else after a pass state */
0484 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
0485   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
0486   print, Eeof,  abort },
0487 /* IS_FALSE_ELSE: an else after a true state */
0488 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
0489   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
0490   drop,  Eeof,  abort },
0491 /* IS_TRUE_ELSE: an else after only false states */
0492 { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
0493   Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
0494   print, Eeof,  abort },
0495 /* IS_FALSE_TRAILER: #elifs after a true are false */
0496 { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
0497   Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
0498   drop,  Eeof,  abort }
0499 /*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
0500   TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
0501   PLAIN  EOF    ERROR */
0502 };
0503 
0504 /*
0505  * State machine utility functions
0506  */
0507 static void
0508 ignoreoff(void)
0509 {
0510     if (depth == 0)
0511         abort(); /* bug */
0512     ignoring[depth] = ignoring[depth-1];
0513 }
0514 static void
0515 ignoreon(void)
0516 {
0517     ignoring[depth] = true;
0518 }
0519 static void
0520 keywordedit(const char *replacement)
0521 {
0522     snprintf(keyword, tline + sizeof(tline) - keyword,
0523         "%s%s", replacement, newline);
0524     print();
0525 }
0526 static void
0527 nest(void)
0528 {
0529     if (depth > MAXDEPTH-1)
0530         abort(); /* bug */
0531     if (depth == MAXDEPTH-1)
0532         error("Too many levels of nesting");
0533     depth += 1;
0534     stifline[depth] = linenum;
0535 }
0536 static void
0537 unnest(void)
0538 {
0539     if (depth == 0)
0540         abort(); /* bug */
0541     depth -= 1;
0542 }
0543 static void
0544 state(Ifstate is)
0545 {
0546     ifstate[depth] = is;
0547 }
0548 
0549 /*
0550  * Write a line to the output or not, according to command line options.
0551  */
0552 static void
0553 flushline(bool keep)
0554 {
0555     if (symlist)
0556         return;
0557     if (keep ^ complement) {
0558         bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
0559         if (blankline && compblank && blankcount != blankmax) {
0560             delcount += 1;
0561             blankcount += 1;
0562         } else {
0563             if (lnnum && delcount > 0)
0564                 printf("#line %d%s", linenum, newline);
0565             fputs(tline, output);
0566             delcount = 0;
0567             blankmax = blankcount = blankline ? blankcount + 1 : 0;
0568         }
0569     } else {
0570         if (lnblank)
0571             fputs(newline, output);
0572         exitstat = 1;
0573         delcount += 1;
0574         blankcount = 0;
0575     }
0576     if (debugging)
0577         fflush(output);
0578 }
0579 
0580 /*
0581  * The driver for the state machine.
0582  */
0583 static void
0584 process(void)
0585 {
0586     /* When compressing blank lines, act as if the file
0587        is preceded by a large number of blank lines. */
0588     blankmax = blankcount = 1000;
0589     for (;;) {
0590         Linetype lineval = parseline();
0591         trans_table[ifstate[depth]][lineval]();
0592         debug("process line %d %s -> %s depth %d",
0593             linenum, linetype_name[lineval],
0594             ifstate_name[ifstate[depth]], depth);
0595     }
0596 }
0597 
0598 /*
0599  * Flush the output and handle errors.
0600  */
0601 static void
0602 closeout(void)
0603 {
0604     if (symdepth && !zerosyms)
0605         printf("\n");
0606     if (fclose(output) == EOF) {
0607         warn("couldn't write to %s", ofilename);
0608         if (overwriting) {
0609             unlink(tempname);
0610             errx(2, "%s unchanged", filename);
0611         } else {
0612             exit(2);
0613         }
0614     }
0615 }
0616 
0617 /*
0618  * Clean up and exit.
0619  */
0620 static void
0621 done(void)
0622 {
0623     if (incomment)
0624         error("EOF in comment");
0625     closeout();
0626     if (overwriting && rename(tempname, ofilename) == -1) {
0627         warn("couldn't rename temporary file");
0628         unlink(tempname);
0629         errx(2, "%s unchanged", ofilename);
0630     }
0631     exit(exitstat);
0632 }
0633 
0634 /*
0635  * Parse a line and determine its type. We keep the preprocessor line
0636  * parser state between calls in the global variable linestate, with
0637  * help from skipcomment().
0638  */
0639 static Linetype
0640 parseline(void)
0641 {
0642     const char *cp;
0643     int cursym;
0644     int kwlen;
0645     Linetype retval;
0646     Comment_state wascomment;
0647 
0648     linenum++;
0649     if (fgets(tline, MAXLINE, input) == NULL)
0650         return (LT_EOF);
0651     if (newline == NULL) {
0652         if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
0653             newline = newline_crlf;
0654         else
0655             newline = newline_unix;
0656     }
0657     retval = LT_PLAIN;
0658     wascomment = incomment;
0659     cp = skipcomment(tline);
0660     if (linestate == LS_START) {
0661         if (*cp == '#') {
0662             linestate = LS_HASH;
0663             firstsym = true;
0664             cp = skipcomment(cp + 1);
0665         } else if (*cp != '\0')
0666             linestate = LS_DIRTY;
0667     }
0668     if (!incomment && linestate == LS_HASH) {
0669         keyword = tline + (cp - tline);
0670         cp = skipsym(cp);
0671         kwlen = cp - keyword;
0672         /* no way can we deal with a continuation inside a keyword */
0673         if (strncmp(cp, "\\\r\n", 3) == 0 ||
0674             strncmp(cp, "\\\n", 2) == 0)
0675             Eioccc();
0676         if (strlcmp("ifdef", keyword, kwlen) == 0 ||
0677             strlcmp("ifndef", keyword, kwlen) == 0) {
0678             cp = skipcomment(cp);
0679             if ((cursym = findsym(cp)) < 0)
0680                 retval = LT_IF;
0681             else {
0682                 retval = (keyword[2] == 'n')
0683                     ? LT_FALSE : LT_TRUE;
0684                 if (value[cursym] == NULL)
0685                     retval = (retval == LT_TRUE)
0686                         ? LT_FALSE : LT_TRUE;
0687                 if (ignore[cursym])
0688                     retval = (retval == LT_TRUE)
0689                         ? LT_TRUEI : LT_FALSEI;
0690             }
0691             cp = skipsym(cp);
0692         } else if (strlcmp("if", keyword, kwlen) == 0)
0693             retval = ifeval(&cp);
0694         else if (strlcmp("elif", keyword, kwlen) == 0)
0695             retval = ifeval(&cp) - LT_IF + LT_ELIF;
0696         else if (strlcmp("else", keyword, kwlen) == 0)
0697             retval = LT_ELSE;
0698         else if (strlcmp("endif", keyword, kwlen) == 0)
0699             retval = LT_ENDIF;
0700         else {
0701             linestate = LS_DIRTY;
0702             retval = LT_PLAIN;
0703         }
0704         cp = skipcomment(cp);
0705         if (*cp != '\0') {
0706             linestate = LS_DIRTY;
0707             if (retval == LT_TRUE || retval == LT_FALSE ||
0708                 retval == LT_TRUEI || retval == LT_FALSEI)
0709                 retval = LT_IF;
0710             if (retval == LT_ELTRUE || retval == LT_ELFALSE)
0711                 retval = LT_ELIF;
0712         }
0713         if (retval != LT_PLAIN && (wascomment || incomment)) {
0714             retval += LT_DODGY;
0715             if (incomment)
0716                 linestate = LS_DIRTY;
0717         }
0718         /* skipcomment normally changes the state, except
0719            if the last line of the file lacks a newline, or
0720            if there is too much whitespace in a directive */
0721         if (linestate == LS_HASH) {
0722             size_t len = cp - tline;
0723             if (fgets(tline + len, MAXLINE - len, input) == NULL) {
0724                 /* append the missing newline */
0725                 strcpy(tline + len, newline);
0726                 cp += strlen(newline);
0727                 linestate = LS_START;
0728             } else {
0729                 linestate = LS_DIRTY;
0730             }
0731         }
0732     }
0733     if (linestate == LS_DIRTY) {
0734         while (*cp != '\0')
0735             cp = skipcomment(cp + 1);
0736     }
0737     debug("parser line %d state %s comment %s line", linenum,
0738         comment_name[incomment], linestate_name[linestate]);
0739     return (retval);
0740 }
0741 
0742 /*
0743  * These are the binary operators that are supported by the expression
0744  * evaluator.
0745  */
0746 static Linetype op_strict(int *p, int v, Linetype at, Linetype bt) {
0747     if(at == LT_IF || bt == LT_IF) return (LT_IF);
0748     return (*p = v, v ? LT_TRUE : LT_FALSE);
0749 }
0750 static Linetype op_lt(int *p, Linetype at, int a, Linetype bt, int b) {
0751     return op_strict(p, a < b, at, bt);
0752 }
0753 static Linetype op_gt(int *p, Linetype at, int a, Linetype bt, int b) {
0754     return op_strict(p, a > b, at, bt);
0755 }
0756 static Linetype op_le(int *p, Linetype at, int a, Linetype bt, int b) {
0757     return op_strict(p, a <= b, at, bt);
0758 }
0759 static Linetype op_ge(int *p, Linetype at, int a, Linetype bt, int b) {
0760     return op_strict(p, a >= b, at, bt);
0761 }
0762 static Linetype op_eq(int *p, Linetype at, int a, Linetype bt, int b) {
0763     return op_strict(p, a == b, at, bt);
0764 }
0765 static Linetype op_ne(int *p, Linetype at, int a, Linetype bt, int b) {
0766     return op_strict(p, a != b, at, bt);
0767 }
0768 static Linetype op_or(int *p, Linetype at, int a, Linetype bt, int b) {
0769     if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
0770         return (*p = 1, LT_TRUE);
0771     return op_strict(p, a || b, at, bt);
0772 }
0773 static Linetype op_and(int *p, Linetype at, int a, Linetype bt, int b) {
0774     if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
0775         return (*p = 0, LT_FALSE);
0776     return op_strict(p, a && b, at, bt);
0777 }
0778 
0779 /*
0780  * An evaluation function takes three arguments, as follows: (1) a pointer to
0781  * an element of the precedence table which lists the operators at the current
0782  * level of precedence; (2) a pointer to an integer which will receive the
0783  * value of the expression; and (3) a pointer to a char* that points to the
0784  * expression to be evaluated and that is updated to the end of the expression
0785  * when evaluation is complete. The function returns LT_FALSE if the value of
0786  * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
0787  * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
0788  */
0789 struct ops;
0790 
0791 typedef Linetype eval_fn(const struct ops *, int *, const char **);
0792 
0793 static eval_fn eval_table, eval_unary;
0794 
0795 /*
0796  * The precedence table. Expressions involving binary operators are evaluated
0797  * in a table-driven way by eval_table. When it evaluates a subexpression it
0798  * calls the inner function with its first argument pointing to the next
0799  * element of the table. Innermost expressions have special non-table-driven
0800  * handling.
0801  */
0802 static const struct ops {
0803     eval_fn *inner;
0804     struct op {
0805         const char *str;
0806         Linetype (*fn)(int *, Linetype, int, Linetype, int);
0807     } op[5];
0808 } eval_ops[] = {
0809     { eval_table, { { "||", op_or } } },
0810     { eval_table, { { "&&", op_and } } },
0811     { eval_table, { { "==", op_eq },
0812             { "!=", op_ne } } },
0813     { eval_unary, { { "<=", op_le },
0814             { ">=", op_ge },
0815             { "<", op_lt },
0816             { ">", op_gt } } }
0817 };
0818 
0819 /*
0820  * Function for evaluating the innermost parts of expressions,
0821  * viz. !expr (expr) number defined(symbol) symbol
0822  * We reset the constexpr flag in the last two cases.
0823  */
0824 static Linetype
0825 eval_unary(const struct ops *ops, int *valp, const char **cpp)
0826 {
0827     const char *cp;
0828     char *ep;
0829     int sym;
0830     bool defparen;
0831     Linetype lt;
0832 
0833     cp = skipcomment(*cpp);
0834     if (*cp == '!') {
0835         debug("eval%d !", ops - eval_ops);
0836         cp++;
0837         lt = eval_unary(ops, valp, &cp);
0838         if (lt == LT_ERROR)
0839             return (LT_ERROR);
0840         if (lt != LT_IF) {
0841             *valp = !*valp;
0842             lt = *valp ? LT_TRUE : LT_FALSE;
0843         }
0844     } else if (*cp == '(') {
0845         cp++;
0846         debug("eval%d (", ops - eval_ops);
0847         lt = eval_table(eval_ops, valp, &cp);
0848         if (lt == LT_ERROR)
0849             return (LT_ERROR);
0850         cp = skipcomment(cp);
0851         if (*cp++ != ')')
0852             return (LT_ERROR);
0853     } else if (isdigit((unsigned char)*cp)) {
0854         debug("eval%d number", ops - eval_ops);
0855         *valp = strtol(cp, &ep, 0);
0856         if (ep == cp)
0857             return (LT_ERROR);
0858         lt = *valp ? LT_TRUE : LT_FALSE;
0859         cp = skipsym(cp);
0860     } else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
0861         cp = skipcomment(cp+7);
0862         debug("eval%d defined", ops - eval_ops);
0863         if (*cp == '(') {
0864             cp = skipcomment(cp+1);
0865             defparen = true;
0866         } else {
0867             defparen = false;
0868         }
0869         sym = findsym(cp);
0870         if (sym < 0) {
0871             lt = LT_IF;
0872         } else {
0873             *valp = (value[sym] != NULL);
0874             lt = *valp ? LT_TRUE : LT_FALSE;
0875         }
0876         cp = skipsym(cp);
0877         cp = skipcomment(cp);
0878         if (defparen && *cp++ != ')')
0879             return (LT_ERROR);
0880         constexpr = false;
0881     } else if (!endsym(*cp)) {
0882         debug("eval%d symbol", ops - eval_ops);
0883         sym = findsym(cp);
0884         cp = skipsym(cp);
0885         if (sym < 0) {
0886             lt = LT_IF;
0887             cp = skipargs(cp);
0888         } else if (value[sym] == NULL) {
0889             *valp = 0;
0890             lt = LT_FALSE;
0891         } else {
0892             *valp = strtol(value[sym], &ep, 0);
0893             if (*ep != '\0' || ep == value[sym])
0894                 return (LT_ERROR);
0895             lt = *valp ? LT_TRUE : LT_FALSE;
0896             cp = skipargs(cp);
0897         }
0898         constexpr = false;
0899     } else {
0900         debug("eval%d bad expr", ops - eval_ops);
0901         return (LT_ERROR);
0902     }
0903 
0904     *cpp = cp;
0905     debug("eval%d = %d", ops - eval_ops, *valp);
0906     return (lt);
0907 }
0908 
0909 /*
0910  * Table-driven evaluation of binary operators.
0911  */
0912 static Linetype
0913 eval_table(const struct ops *ops, int *valp, const char **cpp)
0914 {
0915     const struct op *op;
0916     const char *cp;
0917     int val;
0918     Linetype lt, rt;
0919 
0920     debug("eval%d", ops - eval_ops);
0921     cp = *cpp;
0922     lt = ops->inner(ops+1, valp, &cp);
0923     if (lt == LT_ERROR)
0924         return (LT_ERROR);
0925     for (;;) {
0926         cp = skipcomment(cp);
0927         for (op = ops->op; op->str != NULL; op++)
0928             if (strncmp(cp, op->str, strlen(op->str)) == 0)
0929                 break;
0930         if (op->str == NULL)
0931             break;
0932         cp += strlen(op->str);
0933         debug("eval%d %s", ops - eval_ops, op->str);
0934         rt = ops->inner(ops+1, &val, &cp);
0935         if (rt == LT_ERROR)
0936             return (LT_ERROR);
0937         lt = op->fn(valp, lt, *valp, rt, val);
0938     }
0939 
0940     *cpp = cp;
0941     debug("eval%d = %d", ops - eval_ops, *valp);
0942     debug("eval%d lt = %s", ops - eval_ops, linetype_name[lt]);
0943     return (lt);
0944 }
0945 
0946 /*
0947  * Evaluate the expression on a #if or #elif line. If we can work out
0948  * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
0949  * return just a generic LT_IF.
0950  */
0951 static Linetype
0952 ifeval(const char **cpp)
0953 {
0954     int ret;
0955     int val = 0;
0956 
0957     debug("eval %s", *cpp);
0958     constexpr = killconsts ? false : true;
0959     ret = eval_table(eval_ops, &val, cpp);
0960     debug("eval = %d", val);
0961     return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
0962 }
0963 
0964 /*
0965  * Skip over comments, strings, and character literals and stop at the
0966  * next character position that is not whitespace. Between calls we keep
0967  * the comment state in the global variable incomment, and we also adjust
0968  * the global variable linestate when we see a newline.
0969  * XXX: doesn't cope with the buffer splitting inside a state transition.
      *
      * Returns a pointer to the first character that was not skipped. If the
      * end of the string is reached first, the returned pointer addresses the
      * terminating '\0' and incomment records the state we stopped in.
      *
      * When the global text flag or ignoring[depth] is set, no comment or
      * literal parsing is done at all: only whitespace is skipped.
0970  */
0971 static const char *
0972 skipcomment(const char *cp)
0973 {
          /* Whitespace-only mode: incomment is neither read nor changed. */
0974     if (text || ignoring[depth]) {
0975         for (; isspace((unsigned char)*cp); cp++)
0976             if (*cp == '\n')
0977                 linestate = LS_START;
0978         return (cp);
0979     }
          /*
           * The whole if / else-if / else-switch chain below is the body of
           * this brace-less while loop; every switch case ends in continue
           * (or returns), which re-tests the loop condition.
           */
0980     while (*cp != '\0')
0981         /* don't reset to LS_START after a line continuation */
0982         if (strncmp(cp, "\\\r\n", 3) == 0)
0983             cp += 3;
0984         else if (strncmp(cp, "\\\n", 2) == 0)
0985             cp += 2;
0986         else switch (incomment) {
0987         case NO_COMMENT:
                  /*
                   * A '/' followed by a line continuation may be the start
                   * of a comment; which kind is decided in STARTING_COMMENT.
                   */
0988             if (strncmp(cp, "/\\\r\n", 4) == 0) {
0989                 incomment = STARTING_COMMENT;
0990                 cp += 4;
0991             } else if (strncmp(cp, "/\\\n", 3) == 0) {
0992                 incomment = STARTING_COMMENT;
0993                 cp += 3;
0994             } else if (strncmp(cp, "/*", 2) == 0) {
0995                 incomment = C_COMMENT;
0996                 cp += 2;
0997             } else if (strncmp(cp, "//", 2) == 0) {
0998                 incomment = CXX_COMMENT;
0999                 cp += 2;
1000             } else if (strncmp(cp, "\'", 1) == 0) {
                      /* Opening quote of a literal: the line is now dirty. */
1001                 incomment = CHAR_LITERAL;
1002                 linestate = LS_DIRTY;
1003                 cp += 1;
1004             } else if (strncmp(cp, "\"", 1) == 0) {
1005                 incomment = STRING_LITERAL;
1006                 linestate = LS_DIRTY;
1007                 cp += 1;
1008             } else if (strncmp(cp, "\n", 1) == 0) {
1009                 linestate = LS_START;
1010                 cp += 1;
1011             } else if (strchr(" \r\t", *cp) != NULL) {
1012                 cp += 1;
1013             } else
                      /* First character that is not whitespace: stop here. */
1014                 return (cp);
1015             continue;
1016         case CXX_COMMENT:
                  /* A newline ends the comment; it is consumed either way. */
1017             if (strncmp(cp, "\n", 1) == 0) {
1018                 incomment = NO_COMMENT;
1019                 linestate = LS_START;
1020             }
1021             cp += 1;
1022             continue;
1023         case CHAR_LITERAL:
1024         case STRING_LITERAL:
                  /* Only the matching quote character closes the literal. */
1025             if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
1026                 (incomment == STRING_LITERAL && cp[0] == '\"')) {
1027                 incomment = NO_COMMENT;
1028                 cp += 1;
1029             } else if (cp[0] == '\\') {
                      /*
                       * Skip the backslash together with the character it
                       * escapes, but never step past the end of the string.
                       */
1030                 if (cp[1] == '\0')
1031                     cp += 1;
1032                 else
1033                     cp += 2;
1034             } else if (strncmp(cp, "\n", 1) == 0) {
                      /*
                       * cp is not advanced in this branch; error() ends in
                       * a call to errx(), so control is not expected to
                       * come back here.
                       */
1035                 if (incomment == CHAR_LITERAL)
1036                     error("unterminated char literal");
1037                 else
1038                     error("unterminated string literal");
1039             } else
1040                 cp += 1;
1041             continue;
1042         case C_COMMENT:
                  /*
                   * A '*' followed by a line continuation may be the end of
                   * the comment; that is decided in FINISHING_COMMENT.
                   */
1043             if (strncmp(cp, "*\\\r\n", 4) == 0) {
1044                 incomment = FINISHING_COMMENT;
1045                 cp += 4;
1046             } else if (strncmp(cp, "*\\\n", 3) == 0) {
1047                 incomment = FINISHING_COMMENT;
1048                 cp += 3;
1049             } else if (strncmp(cp, "*/", 2) == 0) {
1050                 incomment = NO_COMMENT;
1051                 cp += 2;
1052             } else
1053                 cp += 1;
1054             continue;
1055         case STARTING_COMMENT:
                  /* We have seen '/' plus a line continuation. */
1056             if (*cp == '*') {
1057                 incomment = C_COMMENT;
1058                 cp += 1;
1059             } else if (*cp == '/') {
1060                 incomment = CXX_COMMENT;
1061                 cp += 1;
1062             } else {
                      /*
                       * Not a comment after all. The current character is
                       * not consumed; it is re-examined in NO_COMMENT.
                       */
1063                 incomment = NO_COMMENT;
1064                 linestate = LS_DIRTY;
1065             }
1066             continue;
1067         case FINISHING_COMMENT:
                  /* We have seen '*' plus a line continuation in a comment. */
1068             if (*cp == '/') {
1069                 incomment = NO_COMMENT;
1070                 cp += 1;
1071             } else
                      /* Still inside the comment; re-examine this character. */
1072                 incomment = C_COMMENT;
1073             continue;
1074         default:
1075             abort(); /* bug */
1076         }
          /* End of string reached; incomment keeps the state for later. */
1077     return (cp);
1078 }
1079 
/*
 * Skip macro arguments.
 *
 * Leading comments and whitespace are skipped first.  If the next character
 * is not '(' the position reached is returned.  Otherwise parentheses are
 * counted until the opening one is balanced, and the position after the
 * closing ')' (and anything skipcomment() steps over) is returned.  If the
 * string ends before the parentheses balance, the original pointer is
 * returned.
 */
static const char *
skipargs(const char *cp)
{
    const char *const start = cp;
    int nesting = 0;

    cp = skipcomment(cp);
    if (*cp != '(')
        return (cp);
    for (;;) {
        switch (*cp) {
        case '(':
            nesting++;
            break;
        case ')':
            nesting--;
            break;
        default:
            break;
        }
        cp = skipcomment(cp + 1);
        if (nesting == 0 || *cp == '\0')
            break;
    }
    /* Unbalanced: rewind and re-detect the syntax error later. */
    return (nesting == 0 ? cp : start);
}
1104 
/*
 * Skip over an identifier: return a pointer to the first character at or
 * after cp for which endsym() is true.
 */
static const char *
skipsym(const char *cp)
{
    const char *end;

    for (end = cp; !endsym(*end); end++)
        continue;
    return (end);
}
1115 
1116 /*
1117  * Look for the symbol in the symbol table. If it is found, we return
1118  * the symbol table index, else we return -1.
1119  */
1120 static int
1121 findsym(const char *str)
1122 {
1123     const char *cp;
1124     int symind;
1125 
1126     cp = skipsym(str);
1127     if (cp == str)
1128         return (-1);
1129     if (symlist) {
1130         if (symdepth && firstsym)
1131             printf("%s%3d", zerosyms ? "" : "\n", depth);
1132         firstsym = zerosyms = false;
1133         printf("%s%.*s%s",
1134             symdepth ? " " : "",
1135             (int)(cp-str), str,
1136             symdepth ? "" : "\n");
1137         /* we don't care about the value of the symbol */
1138         return (0);
1139     }
1140     for (symind = 0; symind < nsyms; ++symind) {
1141         if (strlcmp(symname[symind], str, cp-str) == 0) {
1142             debug("findsym %s %s", symname[symind],
1143                 value[symind] ? value[symind] : "");
1144             return (symind);
1145         }
1146     }
1147     return (-1);
1148 }
1149 
1150 /*
1151  * Add a symbol to the symbol table.
1152  */
1153 static void
1154 addsym(bool ignorethis, bool definethis, char *sym)
1155 {
1156     int symind;
1157     char *val;
1158 
1159     symind = findsym(sym);
1160     if (symind < 0) {
1161         if (nsyms >= MAXSYMS)
1162             errx(2, "too many symbols");
1163         symind = nsyms++;
1164     }
1165     symname[symind] = sym;
1166     ignore[symind] = ignorethis;
1167     val = sym + (skipsym(sym) - sym);
1168     if (definethis) {
1169         if (*val == '=') {
1170             value[symind] = val+1;
1171             *val = '\0';
1172         } else if (*val == '\0')
1173             value[symind] = "1";
1174         else
1175             usage();
1176     } else {
1177         if (*val != '\0')
1178             usage();
1179         value[symind] = NULL;
1180     }
1181     debug("addsym %s=%s", symname[symind],
1182         value[symind] ? value[symind] : "undef");
1183 }
1184 
/*
 * Compare s with the first n characters of t.
 * Like strncmp(), except that s must also end there: after n matching
 * characters (or on reaching the end of t) the result is the next
 * character of s, so it is zero only when s has nothing left over.
 * On a mismatch the difference of the two characters, taken as
 * unsigned char, is returned.
 */
static int
strlcmp(const char *s, const char *t, size_t n)
{
    size_t i;

    for (i = 0; i < n && t[i] != '\0'; i++) {
        const unsigned char sc = (unsigned char)s[i];
        const unsigned char tc = (unsigned char)t[i];

        if (sc != tc)
            return (sc - tc);
    }
    return ((unsigned char)s[i]);
}
1199 
1200 /*
1201  * Diagnostics.
1202  */
1203 static void
1204 debug(const char *msg, ...)
1205 {
1206     va_list ap;
1207 
1208     if (debugging) {
1209         va_start(ap, msg);
1210         vwarnx(msg, ap);
1211         va_end(ap);
1212     }
1213 }
1214 
1215 static void
1216 error(const char *msg)
1217 {
1218     if (depth == 0)
1219         warnx("%s: %d: %s", filename, linenum, msg);
1220     else
1221         warnx("%s: %d: %s (#if line %d depth %d)",
1222             filename, linenum, msg, stifline[depth], depth);
1223     closeout();
1224     errx(2, "output may be truncated");
1225 }