0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003 * Lexical analysis for genksyms.
0004 * Copyright 1996, 1997 Linux International.
0005 *
0006 * New implementation contributed by Richard Henderson <rth@tamu.edu>
0007 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
0008 *
0009 * Taken from Linux modutils 2.4.22.
0010 */
0011
0012 %{
0013
0014 #include <limits.h>
0015 #include <stdlib.h>
0016 #include <string.h>
0017 #include <ctype.h>
0018
0019 #include "genksyms.h"
0020 #include "parse.tab.h"
0021
/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.
   YY_DECL gives the flex-generated first stage the file-local name
   yylex1(), which leaves the name yylex() free for the hand-written
   second stage defined later in this file.  */
#define YY_DECL		static int yylex1(void)
0025
0026 %}
0027
/* Identifier: a letter, underscore or dollar sign, followed by any number
   of letters, digits, underscores or dollar signs.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal, decimal or hexadecimal digits, optionally
   followed by one of the suffixes U, L, UL or LU (in either case).  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a number containing a decimal point with an
   optional exponent, or plain digits with a mandatory exponent; both forms
   take an optional F or L suffix.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally prefixed with L.  A backslash
   always consumes the character that follows it, so an escaped quote does
   not terminate the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Multi-character operators: one of the listed operator characters
   followed by an equals sign, or one of the pairs for logical and,
   logical or, arrow, left shift and right shift.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

/* Ask flex not to generate its input() routine.  */
%option noinput
0050
0051 %%
0052
0053
0054 /* Keep track of our location in the original source files. */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* The first rule above matches a preprocessor line marker: a hash sign,
    a number and a quoted file name.  The whole line is handed to yylex()
    as FILENAME, and yylex() extracts the name and the line number from
    yytext.  Any other line starting with a hash sign, and every plain
    newline, only advances cur_line.  */

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are not separated from ordinary
    identifiers here; yylex() does that.  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];
0079
0080
0081 %%
0082
0083 /* Bring in the keyword recognizer. */
0084
0085 #include "keywords.c"
0086
0087
0088 /* Macros to append to our phrase collection list. */
0089
0090 /*
 * We mark any token that equals a known enumerator as
0092 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
0093 * the only problem is struct and union members:
0094 * enum e { a, b }; struct s { int a, b; }
0095 * but in this case, the only effect will be, that the ABI checksums become
0096 * more volatile, which is acceptable. Also, such collisions are quite rare,
0097 * so far it was only observed in include/linux/telephony.h.
0098 */
/*
 * _APP(T, L): append a copy of the token text T, of length L, to the
 * phrase list.
 *
 * The node currently held in next_node becomes cur_node and receives the
 * copy; a freshly allocated node then takes its place in next_node and
 * links back to cur_node.  L + 1 bytes are copied, so T must be followed
 * by its terminating NUL.  The node is tagged SYM_ENUM_CONST when
 * find_symbol() reports the text in that namespace, SYM_NORMAL otherwise.
 *
 * The macro expands in a scope that provides cur_node, next_node and
 * in_source_file.  Both arguments are parenthesized and the copy size is
 * computed once, so expressions such as "cond ? a : b" are safe to pass.
 */
#define _APP(T,L)	do {							\
	const size_t app_size_ = (size_t)(L) + 1;				\
	cur_node = next_node;							\
	next_node = xmalloc(sizeof(*next_node));				\
	next_node->next = cur_node;						\
	cur_node->string = memcpy(xmalloc(app_size_), (T), app_size_);		\
	cur_node->tag =								\
	  find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?			\
	  SYM_ENUM_CONST : SYM_NORMAL;						\
	cur_node->in_source_file = in_source_file;				\
} while (0)

/* Append the text of the token flex matched most recently.  */
#define APP		_APP(yytext, yyleng)
0111
0112
0113 /* The second stage lexer. Here we incorporate knowledge of the state
0114 of the parser to tailor the tokens that are returned. */
0115
0116 int
0117 yylex(void)
0118 {
0119 static enum {
0120 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
0121 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
0122 } lexstate = ST_NOTSTARTED;
0123
0124 static int suppress_type_lookup, dont_want_brace_phrase;
0125 static struct string_list *next_node;
0126 static char *source_file;
0127
0128 int token, count = 0;
0129 struct string_list *cur_node;
0130
0131 if (lexstate == ST_NOTSTARTED)
0132 {
0133 next_node = xmalloc(sizeof(*next_node));
0134 next_node->next = NULL;
0135 lexstate = ST_NORMAL;
0136 }
0137
0138 repeat:
0139 token = yylex1();
0140
0141 if (token == 0)
0142 return 0;
0143 else if (token == FILENAME)
0144 {
0145 char *file, *e;
0146
0147 /* Save the filename and line number for later error messages. */
0148
0149 if (cur_filename)
0150 free(cur_filename);
0151
0152 file = strchr(yytext, '\"')+1;
0153 e = strchr(file, '\"');
0154 *e = '\0';
0155 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
0156 cur_line = atoi(yytext+2);
0157
0158 if (!source_file) {
0159 source_file = xstrdup(cur_filename);
0160 in_source_file = 1;
0161 } else {
0162 in_source_file = (strcmp(cur_filename, source_file) == 0);
0163 }
0164
0165 goto repeat;
0166 }
0167
0168 switch (lexstate)
0169 {
0170 case ST_NORMAL:
0171 switch (token)
0172 {
0173 case IDENT:
0174 APP;
0175 {
0176 int r = is_reserved_word(yytext, yyleng);
0177 if (r >= 0)
0178 {
0179 switch (token = r)
0180 {
0181 case ATTRIBUTE_KEYW:
0182 lexstate = ST_ATTRIBUTE;
0183 count = 0;
0184 goto repeat;
0185 case ASM_KEYW:
0186 lexstate = ST_ASM;
0187 count = 0;
0188 goto repeat;
0189 case TYPEOF_KEYW:
0190 lexstate = ST_TYPEOF;
0191 count = 0;
0192 goto repeat;
0193
0194 case STRUCT_KEYW:
0195 case UNION_KEYW:
0196 case ENUM_KEYW:
0197 dont_want_brace_phrase = 3;
0198 suppress_type_lookup = 2;
0199 goto fini;
0200
0201 case EXPORT_SYMBOL_KEYW:
0202 goto fini;
0203
0204 case STATIC_ASSERT_KEYW:
0205 lexstate = ST_STATIC_ASSERT;
0206 count = 0;
0207 goto repeat;
0208 }
0209 }
0210 if (!suppress_type_lookup)
0211 {
0212 if (find_symbol(yytext, SYM_TYPEDEF, 1))
0213 token = TYPE;
0214 }
0215 }
0216 break;
0217
0218 case '[':
0219 APP;
0220 lexstate = ST_BRACKET;
0221 count = 1;
0222 goto repeat;
0223
0224 case '{':
0225 APP;
0226 if (dont_want_brace_phrase)
0227 break;
0228 lexstate = ST_BRACE;
0229 count = 1;
0230 goto repeat;
0231
0232 case '=': case ':':
0233 APP;
0234 lexstate = ST_EXPRESSION;
0235 break;
0236
0237 default:
0238 APP;
0239 break;
0240 }
0241 break;
0242
0243 case ST_ATTRIBUTE:
0244 APP;
0245 switch (token)
0246 {
0247 case '(':
0248 ++count;
0249 goto repeat;
0250 case ')':
0251 if (--count == 0)
0252 {
0253 lexstate = ST_NORMAL;
0254 token = ATTRIBUTE_PHRASE;
0255 break;
0256 }
0257 goto repeat;
0258 default:
0259 goto repeat;
0260 }
0261 break;
0262
0263 case ST_ASM:
0264 APP;
0265 switch (token)
0266 {
0267 case '(':
0268 ++count;
0269 goto repeat;
0270 case ')':
0271 if (--count == 0)
0272 {
0273 lexstate = ST_NORMAL;
0274 token = ASM_PHRASE;
0275 break;
0276 }
0277 goto repeat;
0278 default:
0279 goto repeat;
0280 }
0281 break;
0282
0283 case ST_TYPEOF_1:
0284 if (token == IDENT)
0285 {
0286 if (is_reserved_word(yytext, yyleng) >= 0
0287 || find_symbol(yytext, SYM_TYPEDEF, 1))
0288 {
0289 yyless(0);
0290 unput('(');
0291 lexstate = ST_NORMAL;
0292 token = TYPEOF_KEYW;
0293 break;
0294 }
0295 _APP("(", 1);
0296 }
0297 lexstate = ST_TYPEOF;
0298 /* FALLTHRU */
0299
0300 case ST_TYPEOF:
0301 switch (token)
0302 {
0303 case '(':
0304 if ( ++count == 1 )
0305 lexstate = ST_TYPEOF_1;
0306 else
0307 APP;
0308 goto repeat;
0309 case ')':
0310 APP;
0311 if (--count == 0)
0312 {
0313 lexstate = ST_NORMAL;
0314 token = TYPEOF_PHRASE;
0315 break;
0316 }
0317 goto repeat;
0318 default:
0319 APP;
0320 goto repeat;
0321 }
0322 break;
0323
0324 case ST_BRACKET:
0325 APP;
0326 switch (token)
0327 {
0328 case '[':
0329 ++count;
0330 goto repeat;
0331 case ']':
0332 if (--count == 0)
0333 {
0334 lexstate = ST_NORMAL;
0335 token = BRACKET_PHRASE;
0336 break;
0337 }
0338 goto repeat;
0339 default:
0340 goto repeat;
0341 }
0342 break;
0343
0344 case ST_BRACE:
0345 APP;
0346 switch (token)
0347 {
0348 case '{':
0349 ++count;
0350 goto repeat;
0351 case '}':
0352 if (--count == 0)
0353 {
0354 lexstate = ST_NORMAL;
0355 token = BRACE_PHRASE;
0356 break;
0357 }
0358 goto repeat;
0359 default:
0360 goto repeat;
0361 }
0362 break;
0363
0364 case ST_EXPRESSION:
0365 switch (token)
0366 {
0367 case '(': case '[': case '{':
0368 ++count;
0369 APP;
0370 goto repeat;
0371 case '}':
0372 /* is this the last line of an enum declaration? */
0373 if (count == 0)
0374 {
0375 /* Put back the token we just read so's we can find it again
0376 after registering the expression. */
0377 unput(token);
0378
0379 lexstate = ST_NORMAL;
0380 token = EXPRESSION_PHRASE;
0381 break;
0382 }
0383 /* FALLTHRU */
0384 case ')': case ']':
0385 --count;
0386 APP;
0387 goto repeat;
0388 case ',': case ';':
0389 if (count == 0)
0390 {
0391 /* Put back the token we just read so's we can find it again
0392 after registering the expression. */
0393 unput(token);
0394
0395 lexstate = ST_NORMAL;
0396 token = EXPRESSION_PHRASE;
0397 break;
0398 }
0399 APP;
0400 goto repeat;
0401 default:
0402 APP;
0403 goto repeat;
0404 }
0405 break;
0406
0407 case ST_STATIC_ASSERT:
0408 APP;
0409 switch (token)
0410 {
0411 case '(':
0412 ++count;
0413 goto repeat;
0414 case ')':
0415 if (--count == 0)
0416 {
0417 lexstate = ST_NORMAL;
0418 token = STATIC_ASSERT_PHRASE;
0419 break;
0420 }
0421 goto repeat;
0422 default:
0423 goto repeat;
0424 }
0425 break;
0426
0427 default:
0428 exit(1);
0429 }
0430 fini:
0431
0432 if (suppress_type_lookup > 0)
0433 --suppress_type_lookup;
0434 if (dont_want_brace_phrase > 0)
0435 --dont_want_brace_phrase;
0436
0437 yylval = &next_node->next;
0438
0439 return token;
0440 }