2 * SPDX-License-Identifier: BSD-4-Clause
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1980, 1993
6 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
47 * Here we have the token scanner for indent. It scans off one token and puts
48 * it in the global variable "token". It returns a code, indicating the type
57 #include "indent_globs.h"
58 #include "indent_codes.h"
72 * This table has to be sorted alphabetically, because it'll be used in binary
73 * search. For the same reason, string must be the first thing in struct templ.
75 struct templ specials[] =
/* Array of known type names, kept in sorted order and searched with bsearch(). */
121 const char **typenames;
/* Index of the last used slot in typenames; -1 while the array is empty. */
123 int typename_top = -1;
126 { /* this is used to facilitate the decision of
127 * what type (alphanumeric, operator) each
129 0, 0, 0, 0, 0, 0, 0, 0,
130 0, 0, 0, 0, 0, 0, 0, 0,
131 0, 0, 0, 0, 0, 0, 0, 0,
132 0, 0, 0, 0, 0, 0, 0, 0,
133 0, 3, 0, 0, 1, 3, 3, 0,
134 0, 0, 3, 3, 0, 3, 0, 3,
135 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 0, 0, 3, 3, 3, 3,
137 0, 1, 1, 1, 1, 1, 1, 1,
138 1, 1, 1, 1, 1, 1, 1, 1,
139 1, 1, 1, 1, 1, 1, 1, 1,
140 1, 1, 1, 0, 0, 0, 3, 1,
141 0, 1, 1, 1, 1, 1, 1, 1,
142 1, 1, 1, 1, 1, 1, 1, 1,
143 1, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 0, 3, 0, 3, 0
/*
 * Comparison callback for bsearch() over an array of strings: e1 is the
 * search key (a C string), e2 points at an array element (a pointer to a
 * C string).  Returns the strcmp() ordering of the key against that element.
 */
148 strcmp_type(const void *e1, const void *e2)
150 return (strcmp(e1, *(const char * const *)e2));
/*
 * lexi -- scan the next token from the input buffer.
 *
 * Starting at buf_ptr, skips leading blanks and tabs, then copies one token
 * into the token buffer (s_token up to e_token) and NUL-terminates it.
 * Alphanumeric tokens (numbers, identifiers, keywords, type names) are
 * handled first; everything else goes through the operator/punctuation code.
 *
 * state: parser state, read and updated here.  col_1 is set when the token
 *        starts right after a newline with no leading blanks; last_nl,
 *        last_u_d, keyword, cast_mask, procname and other fields are also
 *        written depending on the token.
 *
 * Returns a token-type code: ident for a name that is not in the keyword
 * list; newline, unary_op, binary_op and postop are among the values
 * assigned to the local "code" for non-alphanumeric tokens.
 */
154 lexi(struct parser_state *state)
156 int unary_delim; /* this is set to 1 if the current token
157 * forces a following operator to be unary */
158 int code; /* internal code to be returned */
159 char qchar; /* the delimiter character for a string */
161 e_token = s_token; /* point to start of place to save token */
163 state->col_1 = state->last_nl; /* tell world that this token started
164 * in column 1 iff the last thing
165 * scanned was a newline */
166 state->last_nl = false;
168 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
169 state->col_1 = false; /* leading blanks imply token is not in column
171 if (++buf_ptr >= buf_end)
175 /* Scan an alphanumeric token */
176 if (chartype[*buf_ptr & 127] == alphanum ||
177 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
179 * we have a character or number
183 if (isdigit((unsigned char)*buf_ptr) ||
184 (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) {
190 * base 2, base 8, base 16:
192 if (buf_ptr[0] == '0' && buf_ptr[1] != '.') {
195 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
196 len = strspn(buf_ptr + 2, "01") + 2;
197 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
198 len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2;
/* NOTE(review): the accepted set below includes '8', which is not an octal digit -- confirm intent */
200 len = strspn(buf_ptr + 1, "012345678") + 1;
202 CHECK_SIZE_TOKEN(len);
203 memcpy(e_token, buf_ptr, len);
208 diag2(1, "Unterminated literal");
212 if (*buf_ptr == '.') {
219 *e_token++ = *buf_ptr++;
220 if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') {
221 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
226 *e_token++ = *buf_ptr++;
227 if (*buf_ptr == '+' || *buf_ptr == '-')
228 *e_token++ = *buf_ptr++;
235 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
236 *e_token++ = *buf_ptr++;
240 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
241 if (buf_ptr[1] == buf_ptr[0])
242 *e_token++ = *buf_ptr++;
243 *e_token++ = *buf_ptr++;
251 while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) {
252 /* fill_buffer() terminates buffer with newline */
253 if (*buf_ptr == BACKSLASH) {
254 if (*(buf_ptr + 1) == '\n') {
256 if (buf_ptr >= buf_end)
263 *e_token++ = *buf_ptr++;
264 if (buf_ptr >= buf_end)
269 if (s_token[0] == 'L' && s_token[1] == '\0' &&
270 (*buf_ptr == '"' || *buf_ptr == '\''))
273 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
274 if (++buf_ptr >= buf_end)
278 if (state->last_token == structure && !state->p_l_follow) {
279 /* if last token was 'struct' and we're not
280 * in parentheses, then this token
281 * should be treated as a declaration */
282 state->last_u_d = true;
286 * Operator after identifier is binary unless last token was 'struct'
288 state->last_u_d = (state->last_token == structure);
292 sizeof(specials) / sizeof(specials[0]),
295 if (p == NULL) { /* not a special keyword... */
298 /* ... so maybe a type_t or a typedef */
299 if ((opt.auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
300 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
301 bsearch(s_token, typenames, typename_top + 1,
302 sizeof(typenames[0]), strcmp_type))) {
303 state->keyword = 4; /* a type name */
304 state->last_u_d = true;
307 } else { /* we have a keyword */
308 state->keyword = p->rwcode;
309 state->last_u_d = true;
311 case 7: /* it is a switch */
313 case 8: /* a case or default */
316 case 3: /* a "struct" */
318 case 4: /* one of the declaration keywords */
320 if (state->p_l_follow) {
321 /* inside parens: cast, param list, offsetof or sizeof */
322 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
324 if (state->last_token == period || state->last_token == unary_op) {
328 if (p != NULL && p->rwcode == 3)
330 if (state->p_l_follow)
334 case 5: /* if, while, for */
337 case 6: /* do, else */
340 case 10: /* storage class specifier */
343 case 11: /* typedef */
346 default: /* all others are treated like any other
349 } /* end of switch */
350 } /* end of if (found_it) */
351 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
352 state->in_parameter_declaration == 0 && state->block_init == 0) {
355 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
/* NOTE(review): strncpy() leaves procname unterminated when token fills it, unless the last byte is already NUL -- confirm */
357 strncpy(state->procname, token, sizeof state->procname - 1);
359 state->in_parameter_declaration = 1;
364 * The following hack attempts to guess whether or not the current
365 * token is in fact a declaration keyword -- one that has been
368 else if (!state->p_l_follow && !state->block_init &&
370 ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
371 isalpha((unsigned char)*buf_ptr)) &&
372 (state->last_token == semicolon || state->last_token == lbrace ||
373 state->last_token == rbrace)) {
374 state->keyword = 4; /* a type name */
375 state->last_u_d = true;
378 if (state->last_token == decl) /* if this is a declared variable,
379 * then following sign is unary */
380 state->last_u_d = true; /* will make "int a -1" work */
381 return (ident); /* the ident is not in the list */
382 } /* end of processing for alphanum character */
384 /* Scan a non-alphanumeric token */
386 CHECK_SIZE_TOKEN(3); /* things like "<<=" */
387 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
390 if (++buf_ptr >= buf_end)
395 unary_delim = state->last_u_d;
396 state->last_nl = true; /* remember that we just had a newline */
397 code = (had_eof ? 0 : newline);
400 * if data has been exhausted, the newline is a dummy, and we should
401 * return code to stop
405 case '\'': /* start of quoted character */
406 case '"': /* start of string */
408 do { /* copy the string */
409 while (1) { /* move one character or [/<char>]<char> */
410 if (*buf_ptr == '\n') {
411 diag2(1, "Unterminated literal");
415 *e_token = *buf_ptr++;
416 if (buf_ptr >= buf_end)
418 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
419 if (*buf_ptr == '\n') /* check for escaped newline */
421 *++e_token = *buf_ptr++;
422 ++e_token; /* we must increment this again because we
423 * copied two chars */
424 if (buf_ptr >= buf_end)
428 break; /* we copied one character */
429 } /* end of while (1) */
430 } while (*e_token++ != qchar);
447 unary_delim = state->last_u_d;
470 * if (state->in_or_st) state->block_init = 1;
472 /* ? code = state->block_init ? lparen : lbrace; */
478 /* ? code = state->block_init ? rparen : rbrace; */
482 case 014: /* a form feed */
483 unary_delim = state->last_u_d;
484 state->last_nl = true; /* remember this so we can set 'state->col_1'
500 case '+': /* check for -, +, --, ++ */
501 code = (state->last_u_d ? unary_op : binary_op);
504 if (*buf_ptr == token[0]) {
505 /* check for doubled character */
506 *e_token++ = *buf_ptr++;
507 /* buffer overflow will be checked at end of loop */
508 if (state->last_token == ident || state->last_token == rparen) {
509 code = (state->last_u_d ? unary_op : postop);
510 /* check for following ++ or -- */
514 else if (*buf_ptr == '=')
515 /* check for operator += */
516 *e_token++ = *buf_ptr++;
517 else if (*buf_ptr == '>') {
518 /* check for operator -> */
519 *e_token++ = *buf_ptr++;
522 state->want_blank = false;
524 break; /* buffer overflow will be checked at end of
529 state->block_init = 1;
531 if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */
532 e_token[-1] = *buf_ptr++;
533 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
534 *e_token++ = *buf_ptr++;
535 *e_token++ = '='; /* Flip =+ to += */
539 if (*buf_ptr == '=') {/* == */
540 *e_token++ = '='; /* append the second '=' of "==" */
548 /* can drop thru!!! */
552 case '!': /* ops like <, <<, <=, !=, etc */
553 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
554 *e_token++ = *buf_ptr;
555 if (++buf_ptr >= buf_end)
559 *e_token++ = *buf_ptr++;
560 code = (state->last_u_d ? unary_op : binary_op);
566 if (!state->last_u_d) {
568 *e_token++ = *buf_ptr++;
572 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
573 if (*buf_ptr == '*') {
575 *e_token++ = *buf_ptr;
577 if (++buf_ptr >= buf_end)
583 while (isalpha((unsigned char)*tp) ||
584 isspace((unsigned char)*tp)) {
/* NOTE(review): writes through the global ps rather than the state parameter -- confirm they refer to the same object */
589 ps.procname[0] = ' ';
595 if (token[0] == '/' && *buf_ptr == '*') {
596 /* it is start of comment */
599 if (++buf_ptr >= buf_end)
603 unary_delim = state->last_u_d;
606 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
608 * handle ||, &&, etc, and also things as in int *****i
611 *e_token++ = *buf_ptr;
612 if (++buf_ptr >= buf_end)
615 code = (state->last_u_d ? unary_op : binary_op);
619 } /* end of switch */
620 if (buf_ptr >= buf_end) /* check for input buffer empty */
622 state->last_u_d = unary_delim;
624 *e_token = '\0'; /* null terminate the token */
/*
 * Allocate the initial typenames array with room for 16 entries, recording
 * that capacity in typename_count.
 * NOTE(review): the action taken when malloc() returns NULL is not shown
 * alongside the check below -- confirm.
 */
629 alloc_typenames(void)
632 typenames = (const char **)malloc(sizeof(typenames[0]) *
633 (typename_count = 16));
634 if (typenames == NULL)
639 add_typename(const char *key)
644 if (typename_top + 1 >= typename_count) {
645 typenames = realloc((void *)typenames,
646 sizeof(typenames[0]) * (typename_count *= 2));
647 if (typenames == NULL)
650 if (typename_top == -1)
651 typenames[++typename_top] = copy = strdup(key);
652 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
653 /* take advantage of sorted input */
654 if (comparison == 0) /* remove duplicates */
656 typenames[++typename_top] = copy = strdup(key);
661 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
662 /* find place for the new key */;
663 if (comparison == 0) /* remove duplicates */
665 memmove(&typenames[p + 1], &typenames[p],
666 sizeof(typenames[0]) * (++typename_top - p));
667 typenames[p] = copy = strdup(key);