2 * SPDX-License-Identifier: BSD-4-Clause
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1980, 1993
6 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
47 * Here we have the token scanner for indent. It scans off one token and puts
48 * it in the global variable "token". It returns a code indicating the type of token scanned.
57 #include "indent_globs.h"
58 #include "indent_codes.h"
72 * This table has to be sorted alphabetically, because it'll be used in binary
73 * search. For the same reason, string must be the first thing in struct templ.
75 struct templ specials[] =
/* Array of known type names; kept sorted by add_typename() and searched with bsearch() in lexi(). */
111 const char **typenames;
/* Index of the last used slot in typenames; -1 means no names have been added yet. */
113 int typename_top = -1;
116 { /* this is used to facilitate the decision of
117 * what type (alphanumeric, operator) each
119 0, 0, 0, 0, 0, 0, 0, 0,
120 0, 0, 0, 0, 0, 0, 0, 0,
121 0, 0, 0, 0, 0, 0, 0, 0,
122 0, 0, 0, 0, 0, 0, 0, 0,
123 0, 3, 0, 0, 1, 3, 3, 0,
124 0, 0, 3, 3, 0, 3, 0, 3,
125 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 0, 0, 3, 3, 3, 3,
127 0, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 0, 0, 0, 3, 1,
131 0, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 0, 3, 0, 3, 0
/*
 * Comparison callback passed to bsearch() when looking a token up in the
 * typenames array. e1 is the search key (a C string); e2 points at an array
 * element, which is itself a pointer to a string, so it is dereferenced once
 * before the two strings are compared with strcmp().
 */
138 strcmp_type(const void *e1, const void *e2)
140 return (strcmp(e1, *(const char * const *)e2));
/*
 * lexi -- scan one token from the input buffer.
 *
 * Characters are copied from buf_ptr into the token buffer: e_token is reset
 * to s_token on entry and the text is NUL-terminated before returning.
 * The parser state passed in is updated as a side effect:
 *   state->col_1    taken from state->last_nl on entry, cleared when leading
 *                   blanks are skipped
 *   state->last_nl  cleared on entry, set again for a newline or form feed
 *   state->last_u_d whether an operator following this token is unary
 *   state->keyword  classification of an identifier that turned out to be a
 *                   keyword or a type name
 *
 * The value handed back is a token code such as ident, newline, unary_op,
 * binary_op or postop.
 *
 * NOTE(review): chartype[] is indexed with a plain char (cast to int in some
 * places, uncast in the two-character assignment check); if char is signed
 * and the input byte has its high bit set the index is negative -- confirm
 * the table bounds.
 * NOTE(review): isdigit() and isxdigit() are called on plain char in the
 * number scanner while other ctype calls here cast to unsigned char --
 * confirm and make consistent.
 * NOTE(review): the strncpy() into state->procname copies at most size - 1
 * bytes and does not itself write a terminator; this presumably relies on the
 * buffer being zeroed beforehand -- confirm.
 */
144 lexi(struct parser_state *state)
146 int unary_delim; /* this is set to 1 if the current token
147 * forces a following operator to be unary */
148 int code; /* internal code to be returned */
149 char qchar; /* the delimiter character for a string */
151 e_token = s_token; /* point to start of place to save token */
153 state->col_1 = state->last_nl; /* tell world that this token started
154 * in column 1 iff the last thing
155 * scanned was a newline */
156 state->last_nl = false;
158 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
159 state->col_1 = false; /* leading blanks imply token is not in column
161 if (++buf_ptr >= buf_end)
165 /* Scan an alphanumeric token */
166 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
168 * we have a character or number
172 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
174 BASE_2, BASE_8, BASE_10, BASE_16
179 enum base in_base = BASE_10;
181 if (*buf_ptr == '0') {
182 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
184 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
186 else if (isdigit(buf_ptr[1]))
191 *e_token++ = *buf_ptr++;
192 *e_token++ = *buf_ptr++;
193 while (*buf_ptr == '0' || *buf_ptr == '1') {
195 *e_token++ = *buf_ptr++;
199 *e_token++ = *buf_ptr++;
/* NOTE(review): this appears to be the octal case, yet the loop below also accepts the digit 8; octal digits stop at 7 -- confirm intent */
200 while (*buf_ptr >= '0' && *buf_ptr <= '8') {
202 *e_token++ = *buf_ptr++;
206 *e_token++ = *buf_ptr++;
207 *e_token++ = *buf_ptr++;
208 while (isxdigit(*buf_ptr)) {
210 *e_token++ = *buf_ptr++;
215 if (*buf_ptr == '.') {
222 *e_token++ = *buf_ptr++;
223 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
224 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
230 *e_token++ = *buf_ptr++;
231 if (*buf_ptr == '+' || *buf_ptr == '-')
232 *e_token++ = *buf_ptr++;
239 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
241 *e_token++ = *buf_ptr++;
245 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
247 if (buf_ptr[1] == buf_ptr[0])
248 *e_token++ = *buf_ptr++;
249 *e_token++ = *buf_ptr++;
/* Not a number: copy alphanumeric characters into the token; a backslash followed by a newline is handled separately inside the loop */
257 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
258 /* fill_buffer() terminates buffer with newline */
259 if (*buf_ptr == BACKSLASH) {
260 if (*(buf_ptr + 1) == '\n') {
262 if (buf_ptr >= buf_end)
269 *e_token++ = *buf_ptr++;
270 if (buf_ptr >= buf_end)
/* The token is the single letter L directly followed by a quote character: presumably the prefix of a wide character or string literal -- confirm */
275 if (s_token[0] == 'L' && s_token[1] == '\0' &&
276 (*buf_ptr == '"' || *buf_ptr == '\''))
279 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
280 if (++buf_ptr >= buf_end)
284 if (state->last_token == structure && !state->p_l_follow) {
285 /* if last token was 'struct' and we're not
286 * in parentheses, then this token
287 * should be treated as a declaration */
288 state->last_u_d = true;
292 * Operator after identifier is binary unless last token was 'struct'
294 state->last_u_d = (state->last_token == structure);
298 sizeof(specials) / sizeof(specials[0]),
301 if (p == NULL) { /* not a special keyword... */
304 /* ... so maybe a type_t or a typedef */
305 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
306 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
307 bsearch(s_token, typenames, typename_top + 1,
308 sizeof(typenames[0]), strcmp_type))) {
309 state->keyword = 4; /* a type name */
310 state->last_u_d = true;
313 } else { /* we have a keyword */
314 state->keyword = p->rwcode;
315 state->last_u_d = true;
317 case 7: /* it is a switch */
319 case 8: /* a case or default */
322 case 3: /* a "struct" */
324 case 4: /* one of the declaration keywords */
326 if (state->p_l_follow) {
327 /* inside parens: cast, param list, offsetof or sizeof */
328 state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask;
330 if (p != NULL && p->rwcode == 3)
332 if (state->p_l_follow)
336 case 5: /* if, while, for */
339 case 6: /* do, else */
342 case 10: /* storage class specifier */
345 case 11: /* typedef */
348 default: /* all others are treated like any other
351 } /* end of switch */
352 } /* end of if (found_it) */
353 if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
354 state->in_parameter_declaration == 0 && state->block_init == 0) {
357 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
359 strncpy(state->procname, token, sizeof state->procname - 1);
361 state->in_parameter_declaration = 1;
366 * The following hack attempts to guess whether or not the current
367 * token is in fact a declaration keyword -- one that has been
370 else if (!state->p_l_follow && !state->block_init &&
372 ((*buf_ptr == '*' && buf_ptr[1] != '=') ||
373 isalpha((unsigned char)*buf_ptr)) &&
374 (state->last_token == semicolon || state->last_token == lbrace ||
375 state->last_token == rbrace)) {
376 state->keyword = 4; /* a type name */
377 state->last_u_d = true;
380 if (state->last_token == decl) /* if this is a declared variable,
381 * then following sign is unary */
382 state->last_u_d = true; /* will make "int a -1" work */
383 return (ident); /* the ident is not in the list */
384 } /* end of processing for alphanum character */
386 /* Scan a non-alphanumeric token */
388 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
391 if (++buf_ptr >= buf_end)
396 unary_delim = state->last_u_d;
397 state->last_nl = true; /* remember that we just had a newline */
398 code = (had_eof ? 0 : newline);
401 * if data has been exhausted, the newline is a dummy, and we should
402 * return code to stop
406 case '\'': /* start of quoted character */
407 case '"': /* start of string */
413 e_token = chfont(&bodyf, &stringf, e_token);
415 do { /* copy the string */
416 while (1) { /* move one character or [/<char>]<char> */
417 if (*buf_ptr == '\n') {
418 diag2(1, "Unterminated literal");
421 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
422 * since CHECK_SIZE guarantees that there
423 * are at least 5 entries left */
424 *e_token = *buf_ptr++;
425 if (buf_ptr >= buf_end)
427 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
428 if (*buf_ptr == '\n') /* check for escaped newline */
431 *++e_token = BACKSLASH;
432 if (*buf_ptr == BACKSLASH)
433 *++e_token = BACKSLASH;
435 *++e_token = *buf_ptr++;
436 ++e_token; /* we must increment this again because we
437 * copied two chars */
438 if (buf_ptr >= buf_end)
442 break; /* we copied one character */
443 } /* end of while (1) */
444 } while (*e_token++ != qchar);
446 e_token = chfont(&stringf, &bodyf, e_token - 1);
466 unary_delim = state->last_u_d;
489 * if (state->in_or_st) state->block_init = 1;
491 /* ? code = state->block_init ? lparen : lbrace; */
497 /* ? code = state->block_init ? rparen : rbrace; */
501 case 014: /* a form feed */
502 unary_delim = state->last_u_d;
503 state->last_nl = true; /* remember this so we can set 'state->col_1'
519 case '+': /* check for -, +, --, ++ */
520 code = (state->last_u_d ? unary_op : binary_op);
523 if (*buf_ptr == token[0]) {
524 /* check for doubled character */
525 *e_token++ = *buf_ptr++;
526 /* buffer overflow will be checked at end of loop */
527 if (state->last_token == ident || state->last_token == rparen) {
528 code = (state->last_u_d ? unary_op : postop);
529 /* check for following ++ or -- */
533 else if (*buf_ptr == '=')
534 /* check for operator += */
535 *e_token++ = *buf_ptr++;
536 else if (*buf_ptr == '>') {
537 /* check for operator -> */
538 *e_token++ = *buf_ptr++;
541 state->want_blank = false;
543 break; /* buffer overflow will be checked at end of
548 state->block_init = 1;
550 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
551 e_token[-1] = *buf_ptr++;
552 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
553 *e_token++ = *buf_ptr++;
554 *e_token++ = '='; /* Flip =+ to += */
558 if (*buf_ptr == '=') {/* == */
559 *e_token++ = '='; /* Flip =+ to += */
567 /* can drop thru!!! */
571 case '!': /* ops like <, <<, <=, !=, etc */
572 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
573 *e_token++ = *buf_ptr;
574 if (++buf_ptr >= buf_end)
578 *e_token++ = *buf_ptr++;
579 code = (state->last_u_d ? unary_op : binary_op);
585 if (!state->last_u_d) {
587 *e_token++ = *buf_ptr++;
591 while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) {
593 *e_token++ = *buf_ptr;
594 if (++buf_ptr >= buf_end)
600 while (isalpha((unsigned char)*tp) ||
601 isspace((unsigned char)*tp)) {
606 ps.procname[0] = ' ';
612 if (token[0] == '/' && *buf_ptr == '*') {
613 /* it is start of comment */
616 if (++buf_ptr >= buf_end)
620 unary_delim = state->last_u_d;
623 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
625 * handle ||, &&, etc, and also things as in int *****i
627 *e_token++ = *buf_ptr;
628 if (++buf_ptr >= buf_end)
631 code = (state->last_u_d ? unary_op : binary_op);
635 } /* end of switch */
636 if (buf_ptr >= buf_end) /* check for input buffer empty */
638 state->last_u_d = unary_delim;
639 *e_token = '\0'; /* null terminate the token */
/*
 * Allocate the initial typenames array with room for 16 entries and record
 * that capacity in typename_count (the assignment is embedded in the size
 * expression passed to malloc()).
 * NOTE(review): what happens when malloc() returns NULL is not visible in
 * this excerpt -- confirm against the full file.
 */
644 alloc_typenames(void)
647 typenames = (const char **)malloc(sizeof(typenames[0]) *
648 (typename_count = 16));
649 if (typenames == NULL)
654 add_typename(const char *key)
659 if (typename_top + 1 >= typename_count) {
660 typenames = realloc((void *)typenames,
661 sizeof(typenames[0]) * (typename_count *= 2));
662 if (typenames == NULL)
665 if (typename_top == -1)
666 typenames[++typename_top] = copy = strdup(key);
667 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
668 /* take advantage of sorted input */
669 if (comparison == 0) /* remove duplicates */
671 typenames[++typename_top] = copy = strdup(key);
676 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
677 /* find place for the new key */;
678 if (comparison == 0) /* remove duplicates */
680 memmove(&typenames[p + 1], &typenames[p],
681 sizeof(typenames[0]) * (++typename_top - p));
682 typenames[p] = copy = strdup(key);