2 * SPDX-License-Identifier: BSD-4-Clause
4 * Copyright (c) 1985 Sun Microsystems, Inc.
5 * Copyright (c) 1980, 1993
6 * The Regents of the University of California. All rights reserved.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
43 #include <sys/cdefs.h>
44 __FBSDID("$FreeBSD$");
47 * Here we have the token scanner for indent. It scans off one token and puts
48 * it in the global variable "token". It returns a code, indicating the type
57 #include "indent_globs.h"
58 #include "indent_codes.h"
72 * This table has to be sorted alphabetically, because it'll be used in binary
73 * search. For the same reason, string must be the first thing in struct templ.
75 struct templ specials[] =
/*
 * typenames is the table of known type-name strings.  It is kept in sorted
 * order so that it can be searched with bsearch().  typename_top is the index
 * of the last slot in use; -1 means the table is empty.
 */
111 const char **typenames;
113 int typename_top = -1;
/*
 * Character classification table (the declaration line is not visible in this
 * excerpt).  The scanner indexes it with a character code and compares the
 * entry against the names alphanum and opchar.  The 128 entries shown cover
 * character codes 0..127, eight per row.  Judging by the positions of the
 * values, 1 marks letters, digits, '_' and '$', 3 marks operator characters,
 * and 0 marks everything else -- TODO confirm against the alphanum/opchar
 * definitions, which are not visible here.
 */
116 { /* this is used to facilitate the decision of
117 * what type (alphanumeric, operator) each
119 0, 0, 0, 0, 0, 0, 0, 0,
120 0, 0, 0, 0, 0, 0, 0, 0,
121 0, 0, 0, 0, 0, 0, 0, 0,
122 0, 0, 0, 0, 0, 0, 0, 0,
123 0, 3, 0, 0, 1, 3, 3, 0,
124 0, 0, 3, 3, 0, 3, 0, 3,
125 1, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 0, 0, 3, 3, 3, 3,
127 0, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 0, 0, 0, 3, 1,
131 0, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 1, 1, 1, 1, 1,
133 1, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 0, 3, 0, 3, 0
/*
 * Comparison callback used with bsearch() on the typenames table.
 * e1 is the search key itself (a C string); e2 points at a table element,
 * i.e. at a pointer to a C string, so only e2 is dereferenced before the
 * two strings are handed to strcmp().
 */
138 strcmp_type(const void *e1, const void *e2)
140 return (strcmp(e1, *(const char * const *)e2));
/*
 * Body of the token scanner (its function header is not visible in this
 * excerpt, and a number of statements are missing from the listing).
 *
 * The code resets e_token to s_token, skips leading blanks and tabs, and then
 * classifies the text at buf_ptr:
 *   - alphanumeric tokens (numbers, identifiers, keywords, type names), for
 *     which codes such as ident or funcname are returned;
 *   - all other tokens, dispatched through case labels on individual
 *     characters (newline, quoted literals, form feed, operators, the start
 *     of a comment).
 * Token characters are appended through e_token; on the non-alphanumeric
 * path the token is NUL-terminated at the end of the block.
 *
 * last_code and l_struct are static and therefore persist between calls.
 * ps.last_u_d is updated from unary_delim at the end of the
 * non-alphanumeric path.
 */
146 int unary_delim; /* this is set to 1 if the current token
147 * forces a following operator to be unary */
148 static int last_code; /* the last token type returned */
149 static int l_struct; /* set to 1 if the last token was 'struct' */
150 int code; /* internal code to be returned */
151 char qchar; /* the delimiter character for a string */
153 e_token = s_token; /* point to start of place to save token */
155 ps.col_1 = ps.last_nl; /* tell world that this token started in
156 * column 1 iff the last thing scanned was nl */
159 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
160 ps.col_1 = false; /* leading blanks imply token is not in column
162 if (++buf_ptr >= buf_end)
166 /* Scan an alphanumeric token */
167 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
169 * we have a character or number
173 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
175 BASE_2, BASE_8, BASE_10, BASE_16
180 enum base in_base = BASE_10;
/*
 * Numeric literal.  in_base defaults to BASE_10; the tests below look at the
 * prefix: "0b"/"0B", "0x"/"0X", or '0' followed by another digit.  The
 * assignments they guard are not visible here -- presumably they select
 * BASE_2, BASE_16 and BASE_8 respectively.
 */
182 if (*buf_ptr == '0') {
183 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
185 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
187 else if (isdigit(buf_ptr[1]))
192 *e_token++ = *buf_ptr++;
193 *e_token++ = *buf_ptr++;
194 while (*buf_ptr == '0' || *buf_ptr == '1') {
196 *e_token++ = *buf_ptr++;
200 *e_token++ = *buf_ptr++;
/*
 * NOTE(review): this looks like the octal case (one prefix character copied,
 * then a digit loop).  The upper bound below is '8', so an '8' is consumed as
 * part of the literal although it is not an octal digit -- confirm that this
 * leniency is intended.
 */
201 while (*buf_ptr >= '0' && *buf_ptr <= '8') {
203 *e_token++ = *buf_ptr++;
207 *e_token++ = *buf_ptr++;
208 *e_token++ = *buf_ptr++;
209 while (isxdigit(*buf_ptr)) {
211 *e_token++ = *buf_ptr++;
/*
 * Remaining (apparently decimal) case: digits and '.' are consumed; an 'E' or
 * 'e' is accepted only while seenexp is unset and may be followed by one '+'
 * or '-'.  Statements on lines not visible here complete this logic.
 */
216 if (*buf_ptr == '.') {
223 *e_token++ = *buf_ptr++;
224 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
225 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
231 *e_token++ = *buf_ptr++;
232 if (*buf_ptr == '+' || *buf_ptr == '-')
233 *e_token++ = *buf_ptr++;
/*
 * Suffixes: a 'U'/'u' is taken while (seensfx & 1) is clear, and one of
 * f/F/l/L while (seensfx & 2) is clear.  When that letter is immediately
 * repeated (for example "LL") both characters are copied.
 */
240 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
242 *e_token++ = *buf_ptr++;
246 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
248 if (buf_ptr[1] == buf_ptr[0])
249 *e_token++ = *buf_ptr++;
250 *e_token++ = *buf_ptr++;
/*
 * Not a number: copy identifier characters.  BACKSLASH is admitted by the
 * loop condition so that a backslash followed by a newline can be recognised
 * inside the loop.
 */
258 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
259 /* fill_buffer() terminates buffer with newline */
260 if (*buf_ptr == BACKSLASH) {
261 if (*(buf_ptr + 1) == '\n') {
263 if (buf_ptr >= buf_end)
270 *e_token++ = *buf_ptr++;
271 if (buf_ptr >= buf_end)
/*
 * The token is exactly "L" and the next character opens a string or
 * character literal.  The action taken for this case is on a line that is
 * not visible here.
 */
276 if (s_token[0] == 'L' && s_token[1] == '\0' &&
277 (*buf_ptr == '"' || *buf_ptr == '\''))
280 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
281 if (++buf_ptr >= buf_end)
285 if (l_struct && !ps.p_l_follow) {
286 /* if last token was 'struct' and we're not
287 * in parentheses, then this token
288 * should be treated as a declaration */
294 ps.last_u_d = l_struct; /* Operator after identifier is binary
295 * unless last token was 'struct' */
297 last_code = ident; /* Remember that this is the code we will
302 sizeof(specials) / sizeof(specials[0]),
305 if (p == NULL) { /* not a special keyword... */
308 /* ... so maybe a type_t or a typedef */
/*
 * The identifier counts as a type name when auto_typedefs is set and its
 * last '_' begins the suffix "_t", or when the typenames table is non-empty
 * and bsearch() finds the identifier in it.
 */
309 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
310 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
311 bsearch(s_token, typenames, typename_top + 1,
312 sizeof(typenames[0]), strcmp_type))) {
313 ps.keyword = 4; /* a type name */
317 } else { /* we have a keyword */
318 ps.keyword = p->rwcode;
321 case 7: /* it is a switch */
323 case 8: /* a case or default */
326 case 3: /* a "struct" */
328 * Next time around, we will want to know that we have had a
334 case 4: /* one of the declaration keywords */
337 /* inside parens: cast, param list, offsetof or sizeof */
338 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
344 case 5: /* if, while, for */
347 case 6: /* do, else */
350 case 10: /* storage class specifier */
353 default: /* all others are treated like any other
356 } /* end of switch */
357 } /* end of if (found_it) */
/*
 * Function-name heuristic: the identifier is directly followed by '(' while
 * ps.tos <= 1, ps.ind_level == 0 and neither ps.in_parameter_declaration nor
 * ps.block_init is set.  On this path the token is copied into ps.procname
 * and funcname is returned.
 */
358 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
359 ps.in_parameter_declaration == 0 && ps.block_init == 0) {
362 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
/*
 * NOTE(review): strncpy() copies at most sizeof ps.procname - 1 bytes and
 * does not write a terminator when the token is that long or longer; this
 * relies on the final byte of ps.procname already being NUL -- confirm.
 */
364 strncpy(ps.procname, token, sizeof ps.procname - 1);
366 ps.in_parameter_declaration = 1;
367 return (last_code = funcname);
371 * The following hack attempts to guess whether or not the current
372 * token is in fact a declaration keyword -- one that has been
375 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
378 && (ps.last_token == rparen || ps.last_token == semicolon ||
379 ps.last_token == decl ||
380 ps.last_token == lbrace || ps.last_token == rbrace)) {
381 ps.keyword = 4; /* a type name */
386 if (last_code == decl) /* if this is a declared variable, then
387 * following sign is unary */
388 ps.last_u_d = true; /* will make "int a -1" work */
390 return (ident); /* the ident is not in the list */
391 } /* end of processing for alphanumeric character */
393 /* Scan a non-alphanumeric token */
395 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
398 if (++buf_ptr >= buf_end)
403 unary_delim = ps.last_u_d;
404 ps.last_nl = true; /* remember that we just had a newline */
405 code = (had_eof ? 0 : newline);
408 * if data has been exhausted, the newline is a dummy, and we should
409 * return code to stop
413 case '\'': /* start of quoted character */
414 case '"': /* start of string */
420 e_token = chfont(&bodyf, &stringf, e_token);
/*
 * Copy the literal up to and including the closing qchar.  A newline reached
 * before the closing delimiter is reported through diag2() as
 * "Unterminated literal".
 */
422 do { /* copy the string */
423 while (1) { /* move one character or [/<char>]<char> */
424 if (*buf_ptr == '\n') {
425 diag2(1, "Unterminated literal");
428 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
429 * since CHECK_SIZE guarantees that there
430 * are at least 5 entries left */
431 *e_token = *buf_ptr++;
432 if (buf_ptr >= buf_end)
434 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
435 if (*buf_ptr == '\n') /* check for escaped newline */
438 *++e_token = BACKSLASH;
439 if (*buf_ptr == BACKSLASH)
440 *++e_token = BACKSLASH;
442 *++e_token = *buf_ptr++;
443 ++e_token; /* we must increment this again because we
444 * copied two chars */
445 if (buf_ptr >= buf_end)
449 break; /* we copied one character */
450 } /* end of while (1) */
451 } while (*e_token++ != qchar);
453 e_token = chfont(&stringf, &bodyf, e_token - 1);
473 unary_delim = ps.last_u_d;
496 * if (ps.in_or_st) ps.block_init = 1;
498 /* ? code = ps.block_init ? lparen : lbrace; */
504 /* ? code = ps.block_init ? rparen : rbrace; */
508 case 014: /* a form feed */
509 unary_delim = ps.last_u_d;
510 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
526 case '+': /* check for -, +, --, ++ */
527 code = (ps.last_u_d ? unary_op : binary_op);
530 if (*buf_ptr == token[0]) {
531 /* check for doubled character */
532 *e_token++ = *buf_ptr++;
533 /* buffer overflow will be checked at end of loop */
534 if (last_code == ident || last_code == rparen) {
535 code = (ps.last_u_d ? unary_op : postop);
536 /* check for following ++ or -- */
540 else if (*buf_ptr == '=')
541 /* check for operator += */
542 *e_token++ = *buf_ptr++;
543 else if (*buf_ptr == '>') {
544 /* check for operator -> */
545 *e_token++ = *buf_ptr++;
546 if (!pointer_as_binop) {
549 ps.want_blank = false;
552 break; /* buffer overflow will be checked at end of
559 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
/*
 * NOTE(review): unlike the other chartype lookups in this block, the
 * subscript on the line above has no (int) cast.  With or without the cast, a
 * byte outside 0..127 would fall outside the 128 entries visible in the
 * chartype initializer -- confirm that input is restricted to ASCII.
 */
560 e_token[-1] = *buf_ptr++;
561 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
562 *e_token++ = *buf_ptr++;
563 *e_token++ = '='; /* Flip =+ to += */
567 if (*buf_ptr == '=') {/* == */
568 *e_token++ = '='; /* append the second '=' of "==" */
576 /* can drop thru!!! */
580 case '!': /* ops like <, <<, <=, !=, etc */
581 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
582 *e_token++ = *buf_ptr;
583 if (++buf_ptr >= buf_end)
587 *e_token++ = *buf_ptr++;
588 code = (ps.last_u_d ? unary_op : binary_op);
593 if (token[0] == '/' && *buf_ptr == '*') {
594 /* it is start of comment */
597 if (++buf_ptr >= buf_end)
601 unary_delim = ps.last_u_d;
604 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
606 * handle ||, &&, etc, and also things as in int *****i
608 *e_token++ = *buf_ptr;
609 if (++buf_ptr >= buf_end)
612 code = (ps.last_u_d ? unary_op : binary_op);
616 } /* end of switch */
617 if (code != newline) {
621 if (buf_ptr >= buf_end) /* check for input buffer empty */
623 ps.last_u_d = unary_delim;
624 *e_token = '\0'; /* null terminate the token */
/*
 * Allocate the initial typenames array with room for 16 entries and record
 * that capacity in typename_count.  The result of malloc() is tested against
 * NULL; what happens on failure is on a line not visible in this excerpt.
 */
629 alloc_typenames(void)
632 typenames = (const char **)malloc(sizeof(typenames[0]) *
633 (typename_count = 16));
634 if (typenames == NULL)
/*
 * Insert key into the sorted typenames table, storing a strdup() copy of it.
 *
 * key: NUL-terminated type name to add.
 *
 * An empty table simply receives the key.  A key that compares greater than
 * or equal to the current last entry takes the append path, which is cheap
 * when callers supply names already in order.  Any other key is placed by a
 * linear scan followed by a memmove() that opens a slot.  Both paths test for
 * an equal existing entry; the statement executed for a duplicate is not
 * visible in this excerpt.
 */
639 add_typename(const char *key)
/*
 * The next slot would exceed the capacity: double typename_count and grow
 * the array.
 * NOTE(review): typenames is overwritten with realloc()'s result, so the old
 * block becomes unreachable if realloc() fails; the handling after the NULL
 * test is not visible here -- confirm it does not try to continue.
 */
644 if (typename_top + 1 >= typename_count) {
645 typenames = realloc((void *)typenames,
646 sizeof(typenames[0]) * (typename_count *= 2));
647 if (typenames == NULL)
/* Empty table: the key becomes entry 0. */
650 if (typename_top == -1)
651 typenames[++typename_top] = copy = strdup(key);
652 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
653 /* take advantage of sorted input */
654 if (comparison == 0) /* remove duplicates */
656 typenames[++typename_top] = copy = strdup(key);
/*
 * Out-of-order key: scan from the front for the first entry that is not less
 * than key.  The scan always stops inside the table because this path is
 * only reached when key compares less than the last entry.
 */
661 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
662 /* find place for the new key */;
663 if (comparison == 0) /* remove duplicates */
/*
 * Shift entries p..old top up by one.  The ++typename_top inside the size
 * expression both bumps the top index and yields the number of entries to
 * move.
 */
665 memmove(&typenames[p + 1], &typenames[p],
666 sizeof(typenames[0]) * (++typename_top - p));
667 typenames[p] = copy = strdup(key);