2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
55 #include "indent_globs.h"
56 #include "indent_codes.h"
70 * This table has to be sorted alphabetically, because it'll be used in binary
71 * search. For the same reason, string must be the first thing in struct templ.
73 struct templ specials[] =
109 const char **typenames;
111 int typename_top = -1;
114 { /* this is used to facilitate the decision of
115 * what type (alphanumeric, operator) each
117 0, 0, 0, 0, 0, 0, 0, 0,
118 0, 0, 0, 0, 0, 0, 0, 0,
119 0, 0, 0, 0, 0, 0, 0, 0,
120 0, 0, 0, 0, 0, 0, 0, 0,
121 0, 3, 0, 0, 1, 3, 3, 0,
122 0, 0, 3, 3, 0, 3, 0, 3,
123 1, 1, 1, 1, 1, 1, 1, 1,
124 1, 1, 0, 0, 3, 3, 3, 3,
125 0, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 0, 0, 0, 3, 1,
129 0, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 0, 3, 0, 3, 0
136 strcmp_type(const void *e1, const void *e2)
138 return (strcmp(e1, *(const char * const *)e2));
144 int unary_delim; /* this is set to 1 if the current token
145 * forces a following operator to be unary */
146 static int last_code; /* the last token type returned */
147 static int l_struct; /* set to 1 if the last token was 'struct' */
148 int code; /* internal code to be returned */
149 char qchar; /* the delimiter character for a string */
151 e_token = s_token; /* point to start of place to save token */
153 ps.col_1 = ps.last_nl; /* tell world that this token started in
154 * column 1 iff the last thing scanned was nl */
157 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
158 ps.col_1 = false; /* leading blanks imply token is not in column
160 if (++buf_ptr >= buf_end)
164 /* Scan an alphanumeric token */
165 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
167 * we have a character or number
171 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
173 BASE_2, BASE_8, BASE_10, BASE_16
178 enum base in_base = BASE_10;
180 if (*buf_ptr == '0') {
181 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
183 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
185 else if (isdigit(buf_ptr[1]))
190 *e_token++ = *buf_ptr++;
191 *e_token++ = *buf_ptr++;
192 while (*buf_ptr == '0' || *buf_ptr == '1') {
194 *e_token++ = *buf_ptr++;
198 *e_token++ = *buf_ptr++;
199 while (*buf_ptr >= '0' && *buf_ptr <= '8') {
201 *e_token++ = *buf_ptr++;
205 *e_token++ = *buf_ptr++;
206 *e_token++ = *buf_ptr++;
207 while (isxdigit(*buf_ptr)) {
209 *e_token++ = *buf_ptr++;
214 if (*buf_ptr == '.') {
221 *e_token++ = *buf_ptr++;
222 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
223 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
229 *e_token++ = *buf_ptr++;
230 if (*buf_ptr == '+' || *buf_ptr == '-')
231 *e_token++ = *buf_ptr++;
238 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
240 *e_token++ = *buf_ptr++;
244 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
246 if (buf_ptr[1] == buf_ptr[0])
247 *e_token++ = *buf_ptr++;
248 *e_token++ = *buf_ptr++;
256 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
257 /* fill_buffer() terminates buffer with newline */
258 if (*buf_ptr == BACKSLASH) {
259 if (*(buf_ptr + 1) == '\n') {
261 if (buf_ptr >= buf_end)
268 *e_token++ = *buf_ptr++;
269 if (buf_ptr >= buf_end)
274 if (s_token[0] == 'L' && s_token[1] == '\0' &&
275 (*buf_ptr == '"' || *buf_ptr == '\''))
278 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
279 if (++buf_ptr >= buf_end)
283 if (l_struct && !ps.p_l_follow) {
284 /* if last token was 'struct' and we're not
285 * in parentheses, then this token
286 * should be treated as a declaration */
292 ps.last_u_d = l_struct; /* Operator after identifier is binary
293 * unless last token was 'struct' */
295 last_code = ident; /* Remember that this is the code we will
300 sizeof(specials) / sizeof(specials[0]),
303 if (p == NULL) { /* not a special keyword... */
306 /* ... so maybe a type_t or a typedef */
307 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
308 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
309 bsearch(s_token, typenames, typename_top + 1,
310 sizeof(typenames[0]), strcmp_type))) {
311 ps.keyword = 4; /* a type name */
315 } else { /* we have a keyword */
316 ps.keyword = p->rwcode;
319 case 7: /* it is a switch */
321 case 8: /* a case or default */
324 case 3: /* a "struct" */
326 * Next time around, we will want to know that we have had a
332 case 4: /* one of the declaration keywords */
335 /* inside parens: cast, param list, offsetof or sizeof */
336 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
342 case 5: /* if, while, for */
345 case 6: /* do, else */
348 case 10: /* storage class specifier */
351 default: /* all others are treated like any other
354 } /* end of switch */
355 } /* end of if (found_it) */
356 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
357 ps.in_parameter_declaration == 0 && ps.block_init == 0) {
360 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
362 strncpy(ps.procname, token, sizeof ps.procname - 1);
364 ps.in_parameter_declaration = 1;
365 return (last_code = funcname);
369 * The following hack attempts to guess whether or not the current
370 * token is in fact a declaration keyword -- one that has been
373 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
376 && (ps.last_token == rparen || ps.last_token == semicolon ||
377 ps.last_token == decl ||
378 ps.last_token == lbrace || ps.last_token == rbrace)) {
379 ps.keyword = 4; /* a type name */
384 if (last_code == decl) /* if this is a declared variable, then
385 * following sign is unary */
386 ps.last_u_d = true; /* will make "int a -1" work */
388 return (ident); /* the ident is not in the list */
389 } /* end of processing for alphanumeric character */
391 /* Scan a non-alphanumeric token */
393 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
396 if (++buf_ptr >= buf_end)
401 unary_delim = ps.last_u_d;
402 ps.last_nl = true; /* remember that we just had a newline */
403 code = (had_eof ? 0 : newline);
406 * if data has been exhausted, the newline is a dummy, and we should
407 * return code to stop
411 case '\'': /* start of quoted character */
412 case '"': /* start of string */
418 e_token = chfont(&bodyf, &stringf, e_token);
420 do { /* copy the string */
421 while (1) { /* move one character, or a backslash escape plus its <char> */
422 if (*buf_ptr == '\n') {
423 diag2(1, "Unterminated literal");
426 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
427 * since CHECK_SIZE_TOKEN guarantees that there
428 * are at least 5 entries left */
429 *e_token = *buf_ptr++;
430 if (buf_ptr >= buf_end)
432 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
433 if (*buf_ptr == '\n') /* check for escaped newline */
436 *++e_token = BACKSLASH;
437 if (*buf_ptr == BACKSLASH)
438 *++e_token = BACKSLASH;
440 *++e_token = *buf_ptr++;
441 ++e_token; /* we must increment this again because we
442 * copied two chars */
443 if (buf_ptr >= buf_end)
447 break; /* we copied one character */
448 } /* end of while (1) */
449 } while (*e_token++ != qchar);
451 e_token = chfont(&stringf, &bodyf, e_token - 1);
471 unary_delim = ps.last_u_d;
494 * if (ps.in_or_st) ps.block_init = 1;
496 /* ? code = ps.block_init ? lparen : lbrace; */
502 /* ? code = ps.block_init ? rparen : rbrace; */
506 case 014: /* a form feed */
507 unary_delim = ps.last_u_d;
508 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
524 case '+': /* check for -, +, --, ++ */
525 code = (ps.last_u_d ? unary_op : binary_op);
528 if (*buf_ptr == token[0]) {
529 /* check for doubled character */
530 *e_token++ = *buf_ptr++;
531 /* buffer overflow will be checked at end of loop */
532 if (last_code == ident || last_code == rparen) {
533 code = (ps.last_u_d ? unary_op : postop);
534 /* check for following ++ or -- */
538 else if (*buf_ptr == '=')
539 /* check for operator += */
540 *e_token++ = *buf_ptr++;
541 else if (*buf_ptr == '>') {
542 /* check for operator -> */
543 *e_token++ = *buf_ptr++;
544 if (!pointer_as_binop) {
547 ps.want_blank = false;
550 break; /* buffer overflow will be checked at end of
557 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
558 e_token[-1] = *buf_ptr++;
559 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
560 *e_token++ = *buf_ptr++;
561 *e_token++ = '='; /* Flip =+ to += */
565 if (*buf_ptr == '=') {/* == */
566 *e_token++ = '='; /* append the second '=' of == */
574 /* can drop thru!!! */
578 case '!': /* ops like <, <<, <=, !=, etc */
579 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
580 *e_token++ = *buf_ptr;
581 if (++buf_ptr >= buf_end)
585 *e_token++ = *buf_ptr++;
586 code = (ps.last_u_d ? unary_op : binary_op);
591 if (token[0] == '/' && *buf_ptr == '*') {
592 /* it is start of comment */
595 if (++buf_ptr >= buf_end)
599 unary_delim = ps.last_u_d;
602 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
604 * handle ||, &&, etc, and also things as in int *****i
606 *e_token++ = *buf_ptr;
607 if (++buf_ptr >= buf_end)
610 code = (ps.last_u_d ? unary_op : binary_op);
614 } /* end of switch */
615 if (code != newline) {
619 if (buf_ptr >= buf_end) /* check for input buffer empty */
621 ps.last_u_d = unary_delim;
622 *e_token = '\0'; /* null terminate the token */
627 alloc_typenames(void)
630 typenames = (const char **)malloc(sizeof(typenames[0]) *
631 (typename_count = 16));
632 if (typenames == NULL)
637 add_typename(const char *key)
642 if (typename_top + 1 >= typename_count) {
643 typenames = realloc((void *)typenames,
644 sizeof(typenames[0]) * (typename_count *= 2));
645 if (typenames == NULL)
648 if (typename_top == -1)
649 typenames[++typename_top] = copy = strdup(key);
650 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
651 /* take advantage of sorted input */
652 if (comparison == 0) /* remove duplicates */
654 typenames[++typename_top] = copy = strdup(key);
659 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
660 /* find place for the new key */;
661 if (comparison == 0) /* remove duplicates */
663 memmove(&typenames[p + 1], &typenames[p],
664 sizeof(typenames[0]) * (++typename_top - p));
665 typenames[p] = copy = strdup(key);