2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* Source identification string for lexi.c (version 8.1, dated 6/6/93). */
38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
55 #include "indent_globs.h"
56 #include "indent_codes.h"
70 * This table has to be sorted alphabetically, because it'll be used in binary
71 * search. For the same reason, string must be the first thing in struct templ.
73 struct templ specials[] =
/*
 * Table of known type names. add_typename() keeps the entries in ascending
 * strcmp() order so that the scanner can look a token up with bsearch().
 */
109 const char **typenames;
/* Index of the last occupied slot in typenames; -1 while the table is empty. */
111 int typename_top = -1;
/*
 * Character classification table (presumably the initializer of chartype[];
 * its declarator is outside this excerpt). There are 16 rows of 8 entries,
 * one entry per character code 0..127. Letters, digits, underscore and
 * dollar hold 1; the operator characters ! % & * + - / < = > ? ^ | ~ hold 3;
 * every other code holds 0. The scanner compares entries against the names
 * alphanum and opchar.
 */
114 { /* this is used to facilitate the decision of
115 * what type (alphanumeric, operator) each
117 0, 0, 0, 0, 0, 0, 0, 0,
118 0, 0, 0, 0, 0, 0, 0, 0,
119 0, 0, 0, 0, 0, 0, 0, 0,
120 0, 0, 0, 0, 0, 0, 0, 0,
121 0, 3, 0, 0, 1, 3, 3, 0,
122 0, 0, 3, 3, 0, 3, 0, 3,
123 1, 1, 1, 1, 1, 1, 1, 1,
124 1, 1, 0, 0, 3, 3, 3, 3,
125 0, 1, 1, 1, 1, 1, 1, 1,
126 1, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 0, 0, 0, 3, 1,
129 0, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1,
131 1, 1, 1, 1, 1, 1, 1, 1,
132 1, 1, 1, 0, 3, 0, 3, 0
/*
 * Comparison callback handed to bsearch() when a token is looked up in
 * typenames. e1 is the search key and is used directly as a C string; e2
 * points at an array element, i.e. at a pointer to a string, so it is
 * dereferenced once before the strcmp().
 */
136 strcmp_type(const void *e1, const void *e2)
138 return (strcmp(e1, *(const char * const *)e2));
/*
 * Body of the token scanner; the function header and a number of interior
 * lines are not part of this excerpt.
 *
 * Visible flow: reset e_token to s_token, skip leading blanks and tabs, then
 * scan either an alphanumeric token (number, identifier or keyword) or a
 * non-alphanumeric one (newline, quoted literal, operator, punctuation).
 * Before finishing, the unary/binary hint is stored in ps.last_u_d and the
 * token text is NUL-terminated at e_token.
 */
144 int unary_delim; /* this is set to 1 if the current token
145 * forces a following operator to be unary */
146 static int last_code; /* the last token type returned */
147 static int l_struct; /* set to 1 if the last token was 'struct' */
148 int code; /* internal code to be returned */
149 char qchar; /* the delimiter character for a string */
151 e_token = s_token; /* point to start of place to save token */
153 ps.col_1 = ps.last_nl; /* tell world that this token started in
154 * column 1 iff the last thing scanned was nl */
157 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
158 ps.col_1 = false; /* leading blanks imply token is not in column
160 if (++buf_ptr >= buf_end)
164 /* Scan an alphanumeric token */
/*
 * NOTE(review): chartype[] is indexed with (int)*buf_ptr and the <ctype.h>
 * predicates below receive *buf_ptr directly. If buf_ptr points to plain
 * (possibly signed) char, bytes above 0x7f would give a negative index or
 * argument -- confirm against the declaration of buf_ptr.
 */
165 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
167 * we have a character or number
171 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
175 if (*buf_ptr == '0' &&
176 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
177 *e_token++ = *buf_ptr++;
178 *e_token++ = *buf_ptr++;
179 while (isxdigit(*buf_ptr)) {
181 *e_token++ = *buf_ptr++;
186 if (*buf_ptr == '.') {
193 *e_token++ = *buf_ptr++;
194 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
195 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
201 *e_token++ = *buf_ptr++;
202 if (*buf_ptr == '+' || *buf_ptr == '-')
203 *e_token++ = *buf_ptr++;
208 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
210 *e_token++ = *buf_ptr++;
214 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
216 if (buf_ptr[1] == buf_ptr[0])
217 *e_token++ = *buf_ptr++;
218 *e_token++ = *buf_ptr++;
226 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
227 /* fill_buffer() terminates buffer with newline */
228 if (*buf_ptr == BACKSLASH) {
229 if (*(buf_ptr + 1) == '\n') {
231 if (buf_ptr >= buf_end)
238 *e_token++ = *buf_ptr++;
239 if (buf_ptr >= buf_end)
244 if (s_token[0] == 'L' && s_token[1] == '\0' &&
245 (*buf_ptr == '"' || *buf_ptr == '\''))
248 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
249 if (++buf_ptr >= buf_end)
253 if (l_struct && !ps.p_l_follow) {
254 /* if last token was 'struct' and we're not
255 * in parentheses, then this token
256 * should be treated as a declaration */
262 ps.last_u_d = l_struct; /* Operator after identifier is binary
263 * unless last token was 'struct' */
265 last_code = ident; /* Remember that this is the code we will
270 sizeof(specials) / sizeof(specials[0]),
273 if (p == NULL) { /* not a special keyword... */
276 /* ... so maybe a type_t or a typedef */
277 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
278 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
279 bsearch(s_token, typenames, typename_top + 1,
280 sizeof(typenames[0]), strcmp_type))) {
281 ps.keyword = 4; /* a type name */
285 } else { /* we have a keyword */
286 ps.keyword = p->rwcode;
289 case 7: /* it is a switch */
291 case 8: /* a case or default */
294 case 3: /* a "struct" */
296 * Next time around, we will want to know that we have had a
302 case 4: /* one of the declaration keywords */
305 /* inside parens: cast, param list, offsetof or sizeof */
306 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
312 case 5: /* if, while, for */
315 case 6: /* do, else */
318 case 10: /* storage class specifier */
321 default: /* all others are treated like any other
324 } /* end of switch */
325 } /* end of if (found_it) */
326 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
329 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
/*
 * NOTE(review): strncpy() copies at most sizeof ps.procname - 1 bytes and
 * adds no terminating NUL when the source is that long or longer, so
 * termination of ps.procname depends on code not shown here -- confirm.
 */
331 strncpy(ps.procname, token, sizeof ps.procname - 1);
333 ps.in_parameter_declaration = 1;
338 * The following hack attempts to guess whether or not the current
339 * token is in fact a declaration keyword -- one that has been
342 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
345 && (ps.last_token == rparen || ps.last_token == semicolon ||
346 ps.last_token == decl ||
347 ps.last_token == lbrace || ps.last_token == rbrace)) {
348 ps.keyword = 4; /* a type name */
353 if (last_code == decl) /* if this is a declared variable, then
354 * following sign is unary */
355 ps.last_u_d = true; /* will make "int a -1" work */
357 return (ident); /* the ident is not in the list */
358 } /* end of processing for alphanum character */
360 /* Scan a non-alphanumeric token */
362 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
365 if (++buf_ptr >= buf_end)
370 unary_delim = ps.last_u_d;
371 ps.last_nl = true; /* remember that we just had a newline */
372 code = (had_eof ? 0 : newline);
375 * if data has been exhausted, the newline is a dummy, and we should
376 * return code to stop
380 case '\'': /* start of quoted character */
381 case '"': /* start of string */
387 e_token = chfont(&bodyf, &stringf, e_token);
389 do { /* copy the string */
390 while (1) { /* move one character or [/<char>]<char> */
391 if (*buf_ptr == '\n') {
392 diag2(1, "Unterminated literal");
395 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
396 * since CHECK_SIZE guarantees that there
397 * are at least 5 entries left */
398 *e_token = *buf_ptr++;
399 if (buf_ptr >= buf_end)
401 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
402 if (*buf_ptr == '\n') /* check for escaped newline */
405 *++e_token = BACKSLASH;
406 if (*buf_ptr == BACKSLASH)
407 *++e_token = BACKSLASH;
409 *++e_token = *buf_ptr++;
410 ++e_token; /* we must increment this again because we
411 * copied two chars */
412 if (buf_ptr >= buf_end)
416 break; /* we copied one character */
417 } /* end of while (1) */
418 } while (*e_token++ != qchar);
420 e_token = chfont(&stringf, &bodyf, e_token - 1);
440 unary_delim = ps.last_u_d;
463 * if (ps.in_or_st) ps.block_init = 1;
465 /* ? code = ps.block_init ? lparen : lbrace; */
471 /* ? code = ps.block_init ? rparen : rbrace; */
475 case 014: /* a form feed */
476 unary_delim = ps.last_u_d;
477 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
493 case '+': /* check for -, +, --, ++ */
494 code = (ps.last_u_d ? unary_op : binary_op);
497 if (*buf_ptr == token[0]) {
498 /* check for doubled character */
499 *e_token++ = *buf_ptr++;
500 /* buffer overflow will be checked at end of loop */
501 if (last_code == ident || last_code == rparen) {
502 code = (ps.last_u_d ? unary_op : postop);
503 /* check for following ++ or -- */
507 else if (*buf_ptr == '=')
508 /* check for operator += */
509 *e_token++ = *buf_ptr++;
510 else if (*buf_ptr == '>') {
511 /* check for operator -> */
512 *e_token++ = *buf_ptr++;
513 if (!pointer_as_binop) {
516 ps.want_blank = false;
519 break; /* buffer overflow will be checked at end of
526 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
/* NOTE(review): this chartype lookup lacks the (int) cast used at the other call sites. */
527 e_token[-1] = *buf_ptr++;
528 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
529 *e_token++ = *buf_ptr++;
530 *e_token++ = '='; /* Flip =+ to += */
534 if (*buf_ptr == '=') {/* == */
535 *e_token++ = '='; /* append '=' to complete the == token */
543 /* can drop thru!!! */
547 case '!': /* ops like <, <<, <=, !=, etc */
548 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
549 *e_token++ = *buf_ptr;
550 if (++buf_ptr >= buf_end)
554 *e_token++ = *buf_ptr++;
555 code = (ps.last_u_d ? unary_op : binary_op);
560 if (token[0] == '/' && *buf_ptr == '*') {
561 /* it is start of comment */
564 if (++buf_ptr >= buf_end)
568 unary_delim = ps.last_u_d;
571 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
573 * handle ||, &&, etc, and also things as in int *****i
575 *e_token++ = *buf_ptr;
576 if (++buf_ptr >= buf_end)
579 code = (ps.last_u_d ? unary_op : binary_op);
583 } /* end of switch */
584 if (code != newline) {
588 if (buf_ptr >= buf_end) /* check for input buffer empty */
590 ps.last_u_d = unary_delim;
591 *e_token = '\0'; /* null terminate the token */
/*
 * Creates the initial typenames array: typename_count is set to 16 and room
 * for that many string pointers is requested from malloc(). What happens
 * when the allocation fails is decided on a line outside this excerpt.
 */
596 alloc_typenames(void)
599 typenames = (const char **)malloc(sizeof(typenames[0]) *
600 (typename_count = 16));
601 if (typenames == NULL)
/*
 * Inserts a strdup() copy of key into the sorted typenames table. A key that
 * compares equal to an existing entry is treated as a duplicate.
 *
 * The capacity is doubled with realloc() once the table is full. A key that
 * sorts at or after the current last entry is appended directly; any other
 * key is placed by a linear scan from the front followed by a memmove() that
 * opens a slot. The strdup() result is also kept in copy; how it is checked
 * is not visible in this excerpt.
 */
606 add_typename(const char *key)
611 if (typename_top + 1 >= typename_count) {
/*
 * NOTE(review): the realloc() result overwrites typenames directly, so the
 * old block becomes unreachable if the call fails; harmless only if the
 * failure branch below terminates the program -- confirm.
 */
612 typenames = realloc((void *)typenames,
613 sizeof(typenames[0]) * (typename_count *= 2));
614 if (typenames == NULL)
617 if (typename_top == -1)
618 typenames[++typename_top] = copy = strdup(key);
619 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
620 /* take advantage of sorted input */
621 if (comparison == 0) /* remove duplicates */
623 typenames[++typename_top] = copy = strdup(key);
/* key sorts before the last entry: scan from the front for its position */
628 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
629 /* find place for the new key */;
630 if (comparison == 0) /* remove duplicates */
/* shift entries p..old top up by one slot; typename_top is bumped in the size expression */
632 memmove(&typenames[p + 1], &typenames[p],
633 sizeof(typenames[0]) * (++typename_top - p));
634 typenames[p] = copy = strdup(key);