2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
55 #include "indent_globs.h"
56 #include "indent_codes.h"
68 * This table has to be sorted alphabetically, because it'll be used in binary
69 * search. For the same reason, string must be the first thing in struct templ.
71 struct templ specials[] =
106 const char **typenames;
108 int typename_top = -1;
111 { /* this is used to facilitate the decision of
112 * what type (alphanumeric, operator) each
114 0, 0, 0, 0, 0, 0, 0, 0,
115 0, 0, 0, 0, 0, 0, 0, 0,
116 0, 0, 0, 0, 0, 0, 0, 0,
117 0, 0, 0, 0, 0, 0, 0, 0,
118 0, 3, 0, 0, 1, 3, 3, 0,
119 0, 0, 3, 3, 0, 3, 0, 3,
120 1, 1, 1, 1, 1, 1, 1, 1,
121 1, 1, 0, 0, 3, 3, 3, 3,
122 0, 1, 1, 1, 1, 1, 1, 1,
123 1, 1, 1, 1, 1, 1, 1, 1,
124 1, 1, 1, 1, 1, 1, 1, 1,
125 1, 1, 1, 0, 0, 0, 3, 1,
126 0, 1, 1, 1, 1, 1, 1, 1,
127 1, 1, 1, 1, 1, 1, 1, 1,
128 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 0, 3, 0, 3, 0
133 strcmp_type(const void *e1, const void *e2)
135 return (strcmp(e1, *(const char * const *)e2));
141 int unary_delim; /* this is set to 1 if the current token
142 * forces a following operator to be unary */
143 static int last_code; /* the last token type returned */
144 static int l_struct; /* set to 1 if the last token was 'struct' */
145 int code; /* internal code to be returned */
146 char qchar; /* the delimiter character for a string */
148 e_token = s_token; /* point to start of place to save token */
150 ps.col_1 = ps.last_nl; /* tell world that this token started in
151 * column 1 iff the last thing scanned was nl */
154 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
155 ps.col_1 = false; /* leading blanks imply token is not in column
157 if (++buf_ptr >= buf_end)
161 /* Scan an alphanumeric token */
162 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
164 * we have a character or number
168 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
172 if (*buf_ptr == '0' &&
173 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
174 *e_token++ = *buf_ptr++;
175 *e_token++ = *buf_ptr++;
176 while (isxdigit(*buf_ptr)) {
178 *e_token++ = *buf_ptr++;
183 if (*buf_ptr == '.') {
190 *e_token++ = *buf_ptr++;
191 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
192 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
198 *e_token++ = *buf_ptr++;
199 if (*buf_ptr == '+' || *buf_ptr == '-')
200 *e_token++ = *buf_ptr++;
205 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
207 *e_token++ = *buf_ptr++;
211 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
213 if (buf_ptr[1] == buf_ptr[0])
214 *e_token++ = *buf_ptr++;
215 *e_token++ = *buf_ptr++;
223 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
224 /* fill_buffer() terminates buffer with newline */
225 if (*buf_ptr == BACKSLASH) {
226 if (*(buf_ptr + 1) == '\n') {
228 if (buf_ptr >= buf_end)
235 *e_token++ = *buf_ptr++;
236 if (buf_ptr >= buf_end)
240 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
241 if (++buf_ptr >= buf_end)
245 if (l_struct && !ps.p_l_follow) {
246 /* if last token was 'struct' and we're not
247 * in parentheses, then this token
248 * should be treated as a declaration */
254 ps.last_u_d = l_struct; /* Operator after identifier is binary
255 * unless last token was 'struct' */
257 last_code = ident; /* Remember that this is the code we will
262 sizeof(specials) / sizeof(specials[0]),
265 if (p == NULL) { /* not a special keyword... */
268 /* ... so maybe a type_t or a typedef */
269 if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) &&
270 strcmp(u, "_t") == 0) || (typename_top >= 0 &&
271 bsearch(s_token, typenames, typename_top + 1,
272 sizeof(typenames[0]), strcmp_type))) {
273 ps.keyword = 4; /* a type name */
277 } else { /* we have a keyword */
278 ps.keyword = p->rwcode;
281 case 7: /* it is a switch */
283 case 8: /* a case or default */
286 case 3: /* a "struct" */
288 * Next time around, we will want to know that we have had a
294 case 4: /* one of the declaration keywords */
297 /* inside parens: cast, param list, offsetof or sizeof */
298 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
304 case 5: /* if, while, for */
307 case 6: /* do, else */
310 default: /* all others are treated like any other
313 } /* end of switch */
314 } /* end of if (p == NULL) ... else */
315 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
318 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
320 strncpy(ps.procname, token, sizeof ps.procname - 1);
321 ps.in_parameter_declaration = 1;
326 * The following hack attempts to guess whether or not the current
327 * token is in fact a declaration keyword -- one that has been
330 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
333 && (ps.last_token == rparen || ps.last_token == semicolon ||
334 ps.last_token == decl ||
335 ps.last_token == lbrace || ps.last_token == rbrace)) {
336 ps.keyword = 4; /* a type name */
341 if (last_code == decl) /* if this is a declared variable, then
342 * following sign is unary */
343 ps.last_u_d = true; /* will make "int a -1" work */
345 return (ident); /* the ident is not in the list */
346 } /* end of processing for alphanum character */
348 /* Scan a non-alphanumeric token */
350 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
353 if (++buf_ptr >= buf_end)
358 unary_delim = ps.last_u_d;
359 ps.last_nl = true; /* remember that we just had a newline */
360 code = (had_eof ? 0 : newline);
363 * if data has been exhausted, the newline is a dummy, and we should
364 * return code to stop
368 case '\'': /* start of quoted character */
369 case '"': /* start of string */
375 e_token = chfont(&bodyf, &stringf, e_token);
377 do { /* copy the string */
378 while (1) { /* move one character or [/<char>]<char> */
379 if (*buf_ptr == '\n') {
380 diag2(1, "Unterminated literal");
383 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
384 * since CHECK_SIZE guarantees that there
385 * are at least 5 entries left */
386 *e_token = *buf_ptr++;
387 if (buf_ptr >= buf_end)
389 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
390 if (*buf_ptr == '\n') /* check for escaped newline */
393 *++e_token = BACKSLASH;
394 if (*buf_ptr == BACKSLASH)
395 *++e_token = BACKSLASH;
397 *++e_token = *buf_ptr++;
398 ++e_token; /* we must increment this again because we
399 * copied two chars */
400 if (buf_ptr >= buf_end)
404 break; /* we copied one character */
405 } /* end of while (1) */
406 } while (*e_token++ != qchar);
408 e_token = chfont(&stringf, &bodyf, e_token - 1);
428 unary_delim = ps.last_u_d;
451 * if (ps.in_or_st) ps.block_init = 1;
453 /* ? code = ps.block_init ? lparen : lbrace; */
459 /* ? code = ps.block_init ? rparen : rbrace; */
463 case 014: /* a form feed */
464 unary_delim = ps.last_u_d;
465 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
481 case '+': /* check for -, +, --, ++ */
482 code = (ps.last_u_d ? unary_op : binary_op);
485 if (*buf_ptr == token[0]) {
486 /* check for doubled character */
487 *e_token++ = *buf_ptr++;
488 /* buffer overflow will be checked at end of loop */
489 if (last_code == ident || last_code == rparen) {
490 code = (ps.last_u_d ? unary_op : postop);
491 /* check for following ++ or -- */
495 else if (*buf_ptr == '=')
496 /* check for operator += */
497 *e_token++ = *buf_ptr++;
498 else if (*buf_ptr == '>') {
499 /* check for operator -> */
500 *e_token++ = *buf_ptr++;
501 if (!pointer_as_binop) {
504 ps.want_blank = false;
507 break; /* buffer overflow will be checked at end of
514 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
515 e_token[-1] = *buf_ptr++;
516 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
517 *e_token++ = *buf_ptr++;
518 *e_token++ = '='; /* Flip =+ to += */
522 if (*buf_ptr == '=') {/* == */
523 *e_token++ = '='; /* append the second '=' of the == operator */
531 /* can drop thru!!! */
535 case '!': /* ops like <, <<, <=, !=, etc */
536 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
537 *e_token++ = *buf_ptr;
538 if (++buf_ptr >= buf_end)
542 *e_token++ = *buf_ptr++;
543 code = (ps.last_u_d ? unary_op : binary_op);
548 if (token[0] == '/' && *buf_ptr == '*') {
549 /* it is start of comment */
552 if (++buf_ptr >= buf_end)
556 unary_delim = ps.last_u_d;
559 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
561 * handle ||, &&, etc, and also things as in int *****i
563 *e_token++ = *buf_ptr;
564 if (++buf_ptr >= buf_end)
567 code = (ps.last_u_d ? unary_op : binary_op);
571 } /* end of switch */
572 if (code != newline) {
576 if (buf_ptr >= buf_end) /* check for input buffer empty */
578 ps.last_u_d = unary_delim;
579 *e_token = '\0'; /* null terminate the token */
584 alloc_typenames(void)
587 typenames = (const char **)malloc(sizeof(typenames[0]) *
588 (typename_count = 16));
589 if (typenames == NULL)
594 add_typename(const char *key)
599 if (typename_top + 1 >= typename_count) {
600 typenames = realloc((void *)typenames,
601 sizeof(typenames[0]) * (typename_count *= 2));
602 if (typenames == NULL)
605 if (typename_top == -1)
606 typenames[++typename_top] = copy = strdup(key);
607 else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) {
608 /* take advantage of sorted input */
609 if (comparison == 0) /* remove duplicates */
611 typenames[++typename_top] = copy = strdup(key);
616 for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++)
617 /* find place for the new key */;
618 if (comparison == 0) /* remove duplicates */
620 memmove(&typenames[p + 1], &typenames[p],
621 sizeof(typenames[0]) * (++typename_top - p));
622 typenames[p] = copy = strdup(key);