2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
55 #include "indent_globs.h"
56 #include "indent_codes.h"
67 struct templ specials[16384] =
104 { /* this is used to facilitate the decision of
105 * what type (alphanumeric, operator) each
107 0, 0, 0, 0, 0, 0, 0, 0,
108 0, 0, 0, 0, 0, 0, 0, 0,
109 0, 0, 0, 0, 0, 0, 0, 0,
110 0, 0, 0, 0, 0, 0, 0, 0,
111 0, 3, 0, 0, 1, 3, 3, 0,
112 0, 0, 3, 3, 0, 3, 0, 3,
113 1, 1, 1, 1, 1, 1, 1, 1,
114 1, 1, 0, 0, 3, 3, 3, 3,
115 0, 1, 1, 1, 1, 1, 1, 1,
116 1, 1, 1, 1, 1, 1, 1, 1,
117 1, 1, 1, 1, 1, 1, 1, 1,
118 1, 1, 1, 0, 0, 0, 3, 1,
119 0, 1, 1, 1, 1, 1, 1, 1,
120 1, 1, 1, 1, 1, 1, 1, 1,
121 1, 1, 1, 1, 1, 1, 1, 1,
122 1, 1, 1, 0, 3, 0, 3, 0
128 int unary_delim; /* this is set to 1 if the current token
129 * forces a following operator to be unary */
130 static int last_code; /* the last token type returned */
131 static int l_struct; /* set to 1 if the last token was 'struct' */
132 int code; /* internal code to be returned */
133 char qchar; /* the delimiter character for a string */
135 e_token = s_token; /* point to start of place to save token */
137 ps.col_1 = ps.last_nl; /* tell world that this token started in
138 * column 1 iff the last thing scanned was nl */
141 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
142 ps.col_1 = false; /* leading blanks imply token is not in column
144 if (++buf_ptr >= buf_end)
148 /* Scan an alphanumeric token */
149 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
151 * we have a character or number
153 const char *j; /* used for searching thru list of
158 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
162 if (*buf_ptr == '0' &&
163 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
164 *e_token++ = *buf_ptr++;
165 *e_token++ = *buf_ptr++;
166 while (isxdigit(*buf_ptr)) {
168 *e_token++ = *buf_ptr++;
173 if (*buf_ptr == '.') {
180 *e_token++ = *buf_ptr++;
181 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
182 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
188 *e_token++ = *buf_ptr++;
189 if (*buf_ptr == '+' || *buf_ptr == '-')
190 *e_token++ = *buf_ptr++;
195 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
197 *e_token++ = *buf_ptr++;
201 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
203 if (buf_ptr[1] == buf_ptr[0])
204 *e_token++ = *buf_ptr++;
205 *e_token++ = *buf_ptr++;
213 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
214 /* fill_buffer() terminates buffer with newline */
215 if (*buf_ptr == BACKSLASH) {
216 if (*(buf_ptr + 1) == '\n') {
218 if (buf_ptr >= buf_end)
225 *e_token++ = *buf_ptr++;
226 if (buf_ptr >= buf_end)
230 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
231 if (++buf_ptr >= buf_end)
235 if (l_struct && !ps.p_l_follow) {
236 /* if last token was 'struct' and we're not
237 * in parentheses, then this token
238 * should be treated as a declaration */
244 ps.last_u_d = l_struct; /* Operator after identifier is binary
245 * unless last token was 'struct' */
247 last_code = ident; /* Remember that this is the code we will
251 const char *q = s_token;
252 size_t q_len = strlen(q);
253 /* Check if we have an "_t" in the end */
255 (strcmp(q + q_len - 2, "_t") == 0)) {
256 ps.keyword = 4; /* a type name */
258 goto found_auto_typedef;
263 * This loop will check if the token is a keyword.
265 for (p = specials; (j = p->rwd) != NULL; p++) {
266 const char *q = s_token; /* point at scanned token */
267 if (*j++ != *q++ || *j++ != *q++)
268 continue; /* This test depends on the fact that
269 * identifiers are always at least 1 character
270 * long (ie. the first two bytes of the
271 * identifier are always meaningful) */
273 break; /* If its a one-character identifier */
276 goto found_keyword; /* I wish that C had a multi-level
279 if (p->rwd) { /* we have a keyword */
281 ps.keyword = p->rwcode;
284 case 7: /* it is a switch */
286 case 8: /* a case or default */
289 case 3: /* a "struct" */
291 * Next time around, we will want to know that we have had a
297 case 4: /* one of the declaration keywords */
300 /* inside parens: cast, param list, offsetof or sizeof */
301 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
307 case 5: /* if, while, for */
310 case 6: /* do, else */
313 default: /* all others are treated like any other
316 } /* end of switch */
317 } /* end of if (found_it) */
318 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
321 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
323 strncpy(ps.procname, token, sizeof ps.procname - 1);
324 ps.in_parameter_declaration = 1;
329 * The following hack attempts to guess whether or not the current
330 * token is in fact a declaration keyword -- one that has been
333 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
336 && (ps.last_token == rparen || ps.last_token == semicolon ||
337 ps.last_token == decl ||
338 ps.last_token == lbrace || ps.last_token == rbrace)) {
339 ps.keyword = 4; /* a type name */
344 if (last_code == decl) /* if this is a declared variable, then
345 * following sign is unary */
346 ps.last_u_d = true; /* will make "int a -1" work */
348 return (ident); /* the ident is not in the list */
349 } /* end of processing for an alphanumeric token */
351 /* Scan a non-alphanumeric token */
353 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
356 if (++buf_ptr >= buf_end)
361 unary_delim = ps.last_u_d;
362 ps.last_nl = true; /* remember that we just had a newline */
363 code = (had_eof ? 0 : newline);
366 * if data has been exhausted, the newline is a dummy, and we should
367 * return code to stop
371 case '\'': /* start of quoted character */
372 case '"': /* start of string */
378 e_token = chfont(&bodyf, &stringf, e_token);
380 do { /* copy the string */
381 while (1) { /* move one character, or a backslash escape plus the character it escapes */
382 if (*buf_ptr == '\n') {
383 diag2(1, "Unterminated literal");
386 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
387 * since CHECK_SIZE guarantees that there
388 * are at least 5 entries left */
389 *e_token = *buf_ptr++;
390 if (buf_ptr >= buf_end)
392 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
393 if (*buf_ptr == '\n') /* check for escaped newline */
396 *++e_token = BACKSLASH;
397 if (*buf_ptr == BACKSLASH)
398 *++e_token = BACKSLASH;
400 *++e_token = *buf_ptr++;
401 ++e_token; /* we must increment this again because we
402 * copied two chars */
403 if (buf_ptr >= buf_end)
407 break; /* we copied one character */
408 } /* end of while (1) */
409 } while (*e_token++ != qchar);
411 e_token = chfont(&stringf, &bodyf, e_token - 1);
431 unary_delim = ps.last_u_d;
454 * if (ps.in_or_st) ps.block_init = 1;
456 /* ? code = ps.block_init ? lparen : lbrace; */
462 /* ? code = ps.block_init ? rparen : rbrace; */
466 case 014: /* a form feed */
467 unary_delim = ps.last_u_d;
468 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
484 case '+': /* check for -, +, --, ++ */
485 code = (ps.last_u_d ? unary_op : binary_op);
488 if (*buf_ptr == token[0]) {
489 /* check for doubled character */
490 *e_token++ = *buf_ptr++;
491 /* buffer overflow will be checked at end of loop */
492 if (last_code == ident || last_code == rparen) {
493 code = (ps.last_u_d ? unary_op : postop);
494 /* check for following ++ or -- */
498 else if (*buf_ptr == '=')
499 /* check for operator += */
500 *e_token++ = *buf_ptr++;
501 else if (*buf_ptr == '>') {
502 /* check for operator -> */
503 *e_token++ = *buf_ptr++;
504 if (!pointer_as_binop) {
507 ps.want_blank = false;
510 break; /* buffer overflow will be checked at end of
517 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
518 e_token[-1] = *buf_ptr++;
519 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
520 *e_token++ = *buf_ptr++;
521 *e_token++ = '='; /* Flip =+ to += */
525 if (*buf_ptr == '=') {/* == */
526 *e_token++ = '='; /* append the second '=' of "==" */
534 /* can drop thru!!! */
538 case '!': /* ops like <, <<, <=, !=, etc */
539 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
540 *e_token++ = *buf_ptr;
541 if (++buf_ptr >= buf_end)
545 *e_token++ = *buf_ptr++;
546 code = (ps.last_u_d ? unary_op : binary_op);
551 if (token[0] == '/' && *buf_ptr == '*') {
552 /* it is start of comment */
555 if (++buf_ptr >= buf_end)
559 unary_delim = ps.last_u_d;
562 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
564 * handle ||, &&, etc, and also things as in int *****i
566 *e_token++ = *buf_ptr;
567 if (++buf_ptr >= buf_end)
570 code = (ps.last_u_d ? unary_op : binary_op);
574 } /* end of switch */
575 if (code != newline) {
579 if (buf_ptr >= buf_end) /* check for input buffer empty */
581 ps.last_u_d = unary_delim;
582 *e_token = '\0'; /* null terminate the token */
587 * Add the given keyword to the keyword table, using val as the keyword type
590 addkey(char *key, int val)
592 struct templ *p = specials;
594 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
598 if (p >= specials + sizeof(specials) / sizeof(specials[0])) {
599 fprintf(stderr, "indent: typedef table overflow\n");