2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
55 #include "indent_globs.h"
56 #include "indent_codes.h"
69 struct templ specials[1000] =
106 { /* this is used to facilitate the decision of
107 * what type (alphanumeric, operator) each
109 0, 0, 0, 0, 0, 0, 0, 0,
110 0, 0, 0, 0, 0, 0, 0, 0,
111 0, 0, 0, 0, 0, 0, 0, 0,
112 0, 0, 0, 0, 0, 0, 0, 0,
113 0, 3, 0, 0, 1, 3, 3, 0,
114 0, 0, 3, 3, 0, 3, 0, 3,
115 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 1, 0, 0, 3, 3, 3, 3,
117 0, 1, 1, 1, 1, 1, 1, 1,
118 1, 1, 1, 1, 1, 1, 1, 1,
119 1, 1, 1, 1, 1, 1, 1, 1,
120 1, 1, 1, 0, 0, 0, 3, 1,
121 0, 1, 1, 1, 1, 1, 1, 1,
122 1, 1, 1, 1, 1, 1, 1, 1,
123 1, 1, 1, 1, 1, 1, 1, 1,
124 1, 1, 1, 0, 3, 0, 3, 0
130 int unary_delim; /* this is set to 1 if the current token
131 * forces a following operator to be unary */
132 static int last_code; /* the last token type returned */
133 static int l_struct; /* set to 1 if the last token was 'struct' */
134 int code; /* internal code to be returned */
135 char qchar; /* the delimiter character for a string */
137 e_token = s_token; /* point to start of place to save token */
139 ps.col_1 = ps.last_nl; /* tell world that this token started in
140 * column 1 iff the last thing scanned was nl */
143 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
144 ps.col_1 = false; /* leading blanks imply token is not in column
146 if (++buf_ptr >= buf_end)
150 /* Scan an alphanumeric token */
151 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
153 * we have a character or number
155 const char *j; /* used for searching thru list of
160 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
162 BASE_2, BASE_8, BASE_10, BASE_16
167 enum base in_base = BASE_10;
169 if (*buf_ptr == '0') {
170 if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B')
172 else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')
174 else if (isdigit(buf_ptr[1]))
179 *e_token++ = *buf_ptr++;
180 *e_token++ = *buf_ptr++;
181 while (*buf_ptr == '0' || *buf_ptr == '1') {
183 *e_token++ = *buf_ptr++;
187 *e_token++ = *buf_ptr++;
188 while (*buf_ptr >= '0' && *buf_ptr <= '8') {
190 *e_token++ = *buf_ptr++;
194 *e_token++ = *buf_ptr++;
195 *e_token++ = *buf_ptr++;
196 while (isxdigit(*buf_ptr)) {
198 *e_token++ = *buf_ptr++;
203 if (*buf_ptr == '.') {
210 *e_token++ = *buf_ptr++;
211 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
212 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
218 *e_token++ = *buf_ptr++;
219 if (*buf_ptr == '+' || *buf_ptr == '-')
220 *e_token++ = *buf_ptr++;
227 if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) {
229 *e_token++ = *buf_ptr++;
233 if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) {
235 if (buf_ptr[1] == buf_ptr[0])
236 *e_token++ = *buf_ptr++;
237 *e_token++ = *buf_ptr++;
245 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
246 /* fill_buffer() terminates buffer with newline */
247 if (*buf_ptr == BACKSLASH) {
248 if (*(buf_ptr + 1) == '\n') {
250 if (buf_ptr >= buf_end)
257 *e_token++ = *buf_ptr++;
258 if (buf_ptr >= buf_end)
263 if (s_token[0] == 'L' && s_token[1] == '\0' &&
264 (*buf_ptr == '"' || *buf_ptr == '\''))
267 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
268 if (++buf_ptr >= buf_end)
272 if (l_struct && !ps.p_l_follow) {
273 /* if last token was 'struct' and we're not
274 * in parentheses, then this token
275 * should be treated as a declaration */
281 ps.last_u_d = l_struct; /* Operator after identifier is binary
282 * unless last token was 'struct' */
284 last_code = ident; /* Remember that this is the code we will
288 const char *q = s_token;
289 size_t q_len = strlen(q);
290 /* Check if we have an "_t" in the end */
292 (strcmp(q + q_len - 2, "_t") == 0)) {
293 ps.keyword = 4; /* a type name */
295 goto found_auto_typedef;
300 * This loop will check if the token is a keyword.
302 for (p = specials; (j = p->rwd) != NULL; p++) {
303 const char *q = s_token; /* point at scanned token */
304 if (*j++ != *q++ || *j++ != *q++)
305 continue; /* This test depends on the fact that
306 * identifiers are always at least 1 character
307 * long (i.e. the first two bytes of the
308 * identifier are always meaningful) */
310 break; /* If it's a one-character identifier */
313 goto found_keyword; /* I wish that C had a multi-level
316 if (p->rwd) { /* we have a keyword */
318 ps.keyword = p->rwcode;
321 case 7: /* it is a switch */
323 case 8: /* a case or default */
326 case 3: /* a "struct" */
328 * Next time around, we will want to know that we have had a
334 case 4: /* one of the declaration keywords */
337 /* inside parens: cast, param list, offsetof or sizeof */
338 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.not_cast_mask;
344 case 5: /* if, while, for */
347 case 6: /* do, else */
350 default: /* all others are treated like any other
353 } /* end of switch */
354 } /* end of if (p->rwd) */
355 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
358 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
360 strncpy(ps.procname, token, sizeof ps.procname - 1);
361 ps.in_parameter_declaration = 1;
366 * The following hack attempts to guess whether or not the current
367 * token is in fact a declaration keyword -- one that has been
370 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
373 && (ps.last_token == rparen || ps.last_token == semicolon ||
374 ps.last_token == decl ||
375 ps.last_token == lbrace || ps.last_token == rbrace)) {
376 ps.keyword = 4; /* a type name */
381 if (last_code == decl) /* if this is a declared variable, then
382 * following sign is unary */
383 ps.last_u_d = true; /* will make "int a -1" work */
385 return (ident); /* the ident is not in the list */
386 } /* end of processing for alphanum character */
388 /* Scan a non-alphanumeric token */
390 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
393 if (++buf_ptr >= buf_end)
398 unary_delim = ps.last_u_d;
399 ps.last_nl = true; /* remember that we just had a newline */
400 code = (had_eof ? 0 : newline);
403 * if data has been exhausted, the newline is a dummy, and we should
404 * return code to stop
408 case '\'': /* start of quoted character */
409 case '"': /* start of string */
415 e_token = chfont(&bodyf, &stringf, e_token);
417 do { /* copy the string */
418 while (1) { /* move one character or an escape pair (BACKSLASH <char>) */
419 if (*buf_ptr == '\n') {
420 diag2(1, "Unterminated literal");
423 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
424 * since CHECK_SIZE guarantees that there
425 * are at least 5 entries left */
426 *e_token = *buf_ptr++;
427 if (buf_ptr >= buf_end)
429 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
430 if (*buf_ptr == '\n') /* check for escaped newline */
433 *++e_token = BACKSLASH;
434 if (*buf_ptr == BACKSLASH)
435 *++e_token = BACKSLASH;
437 *++e_token = *buf_ptr++;
438 ++e_token; /* we must increment this again because we
439 * copied two chars */
440 if (buf_ptr >= buf_end)
444 break; /* we copied one character */
445 } /* end of while (1) */
446 } while (*e_token++ != qchar);
448 e_token = chfont(&stringf, &bodyf, e_token - 1);
468 unary_delim = ps.last_u_d;
491 * if (ps.in_or_st) ps.block_init = 1;
493 /* ? code = ps.block_init ? lparen : lbrace; */
499 /* ? code = ps.block_init ? rparen : rbrace; */
503 case 014: /* a form feed */
504 unary_delim = ps.last_u_d;
505 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
521 case '+': /* check for -, +, --, ++ */
522 code = (ps.last_u_d ? unary_op : binary_op);
525 if (*buf_ptr == token[0]) {
526 /* check for doubled character */
527 *e_token++ = *buf_ptr++;
528 /* buffer overflow will be checked at end of loop */
529 if (last_code == ident || last_code == rparen) {
530 code = (ps.last_u_d ? unary_op : postop);
531 /* check for following ++ or -- */
535 else if (*buf_ptr == '=')
536 /* check for operator += */
537 *e_token++ = *buf_ptr++;
538 else if (*buf_ptr == '>') {
539 /* check for operator -> */
540 *e_token++ = *buf_ptr++;
541 if (!pointer_as_binop) {
544 ps.want_blank = false;
547 break; /* buffer overflow will be checked at end of
554 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
555 e_token[-1] = *buf_ptr++;
556 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
557 *e_token++ = *buf_ptr++;
558 *e_token++ = '='; /* Flip =+ to += */
562 if (*buf_ptr == '=') {/* == */
563 *e_token++ = '='; /* add the second '=' of "==" */
571 /* can drop thru!!! */
575 case '!': /* ops like <, <<, <=, !=, etc */
576 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
577 *e_token++ = *buf_ptr;
578 if (++buf_ptr >= buf_end)
582 *e_token++ = *buf_ptr++;
583 code = (ps.last_u_d ? unary_op : binary_op);
588 if (token[0] == '/' && *buf_ptr == '*') {
589 /* it is start of comment */
592 if (++buf_ptr >= buf_end)
596 unary_delim = ps.last_u_d;
599 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
601 * handle ||, &&, etc, and also things as in int *****i
603 *e_token++ = *buf_ptr;
604 if (++buf_ptr >= buf_end)
607 code = (ps.last_u_d ? unary_op : binary_op);
611 } /* end of switch */
612 if (code != newline) {
616 if (buf_ptr >= buf_end) /* check for input buffer empty */
618 ps.last_u_d = unary_delim;
619 *e_token = '\0'; /* null terminate the token */
624 * Add the given keyword to the keyword table, using val as the keyword type
627 addkey(char *key, int val)
629 struct templ *p = specials;
631 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
635 if (p >= specials + sizeof specials / sizeof specials[0])
636 return; /* For now, table overflows are silently