2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93";
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
45 * Here we have the token scanner for indent. It scans off one token and puts
46 * it in the global variable "token". It returns a code, indicating the type
55 #include "indent_globs.h"
56 #include "indent_codes.h"
67 struct templ specials[1000] =
103 { /* this is used to facilitate the decision of
104 * what type (alphanumeric, operator) each
106 0, 0, 0, 0, 0, 0, 0, 0,
107 0, 0, 0, 0, 0, 0, 0, 0,
108 0, 0, 0, 0, 0, 0, 0, 0,
109 0, 0, 0, 0, 0, 0, 0, 0,
110 0, 3, 0, 0, 1, 3, 3, 0,
111 0, 0, 3, 3, 0, 3, 0, 3,
112 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 1, 0, 0, 3, 3, 3, 3,
114 0, 1, 1, 1, 1, 1, 1, 1,
115 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 1, 1, 1, 1, 1, 1, 1,
117 1, 1, 1, 0, 0, 0, 3, 1,
118 0, 1, 1, 1, 1, 1, 1, 1,
119 1, 1, 1, 1, 1, 1, 1, 1,
120 1, 1, 1, 1, 1, 1, 1, 1,
121 1, 1, 1, 0, 3, 0, 3, 0
127 int unary_delim; /* this is set to 1 if the current token
128 * forces a following operator to be unary */
129 static int last_code; /* the last token type returned */
130 static int l_struct; /* set to 1 if the last token was 'struct' */
131 int code; /* internal code to be returned */
132 char qchar; /* the delimiter character for a string */
134 e_token = s_token; /* point to start of place to save token */
136 ps.col_1 = ps.last_nl; /* tell world that this token started in
137 * column 1 iff the last thing scanned was nl */
140 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
141 ps.col_1 = false; /* leading blanks imply token is not in column
143 if (++buf_ptr >= buf_end)
147 /* Scan an alphanumeric token */
148 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
150 * we have a character or number
152 const char *j; /* used for searching thru list of
157 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) {
161 if (*buf_ptr == '0' &&
162 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
163 *e_token++ = *buf_ptr++;
164 *e_token++ = *buf_ptr++;
165 while (isxdigit(*buf_ptr)) {
167 *e_token++ = *buf_ptr++;
172 if (*buf_ptr == '.') {
179 *e_token++ = *buf_ptr++;
180 if (!isdigit(*buf_ptr) && *buf_ptr != '.') {
181 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
187 *e_token++ = *buf_ptr++;
188 if (*buf_ptr == '+' || *buf_ptr == '-')
189 *e_token++ = *buf_ptr++;
194 if (!(seensfx & 1) &&
195 (*buf_ptr == 'U' || *buf_ptr == 'u')) {
197 *e_token++ = *buf_ptr++;
201 if (!(seensfx & 2) &&
202 (*buf_ptr == 'L' || *buf_ptr == 'l')) {
204 if (buf_ptr[1] == buf_ptr[0])
205 *e_token++ = *buf_ptr++;
206 *e_token++ = *buf_ptr++;
214 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) {
215 /* fill_buffer() terminates buffer with newline */
216 if (*buf_ptr == BACKSLASH) {
217 if (*(buf_ptr + 1) == '\n') {
219 if (buf_ptr >= buf_end)
226 *e_token++ = *buf_ptr++;
227 if (buf_ptr >= buf_end)
232 if (s_token[0] == 'L' && s_token[1] == '\0' &&
233 (*buf_ptr == '"' || *buf_ptr == '\''))
236 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
237 if (++buf_ptr >= buf_end)
240 ps.its_a_keyword = false;
241 ps.sizeof_keyword = false;
242 if (l_struct && !ps.p_l_follow) {
243 /* if last token was 'struct' and we're not
244 * in parentheses, then this token
245 * should be treated as a declaration */
251 ps.last_u_d = l_struct; /* Operator after identifier is binary
252 * unless last token was 'struct' */
254 last_code = ident; /* Remember that this is the code we will
258 const char *q = s_token;
259 size_t q_len = strlen(q);
260 /* Check if we have an "_t" in the end */
262 (strcmp(q + q_len - 2, "_t") == 0)) {
263 ps.its_a_keyword = true;
265 goto found_auto_typedef;
270 * This loop will check if the token is a keyword.
272 for (p = specials; (j = p->rwd) != 0; p++) {
273 const char *q = s_token; /* point at scanned token */
274 if (*j++ != *q++ || *j++ != *q++)
275 continue; /* This test depends on the fact that
276 * identifiers are always at least 1 character
277 * long (i.e., the first two bytes of the
278 * identifier are always meaningful) */
280 break; /* If it's a one-character identifier */
283 goto found_keyword; /* I wish that C had a multi-level
286 if (p->rwd) { /* we have a keyword */
288 ps.its_a_keyword = true;
291 case 1: /* it is a switch */
293 case 2: /* a case or default */
296 case 3: /* a "struct" */
298 * Next time around, we will want to know that we have had a
304 case 4: /* one of the declaration keywords */
307 ps.cast_mask |= (1 << ps.p_l_follow) & ~ps.sizeof_mask;
308 break; /* inside parens: cast, param list or sizeof */
313 case 5: /* if, while, for */
316 case 6: /* do, else */
320 ps.sizeof_keyword = true;
321 default: /* all others are treated like any other
324 } /* end of switch */
325 } /* end of if (p->rwd) */
326 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
329 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
331 strncpy(ps.procname, token, sizeof ps.procname - 1);
332 ps.in_parameter_declaration = 1;
337 * The following hack attempts to guess whether or not the current
338 * token is in fact a declaration keyword -- one that has been
341 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
344 && (ps.last_token == rparen || ps.last_token == semicolon ||
345 ps.last_token == decl ||
346 ps.last_token == lbrace || ps.last_token == rbrace)) {
347 ps.its_a_keyword = true;
352 if (last_code == decl) /* if this is a declared variable, then
353 * following sign is unary */
354 ps.last_u_d = true; /* will make "int a -1" work */
356 return (ident); /* the ident is not in the list */
357 } /* end of processing for alphanumeric character */
359 /* Scan a non-alphanumeric token */
361 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
364 if (++buf_ptr >= buf_end)
369 unary_delim = ps.last_u_d;
370 ps.last_nl = true; /* remember that we just had a newline */
371 code = (had_eof ? 0 : newline);
374 * if data has been exhausted, the newline is a dummy, and we should
375 * return code to stop
379 case '\'': /* start of quoted character */
380 case '"': /* start of string */
386 e_token = chfont(&bodyf, &stringf, e_token);
388 do { /* copy the string */
389 while (1) { /* move one character or [\<char>]<char> */
390 if (*buf_ptr == '\n') {
391 diag2(1, "Unterminated literal");
394 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
395 * since CHECK_SIZE_TOKEN guarantees that there
396 * are at least 5 entries left */
397 *e_token = *buf_ptr++;
398 if (buf_ptr >= buf_end)
400 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
401 if (*buf_ptr == '\n') /* check for escaped newline */
404 *++e_token = BACKSLASH;
405 if (*buf_ptr == BACKSLASH)
406 *++e_token = BACKSLASH;
408 *++e_token = *buf_ptr++;
409 ++e_token; /* we must increment this again because we
410 * copied two chars */
411 if (buf_ptr >= buf_end)
415 break; /* we copied one character */
416 } /* end of while (1) */
417 } while (*e_token++ != qchar);
419 e_token = chfont(&stringf, &bodyf, e_token - 1);
439 unary_delim = ps.last_u_d;
462 * if (ps.in_or_st) ps.block_init = 1;
464 /* ? code = ps.block_init ? lparen : lbrace; */
470 /* ? code = ps.block_init ? rparen : rbrace; */
474 case 014: /* a form feed */
475 unary_delim = ps.last_u_d;
476 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
492 case '+': /* check for -, +, --, ++ */
493 code = (ps.last_u_d ? unary_op : binary_op);
496 if (*buf_ptr == token[0]) {
497 /* check for doubled character */
498 *e_token++ = *buf_ptr++;
499 /* buffer overflow will be checked at end of loop */
500 if (last_code == ident || last_code == rparen) {
501 code = (ps.last_u_d ? unary_op : postop);
502 /* check for following ++ or -- */
506 else if (*buf_ptr == '=')
507 /* check for operator += */
508 *e_token++ = *buf_ptr++;
509 else if (*buf_ptr == '>') {
510 /* check for operator -> */
511 *e_token++ = *buf_ptr++;
512 if (!pointer_as_binop) {
515 ps.want_blank = false;
518 break; /* buffer overflow will be checked at end of
525 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
526 e_token[-1] = *buf_ptr++;
527 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
528 *e_token++ = *buf_ptr++;
529 *e_token++ = '='; /* Flip =+ to += */
533 if (*buf_ptr == '=') {/* == */
534 *e_token++ = '='; /* append the second '=' of "==" */
542 /* can drop thru!!! */
546 case '!': /* ops like <, <<, <=, !=, etc */
547 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
548 *e_token++ = *buf_ptr;
549 if (++buf_ptr >= buf_end)
553 *e_token++ = *buf_ptr++;
554 code = (ps.last_u_d ? unary_op : binary_op);
559 if (token[0] == '/' && *buf_ptr == '*') {
560 /* it is start of comment */
563 if (++buf_ptr >= buf_end)
567 unary_delim = ps.last_u_d;
570 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
572 * handle ||, &&, etc., and also things such as int *****i
574 *e_token++ = *buf_ptr;
575 if (++buf_ptr >= buf_end)
578 code = (ps.last_u_d ? unary_op : binary_op);
582 } /* end of switch */
583 if (code != newline) {
587 if (buf_ptr >= buf_end) /* check for input buffer empty */
589 ps.last_u_d = unary_delim;
590 *e_token = '\0'; /* null terminate the token */
595 * Add the given keyword to the keyword table, using val as the keyword type
598 addkey(char *key, int val)
600 struct templ *p = specials;
602 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
606 if (p >= specials + sizeof specials / sizeof specials[0])
607 return; /* For now, table overflows are silently