]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/ctags/C.c
Merge bmake-20230622
[FreeBSD/FreeBSD.git] / usr.bin / ctags / C.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1987, 1993, 1994
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #if 0
33 #ifndef lint
34 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
35 #endif
36 #endif
37
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40
41 #include <limits.h>
42 #include <stddef.h>
43 #include <stdio.h>
44 #include <string.h>
45
46 #include "ctags.h"
47
48 static int      func_entry(void);
49 static void     hash_entry(void);
50 static void     skip_string(int);
51 static int      str_entry(int);
52
53 /*
54  * c_entries --
55  *      read .c and .h files and call appropriate routines
56  */
57 void
58 c_entries(void)
59 {
60         int     c;                      /* current character */
61         int     level;                  /* brace level */
62         int     token;                  /* if reading a token */
63         int     t_def;                  /* if reading a typedef */
64         int     t_level;                /* typedef's brace level */
65         char    *sp;                    /* buffer pointer */
66         char    tok[MAXTOKEN];          /* token buffer */
67
68         lineftell = ftell(inf);
69         sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
70         while (GETC(!=, EOF)) {
71                 switch (c) {
72                 /*
73                  * Here's where it DOESN'T handle: {
74                  *      foo(a)
75                  *      {
76                  *      #ifdef notdef
77                  *              }
78                  *      #endif
79                  *              if (a)
80                  *                      puts("hello, world");
81                  *      }
82                  */
83                 case '{':
84                         ++level;
85                         goto endtok;
86                 case '}':
87                         /*
88                          * if level goes below zero, try and fix
89                          * it, even though we've already messed up
90                          */
91                         if (--level < 0)
92                                 level = 0;
93                         goto endtok;
94
95                 case '\n':
96                         SETLINE;
97                         /*
98                          * the above 3 cases are similar in that they
99                          * are special characters that also end tokens.
100                          */
101         endtok:                 if (sp > tok) {
102                                 *sp = EOS;
103                                 token = YES;
104                                 sp = tok;
105                         }
106                         else
107                                 token = NO;
108                         continue;
109
110                 /*
111                  * We ignore quoted strings and character constants
112                  * completely.
113                  */
114                 case '"':
115                 case '\'':
116                         skip_string(c);
117                         break;
118
119                 /*
120                  * comments can be fun; note the state is unchanged after
121                  * return, in case we found:
122                  *      "foo() XX comment XX { int bar; }"
123                  */
124                 case '/':
125                         if (GETC(==, '*') || c == '/') {
126                                 skip_comment(c);
127                                 continue;
128                         }
129                         (void)ungetc(c, inf);
130                         c = '/';
131                         goto storec;
132
133                 /* hash marks flag #define's. */
134                 case '#':
135                         if (sp == tok) {
136                                 hash_entry();
137                                 break;
138                         }
139                         goto storec;
140
141                 /*
142                  * if we have a current token, parenthesis on
143                  * level zero indicates a function.
144                  */
145                 case '(':
146                         if (!level && token) {
147                                 int     curline;
148
149                                 if (sp != tok)
150                                         *sp = EOS;
151                                 /*
152                                  * grab the line immediately, we may
153                                  * already be wrong, for example,
154                                  *      foo\n
155                                  *      (arg1,
156                                  */
157                                 get_line();
158                                 curline = lineno;
159                                 if (func_entry()) {
160                                         ++level;
161                                         pfnote(tok, curline);
162                                 }
163                                 break;
164                         }
165                         goto storec;
166
167                 /*
168                  * semi-colons indicate the end of a typedef; if we find a
169                  * typedef we search for the next semi-colon of the same
170                  * level as the typedef.  Ignoring "structs", they are
171                  * tricky, since you can find:
172                  *
173                  *      "typedef long time_t;"
174                  *      "typedef unsigned int u_int;"
175                  *      "typedef unsigned int u_int [10];"
176                  *
177                  * If looking at a typedef, we save a copy of the last token
178                  * found.  Then, when we find the ';' we take the current
179                  * token if it starts with a valid token name, else we take
180                  * the one we saved.  There's probably some reasonable
181                  * alternative to this...
182                  */
183                 case ';':
184                         if (t_def && level == t_level) {
185                                 t_def = NO;
186                                 get_line();
187                                 if (sp != tok)
188                                         *sp = EOS;
189                                 pfnote(tok, lineno);
190                                 break;
191                         }
192                         goto storec;
193
194                 /*
195                  * store characters until one that can't be part of a token
196                  * comes along; check the current token against certain
197                  * reserved words.
198                  */
199                 default:
200                         /* ignore whitespace */
201                         if (c == ' ' || c == '\t') {
202                                 int save = c;
203                                 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
204                                         ;
205                                 if (c == EOF)
206                                         return;
207                                 (void)ungetc(c, inf);
208                                 c = save;
209                         }
210         storec:         if (!intoken(c)) {
211                                 if (sp == tok)
212                                         break;
213                                 *sp = EOS;
214                                 if (tflag) {
215                                         /* no typedefs inside typedefs */
216                                         if (!t_def &&
217                                                    !memcmp(tok, "typedef",8)) {
218                                                 t_def = YES;
219                                                 t_level = level;
220                                                 break;
221                                         }
222                                         /* catch "typedef struct" */
223                                         if ((!t_def || t_level < level)
224                                             && (!memcmp(tok, "struct", 7)
225                                             || !memcmp(tok, "union", 6)
226                                             || !memcmp(tok, "enum", 5))) {
227                                                 /*
228                                                  * get line immediately;
229                                                  * may change before '{'
230                                                  */
231                                                 get_line();
232                                                 if (str_entry(c))
233                                                         ++level;
234                                                 break;
235                                                 /* } */
236                                         }
237                                 }
238                                 sp = tok;
239                         }
240                         else if (sp != tok || begtoken(c)) {
241                                 if (sp == tok + sizeof tok - 1)
242                                         /* Too long -- truncate it */
243                                         *sp = EOS;
244                                 else 
245                                         *sp++ = c;
246                                 token = YES;
247                         }
248                         continue;
249                 }
250
251                 sp = tok;
252                 token = NO;
253         }
254 }
255
256 /*
257  * func_entry --
258  *      handle a function reference
259  */
260 static int
261 func_entry(void)
262 {
263         int     c;                      /* current character */
264         int     level = 0;              /* for matching '()' */
265         static char attribute[] = "__attribute__";
266         char    maybe_attribute[sizeof attribute + 1],
267                 *anext;
268
269         /*
270          * Find the end of the assumed function declaration.
271          * Note that ANSI C functions can have type definitions so keep
272          * track of the parentheses nesting level.
273          */
274         while (GETC(!=, EOF)) {
275                 switch (c) {
276                 case '\'':
277                 case '"':
278                         /* skip strings and character constants */
279                         skip_string(c);
280                         break;
281                 case '/':
282                         /* skip comments */
283                         if (GETC(==, '*') || c == '/')
284                                 skip_comment(c);
285                         break;
286                 case '(':
287                         level++;
288                         break;
289                 case ')':
290                         if (level == 0)
291                                 goto fnd;
292                         level--;
293                         break;
294                 case '\n':
295                         SETLINE;
296                 }
297         }
298         return (NO);
299 fnd:
300         /*
301          * we assume that the character after a function's right paren
302          * is a token character if it's a function and a non-token
303          * character if it's a declaration.  Comments don't count...
304          */
305         for (anext = maybe_attribute;;) {
306                 while (GETC(!=, EOF) && iswhite(c))
307                         if (c == '\n')
308                                 SETLINE;
309                 if (c == EOF)
310                         return NO;
311                 /*
312                  * Recognize the gnu __attribute__ extension, which would
313                  * otherwise make the heuristic test DTWT
314                  */
315                 if (anext == maybe_attribute) {
316                         if (intoken(c)) {
317                                 *anext++ = c;
318                                 continue;
319                         }
320                 } else {
321                         if (intoken(c)) {
322                                 if (anext - maybe_attribute 
323                                  < (ptrdiff_t)(sizeof attribute - 1))
324                                         *anext++ = c;
325                                 else    break;
326                                 continue;
327                         } else {
328                                 *anext++ = '\0';
329                                 if (strcmp(maybe_attribute, attribute) == 0) {
330                                         (void)ungetc(c, inf);
331                                         return NO;
332                                 }
333                                 break;
334                         }
335                 }
336                 if (intoken(c) || c == '{')
337                         break;
338                 if (c == '/' && (GETC(==, '*') || c == '/'))
339                         skip_comment(c);
340                 else {                          /* don't ever "read" '/' */
341                         (void)ungetc(c, inf);
342                         return (NO);
343                 }
344         }
345         if (c != '{')
346                 (void)skip_key('{');
347         return (YES);
348 }
349
350 /*
351  * hash_entry --
352  *      handle a line starting with a '#'
353  */
354 static void
355 hash_entry(void)
356 {
357         int     c;                      /* character read */
358         int     curline;                /* line started on */
359         char    *sp;                    /* buffer pointer */
360         char    tok[MAXTOKEN];          /* storage buffer */
361
362         /* ignore leading whitespace */
363         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
364                 ;
365         (void)ungetc(c, inf);
366
367         curline = lineno;
368         for (sp = tok;;) {              /* get next token */
369                 if (GETC(==, EOF))
370                         return;
371                 if (iswhite(c))
372                         break;
373                 if (sp == tok + sizeof tok - 1)
374                         /* Too long -- truncate it */
375                         *sp = EOS;
376                 else 
377                         *sp++ = c;
378         }
379         *sp = EOS;
380         if (memcmp(tok, "define", 6))   /* only interested in #define's */
381                 goto skip;
382         for (;;) {                      /* this doesn't handle "#define \n" */
383                 if (GETC(==, EOF))
384                         return;
385                 if (!iswhite(c))
386                         break;
387         }
388         for (sp = tok;;) {              /* get next token */
389                 if (sp == tok + sizeof tok - 1)
390                         /* Too long -- truncate it */
391                         *sp = EOS;
392                 else 
393                         *sp++ = c;
394                 if (GETC(==, EOF))
395                         return;
396                 /*
397                  * this is where it DOESN'T handle
398                  * "#define \n"
399                  */
400                 if (!intoken(c))
401                         break;
402         }
403         *sp = EOS;
404         if (dflag || c == '(') {        /* only want macros */
405                 get_line();
406                 pfnote(tok, curline);
407         }
408 skip:   if (c == '\n') {                /* get rid of rest of define */
409                 SETLINE
410                 if (*(sp - 1) != '\\')
411                         return;
412         }
413         (void)skip_key('\n');
414 }
415
416 /*
417  * str_entry --
418  *      handle a struct, union or enum entry
419  */
420 static int
421 str_entry(int c) /* c is current character */
422 {
423         int     curline;                /* line started on */
424         char    *sp;                    /* buffer pointer */
425         char    tok[LINE_MAX];          /* storage buffer */
426
427         curline = lineno;
428         while (iswhite(c))
429                 if (GETC(==, EOF))
430                         return (NO);
431         if (c == '{')           /* it was "struct {" */
432                 return (YES);
433         for (sp = tok;;) {              /* get next token */
434                 if (sp == tok + sizeof tok - 1)
435                         /* Too long -- truncate it */
436                         *sp = EOS;
437                 else 
438                         *sp++ = c;
439                 if (GETC(==, EOF))
440                         return (NO);
441                 if (!intoken(c))
442                         break;
443         }
444         switch (c) {
445                 case '{':               /* it was "struct foo{" */
446                         --sp;
447                         break;
448                 case '\n':              /* it was "struct foo\n" */
449                         SETLINE;
450                         /*FALLTHROUGH*/
451                 default:                /* probably "struct foo " */
452                         while (GETC(!=, EOF))
453                                 if (!iswhite(c))
454                                         break;
455                         if (c != '{') {
456                                 (void)ungetc(c, inf);
457                                 return (NO);
458                         }
459         }
460         *sp = EOS;
461         pfnote(tok, curline);
462         return (YES);
463 }
464
465 /*
466  * skip_comment --
467  *      skip over comment
468  */
469 void
470 skip_comment(int t) /* t is comment character */
471 {
472         int     c;                      /* character read */
473         int     star;                   /* '*' flag */
474
475         for (star = 0; GETC(!=, EOF);)
476                 switch(c) {
477                 /* comments don't nest, nor can they be escaped. */
478                 case '*':
479                         star = YES;
480                         break;
481                 case '/':
482                         if (star && t == '*')
483                                 return;
484                         break;
485                 case '\n':
486                         SETLINE;
487                         if (t == '/')
488                                 return;
489                         /*FALLTHROUGH*/
490                 default:
491                         star = NO;
492                         break;
493                 }
494 }
495
496 /*
497  * skip_string --
498  *      skip to the end of a string or character constant.
499  */
500 void
501 skip_string(int key)
502 {
503         int     c,
504                 skip;
505
506         for (skip = NO; GETC(!=, EOF); )
507                 switch (c) {
508                 case '\\':              /* a backslash escapes anything */
509                         skip = !skip;   /* we toggle in case it's "\\" */
510                         break;
511                 case '\n':
512                         SETLINE;
513                         /*FALLTHROUGH*/
514                 default:
515                         if (c == key && !skip)
516                                 return;
517                         skip = NO;
518                 }
519 }
520
521 /*
522  * skip_key --
523  *      skip to next char "key"
524  */
525 int
526 skip_key(int key)
527 {
528         int     c,
529                 skip,
530                 retval;
531
532         for (skip = retval = NO; GETC(!=, EOF);)
533                 switch(c) {
534                 case '\\':              /* a backslash escapes anything */
535                         skip = !skip;   /* we toggle in case it's "\\" */
536                         break;
537                 case ';':               /* special case for yacc; if one */
538                 case '|':               /* of these chars occurs, we may */
539                         retval = YES;   /* have moved out of the rule */
540                         break;          /* not used by C */
541                 case '\'':
542                 case '"':
543                         /* skip strings and character constants */
544                         skip_string(c);
545                         break;
546                 case '/':
547                         /* skip comments */
548                         if (GETC(==, '*') || c == '/') {
549                                 skip_comment(c);
550                                 break;
551                         }
552                         (void)ungetc(c, inf);
553                         c = '/';
554                         goto norm;
555                 case '\n':
556                         SETLINE;
557                         /*FALLTHROUGH*/
558                 default:
559                 norm:
560                         if (c == key && !skip)
561                                 return (retval);
562                         skip = NO;
563                 }
564         return (retval);
565 }