]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/ctags/C.c
THIS BRANCH IS OBSOLETE, PLEASE READ:
[FreeBSD/FreeBSD.git] / usr.bin / ctags / C.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1987, 1993, 1994
5  *      The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #if 0
33 #ifndef lint
34 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
35 #endif
36 #endif
37
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40
41 #include <limits.h>
42 #include <stdio.h>
43 #include <string.h>
44
45 #include "ctags.h"
46
47 static int      func_entry(void);
48 static void     hash_entry(void);
49 static void     skip_string(int);
50 static int      str_entry(int);
51
52 /*
53  * c_entries --
54  *      read .c and .h files and call appropriate routines
55  */
56 void
57 c_entries(void)
58 {
59         int     c;                      /* current character */
60         int     level;                  /* brace level */
61         int     token;                  /* if reading a token */
62         int     t_def;                  /* if reading a typedef */
63         int     t_level;                /* typedef's brace level */
64         char    *sp;                    /* buffer pointer */
65         char    tok[MAXTOKEN];          /* token buffer */
66
67         lineftell = ftell(inf);
68         sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
69         while (GETC(!=, EOF)) {
70                 switch (c) {
71                 /*
72                  * Here's where it DOESN'T handle: {
73                  *      foo(a)
74                  *      {
75                  *      #ifdef notdef
76                  *              }
77                  *      #endif
78                  *              if (a)
79                  *                      puts("hello, world");
80                  *      }
81                  */
82                 case '{':
83                         ++level;
84                         goto endtok;
85                 case '}':
86                         /*
87                          * if level goes below zero, try and fix
88                          * it, even though we've already messed up
89                          */
90                         if (--level < 0)
91                                 level = 0;
92                         goto endtok;
93
94                 case '\n':
95                         SETLINE;
96                         /*
97                          * the above 3 cases are similar in that they
98                          * are special characters that also end tokens.
99                          */
100         endtok:                 if (sp > tok) {
101                                 *sp = EOS;
102                                 token = YES;
103                                 sp = tok;
104                         }
105                         else
106                                 token = NO;
107                         continue;
108
109                 /*
110                  * We ignore quoted strings and character constants
111                  * completely.
112                  */
113                 case '"':
114                 case '\'':
115                         skip_string(c);
116                         break;
117
118                 /*
119                  * comments can be fun; note the state is unchanged after
120                  * return, in case we found:
121                  *      "foo() XX comment XX { int bar; }"
122                  */
123                 case '/':
124                         if (GETC(==, '*') || c == '/') {
125                                 skip_comment(c);
126                                 continue;
127                         }
128                         (void)ungetc(c, inf);
129                         c = '/';
130                         goto storec;
131
132                 /* hash marks flag #define's. */
133                 case '#':
134                         if (sp == tok) {
135                                 hash_entry();
136                                 break;
137                         }
138                         goto storec;
139
140                 /*
141                  * if we have a current token, parenthesis on
142                  * level zero indicates a function.
143                  */
144                 case '(':
145                         if (!level && token) {
146                                 int     curline;
147
148                                 if (sp != tok)
149                                         *sp = EOS;
150                                 /*
151                                  * grab the line immediately, we may
152                                  * already be wrong, for example,
153                                  *      foo\n
154                                  *      (arg1,
155                                  */
156                                 get_line();
157                                 curline = lineno;
158                                 if (func_entry()) {
159                                         ++level;
160                                         pfnote(tok, curline);
161                                 }
162                                 break;
163                         }
164                         goto storec;
165
166                 /*
167                  * semi-colons indicate the end of a typedef; if we find a
168                  * typedef we search for the next semi-colon of the same
169                  * level as the typedef.  Ignoring "structs", they are
170                  * tricky, since you can find:
171                  *
172                  *      "typedef long time_t;"
173                  *      "typedef unsigned int u_int;"
174                  *      "typedef unsigned int u_int [10];"
175                  *
176                  * If looking at a typedef, we save a copy of the last token
177                  * found.  Then, when we find the ';' we take the current
178                  * token if it starts with a valid token name, else we take
179                  * the one we saved.  There's probably some reasonable
180                  * alternative to this...
181                  */
182                 case ';':
183                         if (t_def && level == t_level) {
184                                 t_def = NO;
185                                 get_line();
186                                 if (sp != tok)
187                                         *sp = EOS;
188                                 pfnote(tok, lineno);
189                                 break;
190                         }
191                         goto storec;
192
193                 /*
194                  * store characters until one that can't be part of a token
195                  * comes along; check the current token against certain
196                  * reserved words.
197                  */
198                 default:
199                         /* ignore whitespace */
200                         if (c == ' ' || c == '\t') {
201                                 int save = c;
202                                 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
203                                         ;
204                                 if (c == EOF)
205                                         return;
206                                 (void)ungetc(c, inf);
207                                 c = save;
208                         }
209         storec:         if (!intoken(c)) {
210                                 if (sp == tok)
211                                         break;
212                                 *sp = EOS;
213                                 if (tflag) {
214                                         /* no typedefs inside typedefs */
215                                         if (!t_def &&
216                                                    !memcmp(tok, "typedef",8)) {
217                                                 t_def = YES;
218                                                 t_level = level;
219                                                 break;
220                                         }
221                                         /* catch "typedef struct" */
222                                         if ((!t_def || t_level < level)
223                                             && (!memcmp(tok, "struct", 7)
224                                             || !memcmp(tok, "union", 6)
225                                             || !memcmp(tok, "enum", 5))) {
226                                                 /*
227                                                  * get line immediately;
228                                                  * may change before '{'
229                                                  */
230                                                 get_line();
231                                                 if (str_entry(c))
232                                                         ++level;
233                                                 break;
234                                                 /* } */
235                                         }
236                                 }
237                                 sp = tok;
238                         }
239                         else if (sp != tok || begtoken(c)) {
240                                 if (sp == tok + sizeof tok - 1)
241                                         /* Too long -- truncate it */
242                                         *sp = EOS;
243                                 else 
244                                         *sp++ = c;
245                                 token = YES;
246                         }
247                         continue;
248                 }
249
250                 sp = tok;
251                 token = NO;
252         }
253 }
254
255 /*
256  * func_entry --
257  *      handle a function reference
258  */
259 static int
260 func_entry(void)
261 {
262         int     c;                      /* current character */
263         int     level = 0;              /* for matching '()' */
264
265         /*
266          * Find the end of the assumed function declaration.
267          * Note that ANSI C functions can have type definitions so keep
268          * track of the parentheses nesting level.
269          */
270         while (GETC(!=, EOF)) {
271                 switch (c) {
272                 case '\'':
273                 case '"':
274                         /* skip strings and character constants */
275                         skip_string(c);
276                         break;
277                 case '/':
278                         /* skip comments */
279                         if (GETC(==, '*') || c == '/')
280                                 skip_comment(c);
281                         break;
282                 case '(':
283                         level++;
284                         break;
285                 case ')':
286                         if (level == 0)
287                                 goto fnd;
288                         level--;
289                         break;
290                 case '\n':
291                         SETLINE;
292                 }
293         }
294         return (NO);
295 fnd:
296         /*
297          * we assume that the character after a function's right paren
298          * is a token character if it's a function and a non-token
299          * character if it's a declaration.  Comments don't count...
300          */
301         for (;;) {
302                 while (GETC(!=, EOF) && iswhite(c))
303                         if (c == '\n')
304                                 SETLINE;
305                 if (intoken(c) || c == '{')
306                         break;
307                 if (c == '/' && (GETC(==, '*') || c == '/'))
308                         skip_comment(c);
309                 else {                          /* don't ever "read" '/' */
310                         (void)ungetc(c, inf);
311                         return (NO);
312                 }
313         }
314         if (c != '{')
315                 (void)skip_key('{');
316         return (YES);
317 }
318
319 /*
320  * hash_entry --
321  *      handle a line starting with a '#'
322  */
323 static void
324 hash_entry(void)
325 {
326         int     c;                      /* character read */
327         int     curline;                /* line started on */
328         char    *sp;                    /* buffer pointer */
329         char    tok[MAXTOKEN];          /* storage buffer */
330
331         /* ignore leading whitespace */
332         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
333                 ;
334         (void)ungetc(c, inf);
335
336         curline = lineno;
337         for (sp = tok;;) {              /* get next token */
338                 if (GETC(==, EOF))
339                         return;
340                 if (iswhite(c))
341                         break;
342                 if (sp == tok + sizeof tok - 1)
343                         /* Too long -- truncate it */
344                         *sp = EOS;
345                 else 
346                         *sp++ = c;
347         }
348         *sp = EOS;
349         if (memcmp(tok, "define", 6))   /* only interested in #define's */
350                 goto skip;
351         for (;;) {                      /* this doesn't handle "#define \n" */
352                 if (GETC(==, EOF))
353                         return;
354                 if (!iswhite(c))
355                         break;
356         }
357         for (sp = tok;;) {              /* get next token */
358                 if (sp == tok + sizeof tok - 1)
359                         /* Too long -- truncate it */
360                         *sp = EOS;
361                 else 
362                         *sp++ = c;
363                 if (GETC(==, EOF))
364                         return;
365                 /*
366                  * this is where it DOESN'T handle
367                  * "#define \n"
368                  */
369                 if (!intoken(c))
370                         break;
371         }
372         *sp = EOS;
373         if (dflag || c == '(') {        /* only want macros */
374                 get_line();
375                 pfnote(tok, curline);
376         }
377 skip:   if (c == '\n') {                /* get rid of rest of define */
378                 SETLINE
379                 if (*(sp - 1) != '\\')
380                         return;
381         }
382         (void)skip_key('\n');
383 }
384
385 /*
386  * str_entry --
387  *      handle a struct, union or enum entry
388  */
389 static int
390 str_entry(int c) /* c is current character */
391 {
392         int     curline;                /* line started on */
393         char    *sp;                    /* buffer pointer */
394         char    tok[LINE_MAX];          /* storage buffer */
395
396         curline = lineno;
397         while (iswhite(c))
398                 if (GETC(==, EOF))
399                         return (NO);
400         if (c == '{')           /* it was "struct {" */
401                 return (YES);
402         for (sp = tok;;) {              /* get next token */
403                 if (sp == tok + sizeof tok - 1)
404                         /* Too long -- truncate it */
405                         *sp = EOS;
406                 else 
407                         *sp++ = c;
408                 if (GETC(==, EOF))
409                         return (NO);
410                 if (!intoken(c))
411                         break;
412         }
413         switch (c) {
414                 case '{':               /* it was "struct foo{" */
415                         --sp;
416                         break;
417                 case '\n':              /* it was "struct foo\n" */
418                         SETLINE;
419                         /*FALLTHROUGH*/
420                 default:                /* probably "struct foo " */
421                         while (GETC(!=, EOF))
422                                 if (!iswhite(c))
423                                         break;
424                         if (c != '{') {
425                                 (void)ungetc(c, inf);
426                                 return (NO);
427                         }
428         }
429         *sp = EOS;
430         pfnote(tok, curline);
431         return (YES);
432 }
433
434 /*
435  * skip_comment --
436  *      skip over comment
437  */
438 void
439 skip_comment(int t) /* t is comment character */
440 {
441         int     c;                      /* character read */
442         int     star;                   /* '*' flag */
443
444         for (star = 0; GETC(!=, EOF);)
445                 switch(c) {
446                 /* comments don't nest, nor can they be escaped. */
447                 case '*':
448                         star = YES;
449                         break;
450                 case '/':
451                         if (star && t == '*')
452                                 return;
453                         break;
454                 case '\n':
455                         if (t == '/')
456                                 return;
457                         SETLINE;
458                         /*FALLTHROUGH*/
459                 default:
460                         star = NO;
461                         break;
462                 }
463 }
464
465 /*
466  * skip_string --
467  *      skip to the end of a string or character constant.
468  */
469 void
470 skip_string(int key)
471 {
472         int     c,
473                 skip;
474
475         for (skip = NO; GETC(!=, EOF); )
476                 switch (c) {
477                 case '\\':              /* a backslash escapes anything */
478                         skip = !skip;   /* we toggle in case it's "\\" */
479                         break;
480                 case '\n':
481                         SETLINE;
482                         /*FALLTHROUGH*/
483                 default:
484                         if (c == key && !skip)
485                                 return;
486                         skip = NO;
487                 }
488 }
489
490 /*
491  * skip_key --
492  *      skip to next char "key"
493  */
494 int
495 skip_key(int key)
496 {
497         int     c,
498                 skip,
499                 retval;
500
501         for (skip = retval = NO; GETC(!=, EOF);)
502                 switch(c) {
503                 case '\\':              /* a backslash escapes anything */
504                         skip = !skip;   /* we toggle in case it's "\\" */
505                         break;
506                 case ';':               /* special case for yacc; if one */
507                 case '|':               /* of these chars occurs, we may */
508                         retval = YES;   /* have moved out of the rule */
509                         break;          /* not used by C */
510                 case '\'':
511                 case '"':
512                         /* skip strings and character constants */
513                         skip_string(c);
514                         break;
515                 case '/':
516                         /* skip comments */
517                         if (GETC(==, '*') || c == '/') {
518                                 skip_comment(c);
519                                 break;
520                         }
521                         (void)ungetc(c, inf);
522                         c = '/';
523                         goto norm;
524                 case '\n':
525                         SETLINE;
526                         /*FALLTHROUGH*/
527                 default:
528                 norm:
529                         if (c == key && !skip)
530                                 return (retval);
531                         skip = NO;
532                 }
533         return (retval);
534 }