]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - usr.bin/ctags/C.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / usr.bin / ctags / C.c
1 /*
2  * Copyright (c) 1987, 1993, 1994
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #if 0
31 #ifndef lint
32 static char sccsid[] = "@(#)C.c 8.4 (Berkeley) 4/2/94";
33 #endif
34 #endif
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include <limits.h>
40 #include <stdio.h>
41 #include <string.h>
42
43 #include "ctags.h"
44
45 static int      func_entry(void);
46 static void     hash_entry(void);
47 static void     skip_string(int);
48 static int      str_entry(int);
49
50 /*
51  * c_entries --
52  *      read .c and .h files and call appropriate routines
53  */
54 void
55 c_entries(void)
56 {
57         int     c;                      /* current character */
58         int     level;                  /* brace level */
59         int     token;                  /* if reading a token */
60         int     t_def;                  /* if reading a typedef */
61         int     t_level;                /* typedef's brace level */
62         char    *sp;                    /* buffer pointer */
63         char    tok[MAXTOKEN];          /* token buffer */
64
65         lineftell = ftell(inf);
66         sp = tok; token = t_def = NO; t_level = -1; level = 0; lineno = 1;
67         while (GETC(!=, EOF)) {
68                 switch (c) {
69                 /*
70                  * Here's where it DOESN'T handle: {
71                  *      foo(a)
72                  *      {
73                  *      #ifdef notdef
74                  *              }
75                  *      #endif
76                  *              if (a)
77                  *                      puts("hello, world");
78                  *      }
79                  */
80                 case '{':
81                         ++level;
82                         goto endtok;
83                 case '}':
84                         /*
85                          * if level goes below zero, try and fix
86                          * it, even though we've already messed up
87                          */
88                         if (--level < 0)
89                                 level = 0;
90                         goto endtok;
91
92                 case '\n':
93                         SETLINE;
94                         /*
95                          * the above 3 cases are similar in that they
96                          * are special characters that also end tokens.
97                          */
98         endtok:                 if (sp > tok) {
99                                 *sp = EOS;
100                                 token = YES;
101                                 sp = tok;
102                         }
103                         else
104                                 token = NO;
105                         continue;
106
107                 /*
108                  * We ignore quoted strings and character constants
109                  * completely.
110                  */
111                 case '"':
112                 case '\'':
113                         skip_string(c);
114                         break;
115
116                 /*
117                  * comments can be fun; note the state is unchanged after
118                  * return, in case we found:
119                  *      "foo() XX comment XX { int bar; }"
120                  */
121                 case '/':
122                         if (GETC(==, '*') || c == '/') {
123                                 skip_comment(c);
124                                 continue;
125                         }
126                         (void)ungetc(c, inf);
127                         c = '/';
128                         goto storec;
129
130                 /* hash marks flag #define's. */
131                 case '#':
132                         if (sp == tok) {
133                                 hash_entry();
134                                 break;
135                         }
136                         goto storec;
137
138                 /*
139                  * if we have a current token, parenthesis on
140                  * level zero indicates a function.
141                  */
142                 case '(':
143                         if (!level && token) {
144                                 int     curline;
145
146                                 if (sp != tok)
147                                         *sp = EOS;
148                                 /*
149                                  * grab the line immediately, we may
150                                  * already be wrong, for example,
151                                  *      foo\n
152                                  *      (arg1,
153                                  */
154                                 getline();
155                                 curline = lineno;
156                                 if (func_entry()) {
157                                         ++level;
158                                         pfnote(tok, curline);
159                                 }
160                                 break;
161                         }
162                         goto storec;
163
164                 /*
165                  * semi-colons indicate the end of a typedef; if we find a
166                  * typedef we search for the next semi-colon of the same
167                  * level as the typedef.  Ignoring "structs", they are
168                  * tricky, since you can find:
169                  *
170                  *      "typedef long time_t;"
171                  *      "typedef unsigned int u_int;"
172                  *      "typedef unsigned int u_int [10];"
173                  *
174                  * If looking at a typedef, we save a copy of the last token
175                  * found.  Then, when we find the ';' we take the current
176                  * token if it starts with a valid token name, else we take
177                  * the one we saved.  There's probably some reasonable
178                  * alternative to this...
179                  */
180                 case ';':
181                         if (t_def && level == t_level) {
182                                 t_def = NO;
183                                 getline();
184                                 if (sp != tok)
185                                         *sp = EOS;
186                                 pfnote(tok, lineno);
187                                 break;
188                         }
189                         goto storec;
190
191                 /*
192                  * store characters until one that can't be part of a token
193                  * comes along; check the current token against certain
194                  * reserved words.
195                  */
196                 default:
197                         /* ignore whitespace */
198                         if (c == ' ' || c == '\t') {
199                                 int save = c;
200                                 while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
201                                         ;
202                                 if (c == EOF)
203                                         return;
204                                 (void)ungetc(c, inf);
205                                 c = save;
206                         }
207         storec:         if (!intoken(c)) {
208                                 if (sp == tok)
209                                         break;
210                                 *sp = EOS;
211                                 if (tflag) {
212                                         /* no typedefs inside typedefs */
213                                         if (!t_def &&
214                                                    !memcmp(tok, "typedef",8)) {
215                                                 t_def = YES;
216                                                 t_level = level;
217                                                 break;
218                                         }
219                                         /* catch "typedef struct" */
220                                         if ((!t_def || t_level < level)
221                                             && (!memcmp(tok, "struct", 7)
222                                             || !memcmp(tok, "union", 6)
223                                             || !memcmp(tok, "enum", 5))) {
224                                                 /*
225                                                  * get line immediately;
226                                                  * may change before '{'
227                                                  */
228                                                 getline();
229                                                 if (str_entry(c))
230                                                         ++level;
231                                                 break;
232                                                 /* } */
233                                         }
234                                 }
235                                 sp = tok;
236                         }
237                         else if (sp != tok || begtoken(c)) {
238                                 if (sp == tok + sizeof tok - 1)
239                                         /* Too long -- truncate it */
240                                         *sp = EOS;
241                                 else 
242                                         *sp++ = c;
243                                 token = YES;
244                         }
245                         continue;
246                 }
247
248                 sp = tok;
249                 token = NO;
250         }
251 }
252
253 /*
254  * func_entry --
255  *      handle a function reference
256  */
257 static int
258 func_entry(void)
259 {
260         int     c;                      /* current character */
261         int     level = 0;              /* for matching '()' */
262
263         /*
264          * Find the end of the assumed function declaration.
265          * Note that ANSI C functions can have type definitions so keep
266          * track of the parentheses nesting level.
267          */
268         while (GETC(!=, EOF)) {
269                 switch (c) {
270                 case '\'':
271                 case '"':
272                         /* skip strings and character constants */
273                         skip_string(c);
274                         break;
275                 case '/':
276                         /* skip comments */
277                         if (GETC(==, '*') || c == '/')
278                                 skip_comment(c);
279                         break;
280                 case '(':
281                         level++;
282                         break;
283                 case ')':
284                         if (level == 0)
285                                 goto fnd;
286                         level--;
287                         break;
288                 case '\n':
289                         SETLINE;
290                 }
291         }
292         return (NO);
293 fnd:
294         /*
295          * we assume that the character after a function's right paren
296          * is a token character if it's a function and a non-token
297          * character if it's a declaration.  Comments don't count...
298          */
299         for (;;) {
300                 while (GETC(!=, EOF) && iswhite(c))
301                         if (c == '\n')
302                                 SETLINE;
303                 if (intoken(c) || c == '{')
304                         break;
305                 if (c == '/' && (GETC(==, '*') || c == '/'))
306                         skip_comment(c);
307                 else {                          /* don't ever "read" '/' */
308                         (void)ungetc(c, inf);
309                         return (NO);
310                 }
311         }
312         if (c != '{')
313                 (void)skip_key('{');
314         return (YES);
315 }
316
317 /*
318  * hash_entry --
319  *      handle a line starting with a '#'
320  */
321 static void
322 hash_entry(void)
323 {
324         int     c;                      /* character read */
325         int     curline;                /* line started on */
326         char    *sp;                    /* buffer pointer */
327         char    tok[MAXTOKEN];          /* storage buffer */
328
329         /* ignore leading whitespace */
330         while (GETC(!=, EOF) && (c == ' ' || c == '\t'))
331                 ;
332         (void)ungetc(c, inf);
333
334         curline = lineno;
335         for (sp = tok;;) {              /* get next token */
336                 if (GETC(==, EOF))
337                         return;
338                 if (iswhite(c))
339                         break;
340                 if (sp == tok + sizeof tok - 1)
341                         /* Too long -- truncate it */
342                         *sp = EOS;
343                 else 
344                         *sp++ = c;
345         }
346         *sp = EOS;
347         if (memcmp(tok, "define", 6))   /* only interested in #define's */
348                 goto skip;
349         for (;;) {                      /* this doesn't handle "#define \n" */
350                 if (GETC(==, EOF))
351                         return;
352                 if (!iswhite(c))
353                         break;
354         }
355         for (sp = tok;;) {              /* get next token */
356                 if (sp == tok + sizeof tok - 1)
357                         /* Too long -- truncate it */
358                         *sp = EOS;
359                 else 
360                         *sp++ = c;
361                 if (GETC(==, EOF))
362                         return;
363                 /*
364                  * this is where it DOESN'T handle
365                  * "#define \n"
366                  */
367                 if (!intoken(c))
368                         break;
369         }
370         *sp = EOS;
371         if (dflag || c == '(') {        /* only want macros */
372                 getline();
373                 pfnote(tok, curline);
374         }
375 skip:   if (c == '\n') {                /* get rid of rest of define */
376                 SETLINE
377                 if (*(sp - 1) != '\\')
378                         return;
379         }
380         (void)skip_key('\n');
381 }
382
383 /*
384  * str_entry --
385  *      handle a struct, union or enum entry
386  */
387 static int
388 str_entry(int c) /* c is current character */
389 {
390         int     curline;                /* line started on */
391         char    *sp;                    /* buffer pointer */
392         char    tok[LINE_MAX];          /* storage buffer */
393
394         curline = lineno;
395         while (iswhite(c))
396                 if (GETC(==, EOF))
397                         return (NO);
398         if (c == '{')           /* it was "struct {" */
399                 return (YES);
400         for (sp = tok;;) {              /* get next token */
401                 if (sp == tok + sizeof tok - 1)
402                         /* Too long -- truncate it */
403                         *sp = EOS;
404                 else 
405                         *sp++ = c;
406                 if (GETC(==, EOF))
407                         return (NO);
408                 if (!intoken(c))
409                         break;
410         }
411         switch (c) {
412                 case '{':               /* it was "struct foo{" */
413                         --sp;
414                         break;
415                 case '\n':              /* it was "struct foo\n" */
416                         SETLINE;
417                         /*FALLTHROUGH*/
418                 default:                /* probably "struct foo " */
419                         while (GETC(!=, EOF))
420                                 if (!iswhite(c))
421                                         break;
422                         if (c != '{') {
423                                 (void)ungetc(c, inf);
424                                 return (NO);
425                         }
426         }
427         *sp = EOS;
428         pfnote(tok, curline);
429         return (YES);
430 }
431
432 /*
433  * skip_comment --
434  *      skip over comment
435  */
436 void
437 skip_comment(int t) /* t is comment character */
438 {
439         int     c;                      /* character read */
440         int     star;                   /* '*' flag */
441
442         for (star = 0; GETC(!=, EOF);)
443                 switch(c) {
444                 /* comments don't nest, nor can they be escaped. */
445                 case '*':
446                         star = YES;
447                         break;
448                 case '/':
449                         if (star && t == '*')
450                                 return;
451                         break;
452                 case '\n':
453                         if (t == '/')
454                                 return;
455                         SETLINE;
456                         /*FALLTHROUGH*/
457                 default:
458                         star = NO;
459                         break;
460                 }
461 }
462
463 /*
464  * skip_string --
465  *      skip to the end of a string or character constant.
466  */
467 void
468 skip_string(int key)
469 {
470         int     c,
471                 skip;
472
473         for (skip = NO; GETC(!=, EOF); )
474                 switch (c) {
475                 case '\\':              /* a backslash escapes anything */
476                         skip = !skip;   /* we toggle in case it's "\\" */
477                         break;
478                 case '\n':
479                         SETLINE;
480                         /*FALLTHROUGH*/
481                 default:
482                         if (c == key && !skip)
483                                 return;
484                         skip = NO;
485                 }
486 }
487
488 /*
489  * skip_key --
490  *      skip to next char "key"
491  */
492 int
493 skip_key(int key)
494 {
495         int     c,
496                 skip,
497                 retval;
498
499         for (skip = retval = NO; GETC(!=, EOF);)
500                 switch(c) {
501                 case '\\':              /* a backslash escapes anything */
502                         skip = !skip;   /* we toggle in case it's "\\" */
503                         break;
504                 case ';':               /* special case for yacc; if one */
505                 case '|':               /* of these chars occurs, we may */
506                         retval = YES;   /* have moved out of the rule */
507                         break;          /* not used by C */
508                 case '\'':
509                 case '"':
510                         /* skip strings and character constants */
511                         skip_string(c);
512                         break;
513                 case '/':
514                         /* skip comments */
515                         if (GETC(==, '*') || c == '/') {
516                                 skip_comment(c);
517                                 break;
518                         }
519                         (void)ungetc(c, inf);
520                         c = '/';
521                         goto norm;
522                 case '\n':
523                         SETLINE;
524                         /*FALLTHROUGH*/
525                 default:
526                 norm:
527                         if (c == key && !skip)
528                                 return (retval);
529                         skip = NO;
530                 }
531         return (retval);
532 }