2 * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: lex.c,v 1.78.18.5 2005/11/30 03:44:39 marka Exp $ */
28 #include <isc/buffer.h>
33 #include <isc/parseint.h>
34 #include <isc/print.h>
35 #include <isc/stdio.h>
36 #include <isc/string.h>
/*
 * One entry in the lexer's list of input sources.  Only some members are
 * visible in this listing; the ones shown record what kind of input this is,
 * whether closing the source must also close the stream, the buffer of
 * characters already read, and the line number saved for token unget.
 */
39 typedef struct inputsource {
/* True for stdio-backed sources; isc_lex_openbuffer() passes false. */
41 isc_boolean_t is_file;
/* True when isc_lex_close() must fclose() the stream itself. */
42 isc_boolean_t need_close;
/* Characters read from the input so far; enlarged by pushandgrow(). */
44 isc_buffer_t * pushback;
/* Line number copied by isc_lex_gettoken(), restored by isc_lex_ungettoken(). */
49 unsigned long saved_line;
/* Linkage used by the lexer's list of sources. */
50 ISC_LINK(struct inputsource) link;
/*
 * Magic value stored in lex->magic by isc_lex_create(), and the check that
 * every public entry point applies to its lexer argument.
 */
53 #define LEX_MAGIC ISC_MAGIC('L', 'e', 'x', '!')
54 #define VALID_LEX(l) ISC_MAGIC_VALID(l, LEX_MAGIC)
/*
 * Members of the lexer object (the struct header and its first members are
 * not part of this listing).
 */
/* Mask of enabled comment styles, tested against ISC_LEXCOMMENT_* flags. */
62 unsigned int comments;
/* Gate for comment recognition in isc_lex_gettoken(); true after creation. */
63 isc_boolean_t comment_ok;
/* True when the previous character handled was a newline (or at creation). */
64 isc_boolean_t last_was_eol;
/* Parenthesis nesting depth used with ISC_LEXOPT_DNSMULTILINE. */
65 unsigned int paren_count;
/* Copy of paren_count taken at the start of each token, for unget. */
66 unsigned int saved_paren_count;
/* Table indexed by character; non-zero marks a special character. */
67 isc_lexspecials_t specials;
/* List of open input sources; new sources are prepended, the head is current. */
68 LIST(struct inputsource) sources;
/*
 * Enlarge the lexer's token buffer.
 *
 * A new block of (2 * max_token + 1) bytes is obtained, the existing
 * (max_token + 1) bytes are copied into it, and the caller's cursors
 * *currp and *prevp are rebased so they point at the same offsets inside
 * the new block.  The old block is released and *remainingp is increased
 * by the old max_token.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY (presumably when the allocation
 * fails; the test itself is on a line not shown here).
 * NOTE(review): the statements that install the new block into the lexer
 * are not visible in this listing.
 */
71 static inline isc_result_t
72 grow_data(isc_lex_t *lex, size_t *remainingp, char **currp, char **prevp) {
75 new = isc_mem_get(lex->mctx, lex->max_token * 2 + 1);
77 return (ISC_R_NOMEMORY);
78 memcpy(new, lex->data, lex->max_token + 1);
/* Translate the cursors from offsets in the old block to the new one. */
79 *currp = new + (*currp - lex->data);
81 *prevp = new + (*prevp - lex->data);
/* The size passed here must match the size used when the block was obtained. */
82 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
84 *remainingp += lex->max_token;
86 return (ISC_R_SUCCESS);
/*
 * Create a lexer whose token buffer initially holds max_token characters
 * (max_token + 1 bytes are allocated).
 *
 * Requires: lexp is non-NULL and *lexp is NULL; max_token is greater than 0.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMEMORY if either allocation fails; when
 * the token buffer cannot be allocated the lexer object itself is released
 * first so nothing leaks.
 */
90 isc_lex_create(isc_mem_t *mctx, size_t max_token, isc_lex_t **lexp) {
97 REQUIRE(lexp != NULL && *lexp == NULL);
98 REQUIRE(max_token > 0U);
100 lex = isc_mem_get(mctx, sizeof(*lex));
102 return (ISC_R_NOMEMORY);
103 lex->data = isc_mem_get(mctx, max_token + 1);
104 if (lex->data == NULL) {
105 isc_mem_put(mctx, lex, sizeof(*lex));
106 return (ISC_R_NOMEMORY);
109 lex->max_token = max_token;
/* Initial state: comments allowed, positioned at the start of a line. */
111 lex->comment_ok = ISC_TRUE;
112 lex->last_was_eol = ISC_TRUE;
113 lex->paren_count = 0;
114 lex->saved_paren_count = 0;
/* No special characters until isc_lex_setspecials() is called. */
115 memset(lex->specials, 0, 256);
116 INIT_LIST(lex->sources);
/* Setting the magic makes the object pass VALID_LEX(). */
117 lex->magic = LEX_MAGIC;
121 return (ISC_R_SUCCESS);
/*
 * Destroy a lexer: close every input source still open, then release the
 * token buffer and the lexer object.
 *
 * Requires: lexp is non-NULL and refers to a valid lexer.
 */
125 isc_lex_destroy(isc_lex_t **lexp) {
132 REQUIRE(lexp != NULL);
134 REQUIRE(VALID_LEX(lex));
/* isc_lex_close() removes the head source, so this loop terminates. */
136 while (!EMPTY(lex->sources))
137 RUNTIME_CHECK(isc_lex_close(lex) == ISC_R_SUCCESS);
138 if (lex->data != NULL)
139 isc_mem_put(lex->mctx, lex->data, lex->max_token + 1);
141 isc_mem_put(lex->mctx, lex, sizeof(*lex));
/*
 * Accessor: returns the comment-style mask stored in the lexer, unchanged.
 * Requires a valid lexer.
 */
147 isc_lex_getcomments(isc_lex_t *lex) {
149 * Return the current lexer commenting styles.
152 REQUIRE(VALID_LEX(lex));
154 return (lex->comments);
/*
 * Mutator: replaces the lexer's comment-style mask with the given value.
 * isc_lex_gettoken() tests this mask against the ISC_LEXCOMMENT_* flags.
 * Requires a valid lexer.
 */
158 isc_lex_setcomments(isc_lex_t *lex, unsigned int comments) {
160 * Set allowed lexer commenting styles.
163 REQUIRE(VALID_LEX(lex));
165 lex->comments = comments;
/*
 * Copy the lexer's 256-byte table of special characters into the caller's
 * table.  Requires a valid lexer.
 */
169 isc_lex_getspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
171 * Put the current list of specials into 'specials'.
174 REQUIRE(VALID_LEX(lex));
176 memcpy(specials, lex->specials, 256);
/*
 * Replace the lexer's 256-byte table of special characters with a copy of
 * the caller's table.  Requires a valid lexer.
 */
180 isc_lex_setspecials(isc_lex_t *lex, isc_lexspecials_t specials) {
182 * The characters in 'specials' are returned as tokens. Along with
183 * whitespace, they delimit strings and numbers.
186 REQUIRE(VALID_LEX(lex));
188 memcpy(lex->specials, specials, 256);
/*
 * Allocate an input source record, fill it in, and make it the lexer's
 * current source by prepending it to lex->sources.
 *
 * is_file and need_close are stored as given; input is the FILE stream or
 * isc_buffer_t the source reads from; name is duplicated, so the caller
 * keeps ownership of its own string.
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY when the record or the name copy
 * cannot be allocated, or the failure result of isc_buffer_allocate() for
 * the pushback buffer (the return on that path is on a line not shown).
 * Every failure path releases whatever was already allocated.
 */
191 static inline isc_result_t
192 new_source(isc_lex_t *lex, isc_boolean_t is_file, isc_boolean_t need_close,
193 void *input, const char *name)
198 source = isc_mem_get(lex->mctx, sizeof(*source));
200 return (ISC_R_NOMEMORY);
201 source->result = ISC_R_SUCCESS;
202 source->is_file = is_file;
203 source->need_close = need_close;
204 source->at_eof = ISC_FALSE;
205 source->input = input;
206 source->name = isc_mem_strdup(lex->mctx, name);
207 if (source->name == NULL) {
208 isc_mem_put(lex->mctx, source, sizeof(*source));
209 return (ISC_R_NOMEMORY);
211 source->pushback = NULL;
212 result = isc_buffer_allocate(lex->mctx, &source->pushback,
214 if (result != ISC_R_SUCCESS) {
/* Undo the name copy and the record allocation, in reverse order. */
215 isc_mem_free(lex->mctx, source->name);
216 isc_mem_put(lex->mctx, source, sizeof(*source));
/* Prepending makes this the source that HEAD(lex->sources) returns. */
221 ISC_LIST_INITANDPREPEND(lex->sources, source, link);
223 return (ISC_R_SUCCESS);
/*
 * Open the named file for reading and push it as the current input source.
 * The source is created with need_close set, so isc_lex_close() will close
 * the stream.  If the source record cannot be created the stream is closed
 * here so it does not leak.
 *
 * The result comes from isc_stdio_open() or new_source().
 * Requires a valid lexer.
 */
227 isc_lex_openfile(isc_lex_t *lex, const char *filename) {
232 * Open 'filename' and make it the current input source for 'lex'.
235 REQUIRE(VALID_LEX(lex));
237 result = isc_stdio_open(filename, "r", &stream);
238 if (result != ISC_R_SUCCESS)
241 result = new_source(lex, ISC_TRUE, ISC_TRUE, stream, filename);
242 if (result != ISC_R_SUCCESS)
243 (void)fclose(stream);
/*
 * Push an already-open stdio stream as the current input source.  The
 * source is created with need_close cleared, so the caller remains
 * responsible for closing the stream.  The source name is synthesised from
 * the stream pointer value.
 *
 * Returns the result of new_source().  Requires a valid lexer.
 */
248 isc_lex_openstream(isc_lex_t *lex, FILE *stream) {
252 * Make 'stream' the current input source for 'lex'.
255 REQUIRE(VALID_LEX(lex));
257 snprintf(name, sizeof(name), "stream-%p", stream);
259 return (new_source(lex, ISC_TRUE, ISC_FALSE, stream, name));
/*
 * Push an in-memory buffer as the current input source.  The source is
 * marked as not being a file and as not needing a close.  The source name
 * is synthesised from the buffer pointer value.
 *
 * Returns the result of new_source().  Requires a valid lexer.
 */
263 isc_lex_openbuffer(isc_lex_t *lex, isc_buffer_t *buffer) {
267 * Make 'buffer' the current input source for 'lex'.
270 REQUIRE(VALID_LEX(lex));
272 snprintf(name, sizeof(name), "buffer-%p", buffer);
274 return (new_source(lex, ISC_FALSE, ISC_FALSE, buffer, name));
/*
 * Remove the current (head) input source from the lexer and release it:
 * the stream is closed only for file sources created with need_close set,
 * then the name copy, the pushback buffer and the record are freed.
 *
 * Returns ISC_R_SUCCESS, or ISC_R_NOMORE (presumably when no source is
 * open; the test is on a line not shown).  Requires a valid lexer.
 */
278 isc_lex_close(isc_lex_t *lex) {
282 * Close the most recently opened object (i.e. file or buffer).
285 REQUIRE(VALID_LEX(lex));
287 source = HEAD(lex->sources);
289 return (ISC_R_NOMORE);
291 ISC_LIST_UNLINK(lex->sources, source, link);
292 if (source->is_file) {
/* Streams handed in through isc_lex_openstream() are left open. */
293 if (source->need_close)
294 (void)fclose((FILE *)(source->input));
296 isc_mem_free(lex->mctx, source->name);
297 isc_buffer_free(&source->pushback);
298 isc_mem_put(lex->mctx, source, sizeof(*source));
300 return (ISC_R_SUCCESS);
/*
 * Two of the tokenizer states used by isc_lex_gettoken(); the remaining
 * enumerators are not part of this listing.
 */
/* Entered after a slash that might begin a C or C++ style comment. */
308 lexstate_maybecomment,
/* Entered from the C-comment state; presumably after a possible terminator start. */
310 lexstate_ccommentend,
/* Combined mask of the initial-whitespace and end-of-line lexer options. */
315 #define IWSEOL (ISC_LEXOPT_INITIALWS | ISC_LEXOPT_EOL)
/*
 * Step the source's pushback buffer back by one position so that the most
 * recently consumed character is read again, and clear the at-EOF flag.
 *
 * Requires that at least one character has been consumed
 * (pushback->current is greater than 0).
 * NOTE(review): the use of parameter c is on lines not shown here.
 */
318 pushback(inputsource *source, int c) {
319 REQUIRE(source->pushback->current > 0);
321 source->at_eof = ISC_FALSE;
324 source->pushback->current--;
/*
 * Append character c to the source's pushback buffer, first replacing the
 * buffer with one of twice the length when no space is left.
 *
 * On growth the used region of the old buffer is copied into the new one,
 * the read position is carried over, and the old buffer is freed.
 *
 * Returns ISC_R_SUCCESS; a failed isc_buffer_allocate() is detected, and
 * its result is presumably returned on a line not shown here.
 */
330 pushandgrow(isc_lex_t *lex, inputsource *source, int c) {
331 if (isc_buffer_availablelength(source->pushback) == 0) {
332 isc_buffer_t *tbuf = NULL;
337 oldlen = isc_buffer_length(source->pushback);
338 result = isc_buffer_allocate(lex->mctx, &tbuf, oldlen * 2);
339 if (result != ISC_R_SUCCESS)
341 isc_buffer_usedregion(source->pushback, &used);
342 result = isc_buffer_copyregion(tbuf, &used);
/* The new buffer is twice as long, so the copy is expected to fit. */
343 INSIST(result == ISC_R_SUCCESS);
/* Preserve the read position across the buffer swap. */
344 tbuf->current = source->pushback->current;
345 isc_buffer_free(&source->pushback);
346 source->pushback = tbuf;
348 isc_buffer_putuint8(source->pushback, (isc_uint8_t)c);
349 return (ISC_R_SUCCESS);
/*
 * Read the next token from the current (head) input source into *tokenp.
 *
 * options is a mask of ISC_LEXOPT_* flags selecting which token kinds are
 * recognised; the flags tested in the visible code are NOMORE, EOF, EOL,
 * INITIALWS, QSTRING, QSTRINGMULTILINE, NUMBER, OCTAL, CNUMBER, ESCAPE and
 * DNSMULTILINE.
 *
 * The body is a character-driven state machine (variable state).  Token
 * text is accumulated in lex->data, which is enlarged through grow_data()
 * whenever the remaining space reaches zero.
 *
 * Result codes assigned or returned in the visible code: ISC_R_SUCCESS,
 * ISC_R_NOMORE, ISC_R_UNBALANCED, ISC_R_IOERROR, ISC_R_UNEXPECTEDEND,
 * ISC_R_UNBALANCEDQUOTES, plus failures from pushandgrow() and
 * grow_data().  An error recorded in source->result is returned again on
 * every later call for that source.
 *
 * Requires a valid lexer and a non-NULL tokenp.
 * NOTE(review): many lines of this function (case labels, conditions,
 * braces) are absent from this listing; comments below describe only what
 * the visible lines establish.
 */
353 isc_lex_gettoken(isc_lex_t *lex, unsigned int options, isc_token_t *tokenp) {
356 isc_boolean_t done = ISC_FALSE;
357 isc_boolean_t no_comments = ISC_FALSE;
358 isc_boolean_t escaped = ISC_FALSE;
359 lexstate state = lexstate_start;
360 lexstate saved_state = lexstate_start;
361 isc_buffer_t *buffer;
365 isc_uint32_t as_ulong;
366 unsigned int saved_options;
370 * Get the next token.
373 REQUIRE(VALID_LEX(lex));
374 source = HEAD(lex->sources);
375 REQUIRE(tokenp != NULL);
/*
 * No input source is open: report that as a token when the caller asked
 * for ISC_LEXOPT_NOMORE, otherwise as a result code.
 */
377 if (source == NULL) {
378 if ((options & ISC_LEXOPT_NOMORE) != 0) {
379 tokenp->type = isc_tokentype_nomore;
380 return (ISC_R_SUCCESS);
382 return (ISC_R_NOMORE);
/* A failure recorded earlier on this source is sticky. */
385 if (source->result != ISC_R_SUCCESS)
386 return (source->result);
/* Snapshot state that isc_lex_ungettoken() restores. */
388 lex->saved_paren_count = lex->paren_count;
389 source->saved_line = source->line;
/*
 * Nothing left to re-read in the pushback buffer (the rest of this
 * condition is on a line not shown): open parentheses in multi-line mode
 * are an error, otherwise EOF may be handed back as a token.
 */
391 if (isc_buffer_remaininglength(source->pushback) == 0 &&
394 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
395 lex->paren_count != 0) {
396 lex->paren_count = 0;
397 return (ISC_R_UNBALANCED);
399 if ((options & ISC_LEXOPT_EOF) != 0) {
400 tokenp->type = isc_tokentype_eof;
401 return (ISC_R_SUCCESS);
406 isc_buffer_compact(source->pushback);
/* Keep the caller's options; the working copy may be adjusted below. */
408 saved_options = options;
409 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 && lex->paren_count > 0)
/* The whole token buffer is available at the start of a token. */
416 remaining = lex->max_token;
/*
 * Where flockfile() exists the stream lock is taken here and released by
 * the matching funlockfile() near the end of the function.
 */
418 #ifdef HAVE_FLOCKFILE
420 flockfile(source->input);
/*
 * Pushback buffer exhausted: fetch one more character from the stream or
 * the input buffer and append it with pushandgrow().
 */
424 if (isc_buffer_remaininglength(source->pushback) == 0) {
425 if (source->is_file) {
426 stream = source->input;
428 #if defined(HAVE_FLOCKFILE) && defined(HAVE_GETCUNLOCKED)
429 c = getc_unlocked(stream);
/* A stream error is recorded on the source so later calls fail too. */
434 if (ferror(stream)) {
435 source->result = ISC_R_IOERROR;
436 result = source->result;
439 source->at_eof = ISC_TRUE;
442 buffer = source->input;
/* Buffer sources are at EOF once every used byte has been read. */
444 if (buffer->current == buffer->used) {
446 source->at_eof = ISC_TRUE;
448 c = *((char *)buffer->base +
454 source->result = pushandgrow(lex, source, c);
455 if (source->result != ISC_R_SUCCESS) {
456 result = source->result;
/* Take the next character from the pushback buffer unless at EOF. */
462 if (!source->at_eof) {
463 if (state == lexstate_start)
464 /* Token has not started yet. */
466 isc_buffer_consumedlength(source->pushback);
467 c = isc_buffer_getuint8(source->pushback);
/*
 * Comment detection.  It is skipped while no_comments is set, which the
 * visible code does inside quoted strings and once a comment introducer
 * has been seen.  Each introducer is honoured only when the matching
 * ISC_LEXCOMMENT_* style is enabled in lex->comments.
 */
475 if (lex->comment_ok && !no_comments) {
476 if (!escaped && c == ';' &&
477 ((lex->comments & ISC_LEXCOMMENT_DNSMASTERFILE)
480 state = lexstate_eatline;
481 no_comments = ISC_TRUE;
483 } else if (c == '/' &&
486 ISC_LEXCOMMENT_CPLUSPLUS)) != 0) {
488 state = lexstate_maybecomment;
489 no_comments = ISC_TRUE;
491 } else if (c == '#' &&
492 ((lex->comments & ISC_LEXCOMMENT_SHELL)
495 state = lexstate_eatline;
496 no_comments = ISC_TRUE;
502 /* INSIST(c == EOF || (c >= 0 && c <= 255)); */
/*
 * Start of a token: classify the first character.  The first branch
 * produces ISC_R_UNBALANCED for open parentheses in multi-line mode and
 * otherwise an EOF token; its condition is on a line not shown.
 */
506 lex->last_was_eol = ISC_FALSE;
507 if ((options & ISC_LEXOPT_DNSMULTILINE) != 0 &&
508 lex->paren_count != 0) {
509 lex->paren_count = 0;
510 result = ISC_R_UNBALANCED;
513 if ((options & ISC_LEXOPT_EOF) == 0) {
517 tokenp->type = isc_tokentype_eof;
/* Blanks yield an initial-whitespace token only at the start of a line. */
519 } else if (c == ' ' || c == '\t') {
520 if (lex->last_was_eol &&
521 (options & ISC_LEXOPT_INITIALWS)
523 lex->last_was_eol = ISC_FALSE;
524 tokenp->type = isc_tokentype_initialws;
525 tokenp->value.as_char = c;
528 } else if (c == '\n') {
529 if ((options & ISC_LEXOPT_EOL) != 0) {
530 tokenp->type = isc_tokentype_eol;
533 lex->last_was_eol = ISC_TRUE;
/* A carriage return is tracked as a separate state when EOL tokens are wanted. */
534 } else if (c == '\r') {
535 if ((options & ISC_LEXOPT_EOL) != 0)
536 state = lexstate_crlf;
/* Opening quote: comment introducers inside the string are not special. */
537 } else if (c == '"' &&
538 (options & ISC_LEXOPT_QSTRING) != 0) {
539 lex->last_was_eol = ISC_FALSE;
540 no_comments = ISC_TRUE;
541 state = lexstate_qstring;
/*
 * Special characters become single-character tokens.  In multi-line mode
 * parentheses are additionally tracked through lex->paren_count, and
 * ISC_R_UNBALANCED is produced when the count is already zero.
 */
542 } else if (lex->specials[c]) {
543 lex->last_was_eol = ISC_FALSE;
544 if ((c == '(' || c == ')') &&
545 (options & ISC_LEXOPT_DNSMULTILINE) != 0) {
547 if (lex->paren_count == 0)
551 if (lex->paren_count == 0) {
552 result = ISC_R_UNBALANCED;
556 if (lex->paren_count == 0)
562 tokenp->type = isc_tokentype_special;
563 tokenp->value.as_char = c;
/* A leading 8 or 9 cannot start an octal number, so it starts a string. */
565 } else if (isdigit((unsigned char)c) &&
566 (options & ISC_LEXOPT_NUMBER) != 0) {
567 lex->last_was_eol = ISC_FALSE;
568 if ((options & ISC_LEXOPT_OCTAL) != 0 &&
569 (c == '8' || c == '9'))
570 state = lexstate_string;
572 state = lexstate_number;
575 lex->last_was_eol = ISC_FALSE;
576 state = lexstate_string;
583 tokenp->type = isc_tokentype_eol;
585 lex->last_was_eol = ISC_TRUE;
/*
 * Number state: a non-digit either ends the number, when it is a
 * delimiter, or turns the token into a string.
 */
587 case lexstate_number:
588 if (c == EOF || !isdigit((unsigned char)c)) {
589 if (c == ' ' || c == '\t' || c == '\r' ||
590 c == '\n' || c == EOF ||
593 if ((options & ISC_LEXOPT_OCTAL) != 0)
595 else if ((options & ISC_LEXOPT_CNUMBER) != 0)
/*
 * Convert the collected text.  Text rejected with ISC_R_BADNUMBER is
 * handed back as a string token instead of failing the call.
 */
601 result = isc_parse_uint32(&as_ulong,
604 if (result == ISC_R_SUCCESS) {
606 isc_tokentype_number;
607 tokenp->value.as_ulong =
609 } else if (result == ISC_R_BADNUMBER) {
613 isc_tokentype_string;
614 v = &(tokenp->value);
615 v->as_textregion.base =
617 v->as_textregion.length =
/*
 * Only an x or X directly after a leading 0, with ISC_LEXOPT_CNUMBER set,
 * keeps the token numeric; any other non-digit makes it a string.
 */
624 } else if (!(options & ISC_LEXOPT_CNUMBER) ||
625 ((c != 'x' && c != 'X') ||
626 (curr != &lex->data[1]) ||
627 (lex->data[0] != '0'))) {
628 /* Above test supports hex numbers */
629 state = lexstate_string;
631 } else if ((options & ISC_LEXOPT_OCTAL) != 0 &&
632 (c == '8' || c == '9')) {
633 state = lexstate_string;
/* Token buffer full: enlarge it before storing the character. */
635 if (remaining == 0U) {
636 result = grow_data(lex, &remaining,
638 if (result != ISC_R_SUCCESS)
641 INSIST(remaining > 0U);
/*
 * String state: the token ends at a line end, EOF, or at a blank or
 * special character (the qualifier on the second group is on a line not
 * shown).
 */
646 case lexstate_string:
648 * EOF needs to be checked before lex->specials[c]
649 * as lex->specials[EOF] is not a good idea.
651 if (c == '\r' || c == '\n' || c == EOF ||
653 (c == ' ' || c == '\t' || lex->specials[c]))) {
655 if (source->result != ISC_R_SUCCESS) {
656 result = source->result;
/* Length is the part of the token buffer that has been filled. */
659 tokenp->type = isc_tokentype_string;
660 tokenp->value.as_textregion.base = lex->data;
661 tokenp->value.as_textregion.length =
662 lex->max_token - remaining;
/* Track whether the next character is escaped by a single backslash. */
666 if ((options & ISC_LEXOPT_ESCAPE) != 0)
667 escaped = (!escaped && c == '\\') ?
668 ISC_TRUE : ISC_FALSE;
669 if (remaining == 0U) {
670 result = grow_data(lex, &remaining,
672 if (result != ISC_R_SUCCESS)
675 INSIST(remaining > 0U);
/*
 * A slash was seen: decide between a C comment, a C++ comment, or an
 * ordinary character (comment detection is then re-enabled).
 */
680 case lexstate_maybecomment:
682 (lex->comments & ISC_LEXCOMMENT_C) != 0) {
683 state = lexstate_ccomment;
685 } else if (c == '/' &&
686 (lex->comments & ISC_LEXCOMMENT_CPLUSPLUS) != 0) {
687 state = lexstate_eatline;
692 no_comments = ISC_FALSE;
/*
 * Inside a C comment.  ISC_R_UNEXPECTEDEND is produced here and in the
 * next two states; the triggering conditions are on lines not shown
 * (presumably EOF).
 */
695 case lexstate_ccomment:
697 result = ISC_R_UNEXPECTEDEND;
701 state = lexstate_ccommentend;
703 case lexstate_ccommentend:
705 result = ISC_R_UNEXPECTEDEND;
710 * C-style comments become a single space.
711 * We do this to ensure that a comment will
712 * act as a delimiter for strings and
716 no_comments = ISC_FALSE;
720 state = lexstate_ccomment;
/* Skipping the rest of a line after a line-comment introducer. */
722 case lexstate_eatline:
724 result = ISC_R_UNEXPECTEDEND;
728 no_comments = ISC_FALSE;
/*
 * Inside a quoted string.  A raw newline is rejected with
 * ISC_R_UNBALANCEDQUOTES unless it is escaped or
 * ISC_LEXOPT_QSTRINGMULTILINE is set.
 */
733 case lexstate_qstring:
735 result = ISC_R_UNEXPECTEDEND;
742 * Overwrite the preceding backslash.
744 INSIST(prev != NULL);
747 tokenp->type = isc_tokentype_qstring;
748 tokenp->value.as_textregion.base =
750 tokenp->value.as_textregion.length =
751 lex->max_token - remaining;
752 no_comments = ISC_FALSE;
756 if (c == '\n' && !escaped &&
757 (options & ISC_LEXOPT_QSTRINGMULTILINE) == 0) {
759 result = ISC_R_UNBALANCEDQUOTES;
762 if (c == '\\' && !escaped)
766 if (remaining == 0U) {
767 result = grow_data(lex, &remaining,
769 if (result != ISC_R_SUCCESS)
772 INSIST(remaining > 0U);
/* Any other state value is treated as an internal error and is fatal. */
780 FATAL_ERROR(__FILE__, __LINE__,
781 isc_msgcat_get(isc_msgcat, ISC_MSGSET_LEX,
782 ISC_MSG_UNEXPECTEDSTATE,
783 "Unexpected state %d"),
785 /* Does not return. */
790 result = ISC_R_SUCCESS;
/* Release the stream lock taken near the top of the function. */
792 #ifdef HAVE_FLOCKFILE
794 funlockfile(source->input);
/*
 * Convenience wrapper around isc_lex_gettoken() for callers that expect a
 * particular token type.
 *
 * The token is read with EOL, EOF, DNS multi-line and escape processing
 * enabled; quoted-string or number recognition is added according to
 * expect.  When eol is true an end-of-line or end-of-file token is also
 * accepted.  A plain string is accepted where a quoted string was expected.
 *
 * A token of the wrong type is pushed back with isc_lex_ungettoken() and
 * the call fails with ISC_R_UNEXPECTEDEND (EOL or EOF seen),
 * ISC_R_BADNUMBER (number expected) or ISC_R_UNEXPECTEDTOKEN.  A token that
 * produced ISC_R_RANGE is pushed back as well.  Other failures from
 * isc_lex_gettoken() are presumably returned unchanged (the return is on a
 * line not shown).
 */
800 isc_lex_getmastertoken(isc_lex_t *lex, isc_token_t *token,
801 isc_tokentype_t expect, isc_boolean_t eol)
803 unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
804 ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE;
807 if (expect == isc_tokentype_qstring)
808 options |= ISC_LEXOPT_QSTRING;
809 else if (expect == isc_tokentype_number)
810 options |= ISC_LEXOPT_NUMBER;
811 result = isc_lex_gettoken(lex, options, token);
/* An out-of-range number is pushed back so the caller can read it again. */
812 if (result == ISC_R_RANGE)
813 isc_lex_ungettoken(lex, token);
814 if (result != ISC_R_SUCCESS)
817 if (eol && ((token->type == isc_tokentype_eol) ||
818 (token->type == isc_tokentype_eof)))
819 return (ISC_R_SUCCESS);
/* An unquoted string satisfies a request for a quoted string. */
820 if (token->type == isc_tokentype_string &&
821 expect == isc_tokentype_qstring)
822 return (ISC_R_SUCCESS);
823 if (token->type != expect) {
824 isc_lex_ungettoken(lex, token);
825 if (token->type == isc_tokentype_eol ||
826 token->type == isc_tokentype_eof)
827 return (ISC_R_UNEXPECTEDEND);
828 if (expect == isc_tokentype_number)
829 return (ISC_R_BADNUMBER);
830 return (ISC_R_UNEXPECTEDTOKEN);
832 return (ISC_R_SUCCESS);
/*
 * Convenience wrapper around isc_lex_gettoken() that expects a number read
 * with the octal option enabled.
 *
 * The token is read with EOL, EOF, DNS multi-line, escape, number and
 * octal options.  When eol is true an end-of-line or end-of-file token is
 * also accepted.
 *
 * A token that is not a number is pushed back with isc_lex_ungettoken()
 * and the call fails with ISC_R_UNEXPECTEDEND (EOL or EOF seen) or
 * ISC_R_BADNUMBER.  A token that produced ISC_R_RANGE is pushed back as
 * well.  Other failures from isc_lex_gettoken() are presumably returned
 * unchanged (the return is on a line not shown).
 */
836 isc_lex_getoctaltoken(isc_lex_t *lex, isc_token_t *token, isc_boolean_t eol)
838 unsigned int options = ISC_LEXOPT_EOL | ISC_LEXOPT_EOF |
839 ISC_LEXOPT_DNSMULTILINE | ISC_LEXOPT_ESCAPE|
840 ISC_LEXOPT_NUMBER | ISC_LEXOPT_OCTAL;
843 result = isc_lex_gettoken(lex, options, token);
/* An out-of-range number is pushed back so the caller can read it again. */
844 if (result == ISC_R_RANGE)
845 isc_lex_ungettoken(lex, token);
846 if (result != ISC_R_SUCCESS)
849 if (eol && ((token->type == isc_tokentype_eol) ||
850 (token->type == isc_tokentype_eof)))
851 return (ISC_R_SUCCESS);
852 if (token->type != isc_tokentype_number) {
853 isc_lex_ungettoken(lex, token);
854 if (token->type == isc_tokentype_eol ||
855 token->type == isc_tokentype_eof)
856 return (ISC_R_UNEXPECTEDEND);
857 return (ISC_R_BADNUMBER);
859 return (ISC_R_SUCCESS);
/*
 * Push the most recently read token back so the next call to
 * isc_lex_gettoken() reads it again.
 *
 * The pushback buffer is repositioned with isc_buffer_first() (presumably
 * back to its first byte), and the parenthesis depth and line number saved
 * at the start of the token are restored.  The at-EOF flag is cleared.
 *
 * Requires: a valid lexer with an open source, a non-NULL tokenp, and
 * either some consumed input in the pushback buffer or an EOF token.
 */
863 isc_lex_ungettoken(isc_lex_t *lex, isc_token_t *tokenp) {
866 * Unget the current token.
869 REQUIRE(VALID_LEX(lex));
870 source = HEAD(lex->sources);
871 REQUIRE(source != NULL);
872 REQUIRE(tokenp != NULL);
873 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
874 tokenp->type == isc_tokentype_eof);
878 isc_buffer_first(source->pushback);
/* Undo the bookkeeping done while the token was being read. */
879 lex->paren_count = lex->saved_paren_count;
880 source->line = source->saved_line;
881 source->at_eof = ISC_FALSE;
/*
 * Fill *r with a region inside the current source's pushback buffer that
 * relates to the last token read.  The base is computed from the buffer
 * base and the length from the consumed length; the second operand of each
 * expression is on a line not shown (presumably source->ignored, which the
 * INSIST bounds by the consumed length).
 *
 * Requires: a valid lexer with an open source, a non-NULL tokenp, and
 * either some consumed input in the pushback buffer or an EOF token.
 * The region points into the pushback buffer; no copy is made here.
 */
885 isc_lex_getlasttokentext(isc_lex_t *lex, isc_token_t *tokenp, isc_region_t *r)
889 REQUIRE(VALID_LEX(lex));
890 source = HEAD(lex->sources);
891 REQUIRE(source != NULL);
892 REQUIRE(tokenp != NULL);
893 REQUIRE(isc_buffer_consumedlength(source->pushback) != 0 ||
894 tokenp->type == isc_tokentype_eof);
898 INSIST(source->ignored <= isc_buffer_consumedlength(source->pushback));
899 r->base = (unsigned char *)isc_buffer_base(source->pushback) +
901 r->length = isc_buffer_consumedlength(source->pushback) -
/*
 * Return the name of the current (head) input source.  The returned
 * pointer is the lexer's own copy of the name, not a new allocation.
 * Requires a valid lexer.
 * NOTE(review): handling of the no-source case is on lines not shown.
 */
907 isc_lex_getsourcename(isc_lex_t *lex) {
910 REQUIRE(VALID_LEX(lex));
911 source = HEAD(lex->sources);
916 return (source->name);
/*
 * Return the line counter of the current (head) input source.
 * Requires a valid lexer.
 * NOTE(review): handling of the no-source case is on lines not shown.
 */
920 isc_lex_getsourceline(isc_lex_t *lex) {
923 REQUIRE(VALID_LEX(lex));
924 source = HEAD(lex->sources);
929 return (source->line);
/*
 * Replace the name of the current (head) input source with a copy of the
 * given name.  The new copy is made before the old one is freed, so the
 * existing name is left intact if the copy fails.
 *
 * Returns ISC_R_SUCCESS, ISC_R_NOMEMORY, or ISC_R_NOTFOUND (presumably
 * when no source is open; the test is on a line not shown).
 * Requires a valid lexer.
 */
934 isc_lex_setsourcename(isc_lex_t *lex, const char *name) {
938 REQUIRE(VALID_LEX(lex));
939 source = HEAD(lex->sources);
942 return(ISC_R_NOTFOUND);
943 newname = isc_mem_strdup(lex->mctx, name);
945 return (ISC_R_NOMEMORY);
946 isc_mem_free(lex->mctx, source->name);
947 source->name = newname;
948 return (ISC_R_SUCCESS);
/*
 * Report whether the current (head) input source is stdio-backed, that is,
 * whether it was opened through isc_lex_openfile() or isc_lex_openstream()
 * rather than isc_lex_openbuffer().  Requires a valid lexer.
 * NOTE(review): handling of the no-source case is on lines not shown.
 */
952 isc_lex_isfile(isc_lex_t *lex) {
955 REQUIRE(VALID_LEX(lex));
957 source = HEAD(lex->sources);
962 return (source->is_file);