]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - src/ucl_parser.c
Import libucl 0.4.0
[FreeBSD/FreeBSD.git] / src / ucl_parser.c
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27
28 /**
29  * @file ucl_parser.c
30  * The implementation of the UCL parser
31  */
32
/**
 * Snapshot of a reading position.
 *
 * The members carry the same names as the position-tracking members of a
 * chunk that the rest of this file updates (line, column, remain, pos).
 * NOTE(review): presumably used to save and restore a chunk position; the
 * structure is not referenced in this part of the file - confirm.
 */
struct ucl_parser_saved_state {
	unsigned line;			/* line counter */
	unsigned column;		/* column counter */
	size_t remain;			/* number of characters left */
	const unsigned char *pos;	/* read cursor */
};
39
/**
 * Advance the read position of a chunk by exactly one character.
 *
 * The character under the cursor decides how the position counters change:
 * a newline increments the chunk's line and resets its column to 0, any other
 * character increments the column. Afterwards both the caller's cursor and the
 * chunk's own pos are moved forward by one and remain is decremented.
 *
 * The macro performs no bounds check and evaluates its arguments more than
 * once, so the caller must make sure that a character is available and must
 * not pass expressions with side effects.
 *
 * @param chunk pointer to the chunk whose line/column/pos/remain are updated
 * @param p caller's cursor; must be a modifiable pointer lvalue
 */
#define ucl_chunk_skipc(chunk, p)	do {	\
	if (*(p) == '\n') {	\
		(chunk)->line ++;	\
		(chunk)->column = 0;	\
	}	\
	else {	\
		(chunk)->column ++;	\
	}	\
	(p) ++;	\
	(chunk)->pos ++;	\
	(chunk)->remain --;	\
} while (0)
57
58 static inline void
59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
60 {
61         if (chunk->pos < chunk->end) {
62                 if (isgraph (*chunk->pos)) {
63                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
64                                         chunk->line, chunk->column, str, *chunk->pos);
65                 }
66                 else {
67                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
68                                         chunk->line, chunk->column, str, (int)*chunk->pos);
69                 }
70         }
71         else {
72                 ucl_create_err (err, "error at the end of chunk: %s", str);
73         }
74 }
75
76 /**
77  * Skip all comments from the current pos resolving nested and multiline comments
78  * @param parser
79  * @return
80  */
81 static bool
82 ucl_skip_comments (struct ucl_parser *parser)
83 {
84         struct ucl_chunk *chunk = parser->chunks;
85         const unsigned char *p;
86         int comments_nested = 0;
87
88         p = chunk->pos;
89
90 start:
91         if (*p == '#') {
92                 if (parser->state != UCL_STATE_SCOMMENT &&
93                                 parser->state != UCL_STATE_MCOMMENT) {
94                         while (p < chunk->end) {
95                                 if (*p == '\n') {
96                                         ucl_chunk_skipc (chunk, p);
97                                         goto start;
98                                 }
99                                 ucl_chunk_skipc (chunk, p);
100                         }
101                 }
102         }
103         else if (*p == '/' && chunk->remain >= 2) {
104                 if (p[1] == '*') {
105                         ucl_chunk_skipc (chunk, p);
106                         comments_nested ++;
107                         ucl_chunk_skipc (chunk, p);
108
109                         while (p < chunk->end) {
110                                 if (*p == '*') {
111                                         ucl_chunk_skipc (chunk, p);
112                                         if (*p == '/') {
113                                                 comments_nested --;
114                                                 if (comments_nested == 0) {
115                                                         ucl_chunk_skipc (chunk, p);
116                                                         goto start;
117                                                 }
118                                         }
119                                         ucl_chunk_skipc (chunk, p);
120                                 }
121                                 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
122                                         comments_nested ++;
123                                         ucl_chunk_skipc (chunk, p);
124                                         ucl_chunk_skipc (chunk, p);
125                                         continue;
126                                 }
127                                 ucl_chunk_skipc (chunk, p);
128                         }
129                         if (comments_nested != 0) {
130                                 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
131                                 return false;
132                         }
133                 }
134         }
135
136         return true;
137 }
138
/**
 * Translate a size suffix character into its numeric multiplier.
 *
 * The suffix is matched case-insensitively: 'k', 'm' and 'g' are known.
 *
 * @param c suffix character
 * @param is_bytes if true the binary (1024 based) multiplier is returned,
 * otherwise the decimal (1000 based) one
 * @return the multiplier, or 1 if the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes)
{
	long decimal, binary;

	switch (tolower (c)) {
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		/* Unknown suffix leaves the value unscaled */
		return 1;
	}

	return is_bytes ? binary : decimal;
}
169
170
/**
 * Translate a time suffix character into the number of seconds it stands for.
 *
 * The suffix is matched case-insensitively: 'm' (minute), 'h' (hour),
 * 'd' (day), 'w' (week) and 'y' (year, counted as 365 days).
 *
 * @param c suffix character
 * @return seconds per unit, or 1 if the character is not a known suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c)
{
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
198
199 /**
200  * Return true if a character is a end of an atom
201  * @param c
202  * @return
203  */
204 static inline bool
205 ucl_lex_is_atom_end (const unsigned char c)
206 {
207         return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
208 }
209
/**
 * Tell whether two consecutive characters open a comment.
 *
 * @param c1 first character
 * @param c2 character following c1 (only looked at when c1 is '/')
 * @return true for '#' or for the two character sequence that opens a
 * C-style comment, false otherwise
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	if (c1 == '#') {
		/* Single line comment, the second character is irrelevant */
		return true;
	}

	return c1 == '/' && c2 == '*';
}
223
224 /**
225  * Check variable found
226  * @param parser
227  * @param ptr
228  * @param remain
229  * @param out_len
230  * @param strict
231  * @param found
232  * @return
233  */
234 static inline const char *
235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
236                 size_t *out_len, bool strict, bool *found)
237 {
238         struct ucl_variable *var;
239
240         LL_FOREACH (parser->variables, var) {
241                 if (strict) {
242                         if (remain == var->var_len) {
243                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
244                                         *out_len += var->value_len;
245                                         *found = true;
246                                         return (ptr + var->var_len);
247                                 }
248                         }
249                 }
250                 else {
251                         if (remain >= var->var_len) {
252                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
253                                         *out_len += var->value_len;
254                                         *found = true;
255                                         return (ptr + var->var_len);
256                                 }
257                         }
258                 }
259         }
260
261         return ptr;
262 }
263
/**
 * Measure one '$' construct while computing the length of an expanded string.
 *
 * The caller has just seen a '$'; ptr points at the character after it. The
 * number of output characters this construct will produce is added to
 * *out_len:
 *  - "${name}" with a known name: the length of the value;
 *  - "${name}" with an unknown name: 2 for the literal "${", the rest of the
 *    text is then counted by the caller;
 *  - "${" without a closing brace: 1 for the literal '$', the rest (starting
 *    with '{') is counted by the caller;
 *  - "$name" where a known name is a prefix of the text: the length of the
 *    value;
 *  - "$" followed by anything else, or at the very end: 1 for the literal '$';
 *  - "$$": 1, the pair collapses into a single '$'.
 *
 * @param parser parser object holding the variables list
 * @param ptr position right after the '$'
 * @param remain number of characters available at ptr
 * @param out_len accumulator for the expanded length
 * @param vars_found set to true when a known variable has been seen
 * @return position from which the caller has to continue counting
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (remain == 0) {
		/* '$' is the last character: nothing to inspect, it stays literal */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					*vars_found = true;
				}
				else {
					/* Literal "${"; ret already points after the '{' */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * No closing brace: the '$' is emitted literally and has to be
			 * counted here, the '{' is counted by the caller as ret == ptr.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			/* Literal '$' */
			(*out_len) ++;
		}
	}
	else {
		/* "$$" yields a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
318
319 /**
320  * Expand a single variable
321  * @param parser
322  * @param ptr
323  * @param remain
324  * @param dest
325  * @return
326  */
327 static const char *
328 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
329                 size_t remain, unsigned char **dest)
330 {
331         unsigned char *d = *dest;
332         const char *p = ptr + 1, *ret;
333         struct ucl_variable *var;
334         bool found = false;
335
336         ret = ptr + 1;
337         remain --;
338
339         if (*p == '$') {
340                 *d++ = *p++;
341                 *dest = d;
342                 return p;
343         }
344         else if (*p == '{') {
345                 p ++;
346                 ret += 2;
347                 remain -= 2;
348         }
349
350         LL_FOREACH (parser->variables, var) {
351                 if (remain >= var->var_len) {
352                         if (memcmp (p, var->var, var->var_len) == 0) {
353                                 memcpy (d, var->value, var->value_len);
354                                 ret += var->var_len;
355                                 d += var->value_len;
356                                 found = true;
357                                 break;
358                         }
359                 }
360         }
361         if (!found) {
362                 memcpy (d, ptr, 2);
363                 d += 2;
364                 ret --;
365         }
366
367         *dest = d;
368         return ret;
369 }
370
/**
 * Expand all variables in a string.
 *
 * Works in two passes over the input: the first one computes the length of
 * the result with ucl_check_variable, the second one writes the result with
 * ucl_expand_single_variable into a freshly allocated, NUL terminated buffer.
 *
 * @param parser parser object holding the variables list
 * @param dst receives the new buffer; set to NULL when the input contains no
 * known variable or when the allocation fails
 * @param src input string
 * @param in_len length of the input
 * @return length of the expanded string, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *const end = src + in_len;
	const char *cur;
	unsigned char *out;
	size_t out_len = 0;
	bool vars_found = false;

	/* First pass: find out how long the result is going to be */
	for (cur = src; cur != end; ) {
		if (*cur != '$') {
			cur ++;
			out_len ++;
			continue;
		}
		cur = ucl_check_variable (parser, cur + 1, end - cur - 1, &out_len, &vars_found);
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Second pass: produce the expanded text */
	out = *dst;
	for (cur = src; cur != end; ) {
		if (*cur != '$') {
			*out++ = *cur++;
			continue;
		}
		cur = ucl_expand_single_variable (parser, cur, end - cur, &out);
	}

	*out = '\0';

	return out_len;
}
425
426 /**
427  * Store or copy pointer to the trash stack
428  * @param parser parser object
429  * @param src src string
430  * @param dst destination buffer (trash stack pointer)
431  * @param dst_const const destination pointer (e.g. value of object)
432  * @param in_len input length
433  * @param need_unescape need to unescape source (and copy it)
434  * @param need_lowercase need to lowercase value (and copy)
435  * @param need_expand need to expand variables (and copy as well)
436  * @return output length (excluding \0 symbol)
437  */
438 static inline ssize_t
439 ucl_copy_or_store_ptr (struct ucl_parser *parser,
440                 const unsigned char *src, unsigned char **dst,
441                 const char **dst_const, size_t in_len,
442                 bool need_unescape, bool need_lowercase, bool need_expand)
443 {
444         ssize_t ret = -1, tret;
445         unsigned char *tmp;
446
447         if (need_unescape || need_lowercase ||
448                         (need_expand && parser->variables != NULL) ||
449                         !(parser->flags & UCL_PARSER_ZEROCOPY)) {
450                 /* Copy string */
451                 *dst = UCL_ALLOC (in_len + 1);
452                 if (*dst == NULL) {
453                         ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
454                         return false;
455                 }
456                 if (need_lowercase) {
457                         ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
458                 }
459                 else {
460                         ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
461                 }
462
463                 if (need_unescape) {
464                         ret = ucl_unescape_json_string (*dst, ret);
465                 }
466                 if (need_expand) {
467                         tmp = *dst;
468                         tret = ret;
469                         ret = ucl_expand_variable (parser, dst, tmp, ret);
470                         if (*dst == NULL) {
471                                 /* Nothing to expand */
472                                 *dst = tmp;
473                                 ret = tret;
474                         }
475                 }
476                 *dst_const = *dst;
477         }
478         else {
479                 *dst_const = src;
480                 ret = in_len;
481         }
482
483         return ret;
484 }
485
486 /**
487  * Create and append an object at the specified level
488  * @param parser
489  * @param is_array
490  * @param level
491  * @return
492  */
493 static inline ucl_object_t *
494 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
495 {
496         struct ucl_stack *st;
497
498         if (!is_array) {
499                 if (obj == NULL) {
500                         obj = ucl_object_typed_new (UCL_OBJECT);
501                 }
502                 else {
503                         obj->type = UCL_OBJECT;
504                 }
505                 obj->value.ov = ucl_hash_create ();
506                 parser->state = UCL_STATE_KEY;
507         }
508         else {
509                 if (obj == NULL) {
510                         obj = ucl_object_typed_new (UCL_ARRAY);
511                 }
512                 else {
513                         obj->type = UCL_ARRAY;
514                 }
515                 parser->state = UCL_STATE_VALUE;
516         }
517
518         st = UCL_ALLOC (sizeof (struct ucl_stack));
519         if (st == NULL) {
520                 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
521                 return NULL;
522         }
523         st->obj = obj;
524         st->level = level;
525         LL_PREPEND (parser->stack, st);
526         parser->cur_obj = obj;
527
528         return obj;
529 }
530
531 int
532 ucl_maybe_parse_number (ucl_object_t *obj,
533                 const char *start, const char *end, const char **pos,
534                 bool allow_double, bool number_bytes, bool allow_time)
535 {
536         const char *p = start, *c = start;
537         char *endptr;
538         bool got_dot = false, got_exp = false, need_double = false,
539                         is_time = false, valid_start = false, is_hex = false,
540                         is_neg = false;
541         double dv = 0;
542         int64_t lv = 0;
543
544         if (*p == '-') {
545                 is_neg = true;
546                 c ++;
547                 p ++;
548         }
549         while (p < end) {
550                 if (is_hex && isxdigit (*p)) {
551                         p ++;
552                 }
553                 else if (isdigit (*p)) {
554                         valid_start = true;
555                         p ++;
556                 }
557                 else if (!is_hex && (*p == 'x' || *p == 'X')) {
558                         is_hex = true;
559                         allow_double = false;
560                         c = p + 1;
561                 }
562                 else if (allow_double) {
563                         if (p == c) {
564                                 /* Empty digits sequence, not a number */
565                                 *pos = start;
566                                 return EINVAL;
567                         }
568                         else if (*p == '.') {
569                                 if (got_dot) {
570                                         /* Double dots, not a number */
571                                         *pos = start;
572                                         return EINVAL;
573                                 }
574                                 else {
575                                         got_dot = true;
576                                         need_double = true;
577                                         p ++;
578                                 }
579                         }
580                         else if (*p == 'e' || *p == 'E') {
581                                 if (got_exp) {
582                                         /* Double exp, not a number */
583                                         *pos = start;
584                                         return EINVAL;
585                                 }
586                                 else {
587                                         got_exp = true;
588                                         need_double = true;
589                                         p ++;
590                                         if (p >= end) {
591                                                 *pos = start;
592                                                 return EINVAL;
593                                         }
594                                         if (!isdigit (*p) && *p != '+' && *p != '-') {
595                                                 /* Wrong exponent sign */
596                                                 *pos = start;
597                                                 return EINVAL;
598                                         }
599                                         else {
600                                                 p ++;
601                                         }
602                                 }
603                         }
604                         else {
605                                 /* Got the end of the number, need to check */
606                                 break;
607                         }
608                 }
609                 else {
610                         break;
611                 }
612         }
613
614         if (!valid_start) {
615                 *pos = start;
616                 return EINVAL;
617         }
618
619         errno = 0;
620         if (need_double) {
621                 dv = strtod (c, &endptr);
622         }
623         else {
624                 if (is_hex) {
625                         lv = strtoimax (c, &endptr, 16);
626                 }
627                 else {
628                         lv = strtoimax (c, &endptr, 10);
629                 }
630         }
631         if (errno == ERANGE) {
632                 *pos = start;
633                 return ERANGE;
634         }
635
636         /* Now check endptr */
637         if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
638                         ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
639                 p = endptr;
640                 goto set_obj;
641         }
642
643         if (endptr < end && endptr != start) {
644                 p = endptr;
645                 switch (*p) {
646                 case 'm':
647                 case 'M':
648                 case 'g':
649                 case 'G':
650                 case 'k':
651                 case 'K':
652                         if (end - p >= 2) {
653                                 if (p[1] == 's' || p[1] == 'S') {
654                                         /* Milliseconds */
655                                         if (!need_double) {
656                                                 need_double = true;
657                                                 dv = lv;
658                                         }
659                                         is_time = true;
660                                         if (p[0] == 'm' || p[0] == 'M') {
661                                                 dv /= 1000.;
662                                         }
663                                         else {
664                                                 dv *= ucl_lex_num_multiplier (*p, false);
665                                         }
666                                         p += 2;
667                                         goto set_obj;
668                                 }
669                                 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
670                                         /* Bytes */
671                                         if (need_double) {
672                                                 need_double = false;
673                                                 lv = dv;
674                                         }
675                                         lv *= ucl_lex_num_multiplier (*p, true);
676                                         p += 2;
677                                         goto set_obj;
678                                 }
679                                 else if (ucl_lex_is_atom_end (p[1])) {
680                                         if (need_double) {
681                                                 dv *= ucl_lex_num_multiplier (*p, false);
682                                         }
683                                         else {
684                                                 lv *= ucl_lex_num_multiplier (*p, number_bytes);
685                                         }
686                                         p ++;
687                                         goto set_obj;
688                                 }
689                                 else if (allow_time && end - p >= 3) {
690                                         if (tolower (p[0]) == 'm' &&
691                                                         tolower (p[1]) == 'i' &&
692                                                         tolower (p[2]) == 'n') {
693                                                 /* Minutes */
694                                                 if (!need_double) {
695                                                         need_double = true;
696                                                         dv = lv;
697                                                 }
698                                                 is_time = true;
699                                                 dv *= 60.;
700                                                 p += 3;
701                                                 goto set_obj;
702                                         }
703                                 }
704                         }
705                         else {
706                                 if (need_double) {
707                                         dv *= ucl_lex_num_multiplier (*p, false);
708                                 }
709                                 else {
710                                         lv *= ucl_lex_num_multiplier (*p, number_bytes);
711                                 }
712                                 p ++;
713                                 goto set_obj;
714                         }
715                         break;
716                 case 'S':
717                 case 's':
718                         if (allow_time &&
719                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
720                                 if (!need_double) {
721                                         need_double = true;
722                                         dv = lv;
723                                 }
724                                 p ++;
725                                 is_time = true;
726                                 goto set_obj;
727                         }
728                         break;
729                 case 'h':
730                 case 'H':
731                 case 'd':
732                 case 'D':
733                 case 'w':
734                 case 'W':
735                 case 'Y':
736                 case 'y':
737                         if (allow_time &&
738                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
739                                 if (!need_double) {
740                                         need_double = true;
741                                         dv = lv;
742                                 }
743                                 is_time = true;
744                                 dv *= ucl_lex_time_multiplier (*p);
745                                 p ++;
746                                 goto set_obj;
747                         }
748                         break;
749                 }
750         }
751
752         *pos = c;
753         return EINVAL;
754
755         set_obj:
756         if (allow_double && (need_double || is_time)) {
757                 if (!is_time) {
758                         obj->type = UCL_FLOAT;
759                 }
760                 else {
761                         obj->type = UCL_TIME;
762                 }
763                 obj->value.dv = is_neg ? (-dv) : dv;
764         }
765         else {
766                 obj->type = UCL_INT;
767                 obj->value.iv = is_neg ? (-lv) : lv;
768         }
769         *pos = p;
770         return 0;
771 }
772
773 /**
774  * Parse possible number
775  * @param parser
776  * @param chunk
777  * @return true if a number has been parsed
778  */
779 static bool
780 ucl_lex_number (struct ucl_parser *parser,
781                 struct ucl_chunk *chunk, ucl_object_t *obj)
782 {
783         const unsigned char *pos;
784         int ret;
785
786         ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
787                         true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
788
789         if (ret == 0) {
790                 chunk->remain -= pos - chunk->pos;
791                 chunk->column += pos - chunk->pos;
792                 chunk->pos = pos;
793                 return true;
794         }
795         else if (ret == ERANGE) {
796                 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
797         }
798
799         return false;
800 }
801
802 /**
803  * Parse quoted string with possible escapes
804  * @param parser
805  * @param chunk
806  * @return true if a string has been parsed
807  */
808 static bool
809 ucl_lex_json_string (struct ucl_parser *parser,
810                 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
811 {
812         const unsigned char *p = chunk->pos;
813         unsigned char c;
814         int i;
815
816         while (p < chunk->end) {
817                 c = *p;
818                 if (c < 0x1F) {
819                         /* Unmasked control character */
820                         if (c == '\n') {
821                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
822                         }
823                         else {
824                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
825                         }
826                         return false;
827                 }
828                 else if (c == '\\') {
829                         ucl_chunk_skipc (chunk, p);
830                         c = *p;
831                         if (p >= chunk->end) {
832                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
833                                 return false;
834                         }
835                         else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
836                                 if (c == 'u') {
837                                         ucl_chunk_skipc (chunk, p);
838                                         for (i = 0; i < 4 && p < chunk->end; i ++) {
839                                                 if (!isxdigit (*p)) {
840                                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
841                                                         return false;
842                                                 }
843                                                 ucl_chunk_skipc (chunk, p);
844                                         }
845                                         if (p >= chunk->end) {
846                                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
847                                                 return false;
848                                         }
849                                 }
850                                 else {
851                                         ucl_chunk_skipc (chunk, p);
852                                 }
853                         }
854                         *need_unescape = true;
855                         *ucl_escape = true;
856                         continue;
857                 }
858                 else if (c == '"') {
859                         ucl_chunk_skipc (chunk, p);
860                         return true;
861                 }
862                 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
863                         *ucl_escape = true;
864                 }
865                 else if (c == '$') {
866                         *var_expand = true;
867                 }
868                 ucl_chunk_skipc (chunk, p);
869         }
870
871         ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
872         return false;
873 }
874
875 /**
876  * Parse a key in an object
877  * @param parser
878  * @param chunk
879  * @return true if a key has been parsed
880  */
881 static bool
882 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
883 {
884         const unsigned char *p, *c = NULL, *end, *t;
885         const char *key = NULL;
886         bool got_quote = false, got_eq = false, got_semicolon = false,
887                         need_unescape = false, ucl_escape = false, var_expand = false,
888                         got_content = false, got_sep = false;
889         ucl_object_t *nobj, *tobj;
890         ucl_hash_t *container;
891         ssize_t keylen;
892
893         p = chunk->pos;
894
895         if (*p == '.') {
896                 /* It is macro actually */
897                 ucl_chunk_skipc (chunk, p);
898                 parser->prev_state = parser->state;
899                 parser->state = UCL_STATE_MACRO_NAME;
900                 return true;
901         }
902         while (p < chunk->end) {
903                 /*
904                  * A key must start with alpha, number, '/' or '_' and end with space character
905                  */
906                 if (c == NULL) {
907                         if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
908                                 if (!ucl_skip_comments (parser)) {
909                                         return false;
910                                 }
911                                 p = chunk->pos;
912                         }
913                         else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
914                                 ucl_chunk_skipc (chunk, p);
915                         }
916                         else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
917                                 /* The first symbol */
918                                 c = p;
919                                 ucl_chunk_skipc (chunk, p);
920                                 got_content = true;
921                         }
922                         else if (*p == '"') {
923                                 /* JSON style key */
924                                 c = p + 1;
925                                 got_quote = true;
926                                 got_content = true;
927                                 ucl_chunk_skipc (chunk, p);
928                         }
929                         else if (*p == '}') {
930                                 /* We have actually end of an object */
931                                 *end_of_object = true;
932                                 return true;
933                         }
934                         else if (*p == '.') {
935                                 ucl_chunk_skipc (chunk, p);
936                                 parser->prev_state = parser->state;
937                                 parser->state = UCL_STATE_MACRO_NAME;
938                                 return true;
939                         }
940                         else {
941                                 /* Invalid identifier */
942                                 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
943                                 return false;
944                         }
945                 }
946                 else {
947                         /* Parse the body of a key */
948                         if (!got_quote) {
949                                 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
950                                         got_content = true;
951                                         ucl_chunk_skipc (chunk, p);
952                                 }
953                                 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
954                                         end = p;
955                                         break;
956                                 }
957                                 else {
958                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
959                                         return false;
960                                 }
961                         }
962                         else {
963                                 /* We need to parse json like quoted string */
964                                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
965                                         return false;
966                                 }
967                                 /* Always escape keys obtained via json */
968                                 end = chunk->pos - 1;
969                                 p = chunk->pos;
970                                 break;
971                         }
972                 }
973         }
974
975         if (p >= chunk->end && got_content) {
976                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
977                 return false;
978         }
979         else if (!got_content) {
980                 return true;
981         }
982         *end_of_object = false;
983         /* We are now at the end of the key, need to parse the rest */
984         while (p < chunk->end) {
985                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
986                         ucl_chunk_skipc (chunk, p);
987                 }
988                 else if (*p == '=') {
989                         if (!got_eq && !got_semicolon) {
990                                 ucl_chunk_skipc (chunk, p);
991                                 got_eq = true;
992                         }
993                         else {
994                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
995                                 return false;
996                         }
997                 }
998                 else if (*p == ':') {
999                         if (!got_eq && !got_semicolon) {
1000                                 ucl_chunk_skipc (chunk, p);
1001                                 got_semicolon = true;
1002                         }
1003                         else {
1004                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1005                                 return false;
1006                         }
1007                 }
1008                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1009                         /* Check for comment */
1010                         if (!ucl_skip_comments (parser)) {
1011                                 return false;
1012                         }
1013                         p = chunk->pos;
1014                 }
1015                 else {
1016                         /* Start value */
1017                         break;
1018                 }
1019         }
1020
1021         if (p >= chunk->end && got_content) {
1022                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1023                 return false;
1024         }
1025
1026         got_sep = got_semicolon || got_eq;
1027
1028         if (!got_sep) {
1029                 /*
1030                  * Maybe we have more keys nested, so search for termination character.
1031                  * Possible choices:
1032                  * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1033                  * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1034                  * 3) key1 value[;,\n] <- we treat that as linear object
1035                  */
1036                 t = p;
1037                 *next_key = false;
1038                 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1039                         t ++;
1040                 }
1041                 /* Check first non-space character after a key */
1042                 if (*t != '{' && *t != '[') {
1043                         while (t < chunk->end) {
1044                                 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1045                                         break;
1046                                 }
1047                                 else if (*t == '{' || *t == '[') {
1048                                         *next_key = true;
1049                                         break;
1050                                 }
1051                                 t ++;
1052                         }
1053                 }
1054         }
1055
1056         /* Create a new object */
1057         nobj = ucl_object_new ();
1058         keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1059                         &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1060         if (keylen == -1) {
1061                 ucl_object_unref (nobj);
1062                 return false;
1063         }
1064         else if (keylen == 0) {
1065                 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1066                 ucl_object_unref (nobj);
1067                 return false;
1068         }
1069
1070         container = parser->stack->obj->value.ov;
1071         nobj->key = key;
1072         nobj->keylen = keylen;
1073         tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1074         if (tobj == NULL) {
1075                 container = ucl_hash_insert_object (container, nobj);
1076                 nobj->prev = nobj;
1077                 nobj->next = NULL;
1078                 parser->stack->obj->len ++;
1079         }
1080         else {
1081                 DL_APPEND (tobj, nobj);
1082         }
1083
1084         if (ucl_escape) {
1085                 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1086         }
1087         parser->stack->obj->value.ov = container;
1088
1089         parser->cur_obj = nobj;
1090
1091         return true;
1092 }
1093
1094 /**
1095  * Parse a cl string
1096  * @param parser
1097  * @param chunk
1098  * @return true if a key has been parsed
1099  */
1100 static bool
1101 ucl_parse_string_value (struct ucl_parser *parser,
1102                 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1103 {
1104         const unsigned char *p;
1105         enum {
1106                 UCL_BRACE_ROUND = 0,
1107                 UCL_BRACE_SQUARE,
1108                 UCL_BRACE_FIGURE
1109         };
1110         int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1111
1112         p = chunk->pos;
1113
1114         while (p < chunk->end) {
1115
1116                 /* Skip pairs of figure braces */
1117                 if (*p == '{') {
1118                         braces[UCL_BRACE_FIGURE][0] ++;
1119                 }
1120                 else if (*p == '}') {
1121                         braces[UCL_BRACE_FIGURE][1] ++;
1122                         if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1123                                 /* This is not a termination symbol, continue */
1124                                 ucl_chunk_skipc (chunk, p);
1125                                 continue;
1126                         }
1127                 }
1128                 /* Skip pairs of square braces */
1129                 else if (*p == '[') {
1130                         braces[UCL_BRACE_SQUARE][0] ++;
1131                 }
1132                 else if (*p == ']') {
1133                         braces[UCL_BRACE_SQUARE][1] ++;
1134                         if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1135                                 /* This is not a termination symbol, continue */
1136                                 ucl_chunk_skipc (chunk, p);
1137                                 continue;
1138                         }
1139                 }
1140                 else if (*p == '$') {
1141                         *var_expand = true;
1142                 }
1143                 else if (*p == '\\') {
1144                         *need_unescape = true;
1145                         ucl_chunk_skipc (chunk, p);
1146                         if (p < chunk->end) {
1147                                 ucl_chunk_skipc (chunk, p);
1148                         }
1149                         continue;
1150                 }
1151
1152                 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1153                         break;
1154                 }
1155                 ucl_chunk_skipc (chunk, p);
1156         }
1157
1158         if (p >= chunk->end) {
1159                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1160                 return false;
1161         }
1162
1163         return true;
1164 }
1165
1166 /**
1167  * Parse multiline string ending with \n{term}\n
1168  * @param parser
1169  * @param chunk
1170  * @param term
1171  * @param term_len
1172  * @return size of multiline string or 0 in case of error
1173  */
1174 static int
1175 ucl_parse_multiline_string (struct ucl_parser *parser,
1176                 struct ucl_chunk *chunk, const unsigned char *term,
1177                 int term_len, unsigned char const **beg,
1178                 bool *var_expand)
1179 {
1180         const unsigned char *p, *c;
1181         bool newline = false;
1182         int len = 0;
1183
1184         p = chunk->pos;
1185
1186         c = p;
1187
1188         while (p < chunk->end) {
1189                 if (newline) {
1190                         if (chunk->end - p < term_len) {
1191                                 return 0;
1192                         }
1193                         else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1194                                 len = p - c;
1195                                 chunk->remain -= term_len;
1196                                 chunk->pos = p + term_len;
1197                                 chunk->column = term_len;
1198                                 *beg = c;
1199                                 break;
1200                         }
1201                 }
1202                 if (*p == '\n') {
1203                         newline = true;
1204                 }
1205                 else {
1206                         if (*p == '$') {
1207                                 *var_expand = true;
1208                         }
1209                         newline = false;
1210                 }
1211                 ucl_chunk_skipc (chunk, p);
1212         }
1213
1214         return len;
1215 }
1216
1217 static ucl_object_t*
1218 ucl_get_value_object (struct ucl_parser *parser)
1219 {
1220         ucl_object_t *t, *obj = NULL;
1221
1222         if (parser->stack->obj->type == UCL_ARRAY) {
1223                 /* Object must be allocated */
1224                 obj = ucl_object_new ();
1225                 t = parser->stack->obj->value.av;
1226                 DL_APPEND (t, obj);
1227                 parser->cur_obj = obj;
1228                 parser->stack->obj->value.av = t;
1229                 parser->stack->obj->len ++;
1230         }
1231         else {
1232                 /* Object has been already allocated */
1233                 obj = parser->cur_obj;
1234         }
1235
1236         return obj;
1237 }
1238
1239 /**
1240  * Handle value data
1241  * @param parser
1242  * @param chunk
1243  * @return
1244  */
1245 static bool
1246 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1247 {
1248         const unsigned char *p, *c;
1249         ucl_object_t *obj = NULL;
1250         unsigned int stripped_spaces;
1251         int str_len;
1252         bool need_unescape = false, ucl_escape = false, var_expand = false;
1253
1254         p = chunk->pos;
1255
1256         /* Skip any spaces and comments */
1257         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1258                         (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1259                 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1260                         ucl_chunk_skipc (chunk, p);
1261                 }
1262                 if (!ucl_skip_comments (parser)) {
1263                         return false;
1264                 }
1265                 p = chunk->pos;
1266         }
1267
1268         while (p < chunk->end) {
1269                 c = p;
1270                 switch (*p) {
1271                 case '"':
1272                         obj = ucl_get_value_object (parser);
1273                         ucl_chunk_skipc (chunk, p);
1274                         if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1275                                 return false;
1276                         }
1277                         str_len = chunk->pos - c - 2;
1278                         obj->type = UCL_STRING;
1279                         if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1280                                         &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1281                                 return false;
1282                         }
1283                         obj->len = str_len;
1284                         parser->state = UCL_STATE_AFTER_VALUE;
1285                         p = chunk->pos;
1286                         return true;
1287                         break;
1288                 case '{':
1289                         obj = ucl_get_value_object (parser);
1290                         /* We have a new object */
1291                         obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1292                         if (obj == NULL) {
1293                                 return false;
1294                         }
1295
1296                         ucl_chunk_skipc (chunk, p);
1297                         return true;
1298                         break;
1299                 case '[':
1300                         obj = ucl_get_value_object (parser);
1301                         /* We have a new array */
1302                         obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1303                         if (obj == NULL) {
1304                                 return false;
1305                         }
1306
1307                         ucl_chunk_skipc (chunk, p);
1308                         return true;
1309                         break;
1310                 case ']':
1311                         /* We have the array ending */
1312                         if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1313                                 parser->state = UCL_STATE_AFTER_VALUE;
1314                                 return true;
1315                         }
1316                         else {
1317                                 goto parse_string;
1318                         }
1319                         break;
1320                 case '<':
1321                         obj = ucl_get_value_object (parser);
1322                         /* We have something like multiline value, which must be <<[A-Z]+\n */
1323                         if (chunk->end - p > 3) {
1324                                 if (memcmp (p, "<<", 2) == 0) {
1325                                         p += 2;
1326                                         /* We allow only uppercase characters in multiline definitions */
1327                                         while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1328                                                 p ++;
1329                                         }
1330                                         if (*p =='\n') {
1331                                                 /* Set chunk positions and start multiline parsing */
1332                                                 c += 2;
1333                                                 chunk->remain -= p - c;
1334                                                 chunk->pos = p + 1;
1335                                                 chunk->column = 0;
1336                                                 chunk->line ++;
1337                                                 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1338                                                                 p - c, &c, &var_expand)) == 0) {
1339                                                         ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1340                                                         return false;
1341                                                 }
1342                                                 obj->type = UCL_STRING;
1343                                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1344                                                         &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1345                                                         return false;
1346                                                 }
1347                                                 obj->len = str_len;
1348                                                 parser->state = UCL_STATE_AFTER_VALUE;
1349                                                 return true;
1350                                         }
1351                                 }
1352                         }
1353                         /* Fallback to ordinary strings */
1354                 default:
1355 parse_string:
1356                         if (obj == NULL) {
1357                                 obj = ucl_get_value_object (parser);
1358                         }
1359                         /* Parse atom */
1360                         if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1361                                 if (!ucl_lex_number (parser, chunk, obj)) {
1362                                         if (parser->state == UCL_STATE_ERROR) {
1363                                                 return false;
1364                                         }
1365                                 }
1366                                 else {
1367                                         parser->state = UCL_STATE_AFTER_VALUE;
1368                                         return true;
1369                                 }
1370                                 /* Fallback to normal string */
1371                         }
1372
1373                         if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1374                                 return false;
1375                         }
1376                         /* Cut trailing spaces */
1377                         stripped_spaces = 0;
1378                         while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1379                                         UCL_CHARACTER_WHITESPACE)) {
1380                                 stripped_spaces ++;
1381                         }
1382                         str_len = chunk->pos - c - stripped_spaces;
1383                         if (str_len <= 0) {
1384                                 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1385                                 return false;
1386                         }
1387                         else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1388                                 obj->len = 0;
1389                                 obj->type = UCL_NULL;
1390                         }
1391                         else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1392                                 obj->type = UCL_STRING;
1393                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1394                                                 &obj->value.sv, str_len, need_unescape,
1395                                                 false, var_expand)) == -1) {
1396                                         return false;
1397                                 }
1398                                 obj->len = str_len;
1399                         }
1400                         parser->state = UCL_STATE_AFTER_VALUE;
1401                         p = chunk->pos;
1402
1403                         return true;
1404                         break;
1405                 }
1406         }
1407
1408         return true;
1409 }
1410
1411 /**
1412  * Handle after value data
1413  * @param parser
1414  * @param chunk
1415  * @return
1416  */
1417 static bool
1418 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1419 {
1420         const unsigned char *p;
1421         bool got_sep = false;
1422         struct ucl_stack *st;
1423
1424         p = chunk->pos;
1425
1426         while (p < chunk->end) {
1427                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1428                         /* Skip whitespaces */
1429                         ucl_chunk_skipc (chunk, p);
1430                 }
1431                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1432                         /* Skip comment */
1433                         if (!ucl_skip_comments (parser)) {
1434                                 return false;
1435                         }
1436                         /* Treat comment as a separator */
1437                         got_sep = true;
1438                         p = chunk->pos;
1439                 }
1440                 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1441                         if (*p == '}' || *p == ']') {
1442                                 if (parser->stack == NULL) {
1443                                         ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1444                                         return false;
1445                                 }
1446                                 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1447                                                 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1448
1449                                         /* Pop all nested objects from a stack */
1450                                         st = parser->stack;
1451                                         parser->stack = st->next;
1452                                         UCL_FREE (sizeof (struct ucl_stack), st);
1453
1454                                         while (parser->stack != NULL) {
1455                                                 st = parser->stack;
1456                                                 if (st->next == NULL || st->next->level == st->level) {
1457                                                         break;
1458                                                 }
1459                                                 parser->stack = st->next;
1460                                                 UCL_FREE (sizeof (struct ucl_stack), st);
1461                                         }
1462                                 }
1463                                 else {
1464                                         ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1465                                         return false;
1466                                 }
1467
1468                                 if (parser->stack == NULL) {
1469                                         /* Ignore everything after a top object */
1470                                         return true;
1471                                 }
1472                                 else {
1473                                         ucl_chunk_skipc (chunk, p);
1474                                 }
1475                                 got_sep = true;
1476                         }
1477                         else {
1478                                 /* Got a separator */
1479                                 got_sep = true;
1480                                 ucl_chunk_skipc (chunk, p);
1481                         }
1482                 }
1483                 else {
1484                         /* Anything else */
1485                         if (!got_sep) {
1486                                 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1487                                 return false;
1488                         }
1489                         return true;
1490                 }
1491         }
1492
1493         return true;
1494 }
1495
1496 /**
1497  * Handle macro data
1498  * @param parser
1499  * @param chunk
1500  * @return
1501  */
1502 static bool
1503 ucl_parse_macro_value (struct ucl_parser *parser,
1504                 struct ucl_chunk *chunk, struct ucl_macro *macro,
1505                 unsigned char const **macro_start, size_t *macro_len)
1506 {
1507         const unsigned char *p, *c;
1508         bool need_unescape = false, ucl_escape = false, var_expand = false;
1509
1510         p = chunk->pos;
1511
1512         switch (*p) {
1513         case '"':
1514                 /* We have macro value encoded in quotes */
1515                 c = p;
1516                 ucl_chunk_skipc (chunk, p);
1517                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1518                         return false;
1519                 }
1520
1521                 *macro_start = c + 1;
1522                 *macro_len = chunk->pos - c - 2;
1523                 p = chunk->pos;
1524                 break;
1525         case '{':
1526                 /* We got a multiline macro body */
1527                 ucl_chunk_skipc (chunk, p);
1528                 /* Skip spaces at the beginning */
1529                 while (p < chunk->end) {
1530                         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1531                                 ucl_chunk_skipc (chunk, p);
1532                         }
1533                         else {
1534                                 break;
1535                         }
1536                 }
1537                 c = p;
1538                 while (p < chunk->end) {
1539                         if (*p == '}') {
1540                                 break;
1541                         }
1542                         ucl_chunk_skipc (chunk, p);
1543                 }
1544                 *macro_start = c;
1545                 *macro_len = p - c;
1546                 ucl_chunk_skipc (chunk, p);
1547                 break;
1548         default:
1549                 /* Macro is not enclosed in quotes or braces */
1550                 c = p;
1551                 while (p < chunk->end) {
1552                         if (ucl_lex_is_atom_end (*p)) {
1553                                 break;
1554                         }
1555                         ucl_chunk_skipc (chunk, p);
1556                 }
1557                 *macro_start = c;
1558                 *macro_len = p - c;
1559                 break;
1560         }
1561
1562         /* We are at the end of a macro */
1563         /* Skip ';' and space characters and return to previous state */
1564         while (p < chunk->end) {
1565                 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1566                         break;
1567                 }
1568                 ucl_chunk_skipc (chunk, p);
1569         }
1570         return true;
1571 }
1572
1573 /**
1574  * Handle the main states of rcl parser
1575  * @param parser parser structure
1576  * @param data the pointer to the beginning of a chunk
1577  * @param len the length of a chunk
1578  * @return true if chunk has been parsed and false in case of error
1579  */
1580 static bool
1581 ucl_state_machine (struct ucl_parser *parser)
1582 {
1583         ucl_object_t *obj;
1584         struct ucl_chunk *chunk = parser->chunks;
1585         const unsigned char *p, *c = NULL, *macro_start = NULL;
1586         unsigned char *macro_escaped;
1587         size_t macro_len = 0;
1588         struct ucl_macro *macro = NULL;
1589         bool next_key = false, end_of_object = false;
1590
1591         if (parser->top_obj == NULL) {
1592                 if (*chunk->pos == '[') {
1593                         obj = ucl_add_parser_stack (NULL, parser, true, 0);
1594                 }
1595                 else {
1596                         obj = ucl_add_parser_stack (NULL, parser, false, 0);
1597                 }
1598                 if (obj == NULL) {
1599                         return false;
1600                 }
1601                 parser->top_obj = obj;
1602                 parser->cur_obj = obj;
1603                 parser->state = UCL_STATE_INIT;
1604         }
1605
1606         p = chunk->pos;
1607         while (chunk->pos < chunk->end) {
1608                 switch (parser->state) {
1609                 case UCL_STATE_INIT:
1610                         /*
1611                          * At the init state we can either go to the parse array or object
1612                          * if we got [ or { correspondingly or can just treat new data as
1613                          * a key of newly created object
1614                          */
1615                         obj = parser->cur_obj;
1616                         if (!ucl_skip_comments (parser)) {
1617                                 parser->prev_state = parser->state;
1618                                 parser->state = UCL_STATE_ERROR;
1619                                 return false;
1620                         }
1621                         else {
1622                                 p = chunk->pos;
1623                                 if (*p == '[') {
1624                                         parser->state = UCL_STATE_VALUE;
1625                                         ucl_chunk_skipc (chunk, p);
1626                                 }
1627                                 else {
1628                                         parser->state = UCL_STATE_KEY;
1629                                         if (*p == '{') {
1630                                                 ucl_chunk_skipc (chunk, p);
1631                                         }
1632                                 }
1633                         }
1634                         break;
1635                 case UCL_STATE_KEY:
1636                         /* Skip any spaces */
1637                         while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1638                                 ucl_chunk_skipc (chunk, p);
1639                         }
1640                         if (*p == '}') {
1641                                 /* We have the end of an object */
1642                                 parser->state = UCL_STATE_AFTER_VALUE;
1643                                 continue;
1644                         }
1645                         if (parser->stack == NULL) {
1646                                 /* No objects are on stack, but we want to parse a key */
1647                                 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1648                                                 "expects a key", &parser->err);
1649                                 parser->prev_state = parser->state;
1650                                 parser->state = UCL_STATE_ERROR;
1651                                 return false;
1652                         }
1653                         if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1654                                 parser->prev_state = parser->state;
1655                                 parser->state = UCL_STATE_ERROR;
1656                                 return false;
1657                         }
1658                         if (end_of_object) {
1659                                 p = chunk->pos;
1660                                 parser->state = UCL_STATE_AFTER_VALUE;
1661                                 continue;
1662                         }
1663                         else if (parser->state != UCL_STATE_MACRO_NAME) {
1664                                 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1665                                         /* Parse more keys and nest objects accordingly */
1666                                         obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1667                                                         parser->stack->level + 1);
1668                                         if (obj == NULL) {
1669                                                 return false;
1670                                         }
1671                                 }
1672                                 else {
1673                                         parser->state = UCL_STATE_VALUE;
1674                                 }
1675                         }
1676                         else {
1677                                 c = chunk->pos;
1678                         }
1679                         p = chunk->pos;
1680                         break;
1681                 case UCL_STATE_VALUE:
1682                         /* We need to check what we do have */
1683                         if (!ucl_parse_value (parser, chunk)) {
1684                                 parser->prev_state = parser->state;
1685                                 parser->state = UCL_STATE_ERROR;
1686                                 return false;
1687                         }
1688                         /* State is set in ucl_parse_value call */
1689                         p = chunk->pos;
1690                         break;
1691                 case UCL_STATE_AFTER_VALUE:
1692                         if (!ucl_parse_after_value (parser, chunk)) {
1693                                 parser->prev_state = parser->state;
1694                                 parser->state = UCL_STATE_ERROR;
1695                                 return false;
1696                         }
1697                         if (parser->stack != NULL) {
1698                                 if (parser->stack->obj->type == UCL_OBJECT) {
1699                                         parser->state = UCL_STATE_KEY;
1700                                 }
1701                                 else {
1702                                         /* Array */
1703                                         parser->state = UCL_STATE_VALUE;
1704                                 }
1705                         }
1706                         else {
1707                                 /* Skip everything at the end */
1708                                 return true;
1709                         }
1710                         p = chunk->pos;
1711                         break;
1712                 case UCL_STATE_MACRO_NAME:
1713                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1714                                 ucl_chunk_skipc (chunk, p);
1715                         }
1716                         else if (p - c > 0) {
1717                                 /* We got macro name */
1718                                 macro_len = (size_t)(p - c);
1719                                 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1720                                 if (macro == NULL) {
1721                                         ucl_create_err (&parser->err, "error on line %d at column %d: "
1722                                                         "unknown macro: '%.*s', character: '%c'",
1723                                                                 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1724                                         parser->state = UCL_STATE_ERROR;
1725                                         return false;
1726                                 }
1727                                 /* Now we need to skip all spaces */
1728                                 while (p < chunk->end) {
1729                                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1730                                                 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1731                                                         /* Skip comment */
1732                                                         if (!ucl_skip_comments (parser)) {
1733                                                                 return false;
1734                                                         }
1735                                                         p = chunk->pos;
1736                                                 }
1737                                                 break;
1738                                         }
1739                                         ucl_chunk_skipc (chunk, p);
1740                                 }
1741                                 parser->state = UCL_STATE_MACRO;
1742                         }
1743                         break;
1744                 case UCL_STATE_MACRO:
1745                         if (!ucl_parse_macro_value (parser, chunk, macro,
1746                                         &macro_start, &macro_len)) {
1747                                 parser->prev_state = parser->state;
1748                                 parser->state = UCL_STATE_ERROR;
1749                                 return false;
1750                         }
1751                         macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1752                         parser->state = parser->prev_state;
1753                         if (macro_escaped == NULL) {
1754                                 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1755                                         return false;
1756                                 }
1757                         }
1758                         else {
1759                                 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1760                                         UCL_FREE (macro_len + 1, macro_escaped);
1761                                         return false;
1762                                 }
1763                                 UCL_FREE (macro_len + 1, macro_escaped);
1764                         }
1765                         p = chunk->pos;
1766                         break;
1767                 default:
1768                         /* TODO: add all states */
1769                         ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1770                         parser->state = UCL_STATE_ERROR;
1771                         return false;
1772                 }
1773         }
1774
1775         return true;
1776 }
1777
1778 struct ucl_parser*
1779 ucl_parser_new (int flags)
1780 {
1781         struct ucl_parser *new;
1782
1783         new = UCL_ALLOC (sizeof (struct ucl_parser));
1784         if (new == NULL) {
1785                 return NULL;
1786         }
1787         memset (new, 0, sizeof (struct ucl_parser));
1788
1789         ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1790         ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1791         ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1792
1793         new->flags = flags;
1794
1795         /* Initial assumption about filevars */
1796         ucl_parser_set_filevars (new, NULL, false);
1797
1798         return new;
1799 }
1800
1801
1802 void
1803 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1804                 ucl_macro_handler handler, void* ud)
1805 {
1806         struct ucl_macro *new;
1807
1808         if (macro == NULL || handler == NULL) {
1809                 return;
1810         }
1811         new = UCL_ALLOC (sizeof (struct ucl_macro));
1812         if (new == NULL) {
1813                 return;
1814         }
1815         memset (new, 0, sizeof (struct ucl_macro));
1816         new->handler = handler;
1817         new->name = strdup (macro);
1818         new->ud = ud;
1819         HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1820 }
1821
1822 void
1823 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1824                 const char *value)
1825 {
1826         struct ucl_variable *new = NULL, *cur;
1827
1828         if (var == NULL) {
1829                 return;
1830         }
1831
1832         /* Find whether a variable already exists */
1833         LL_FOREACH (parser->variables, cur) {
1834                 if (strcmp (cur->var, var) == 0) {
1835                         new = cur;
1836                         break;
1837                 }
1838         }
1839
1840         if (value == NULL) {
1841
1842                 if (new != NULL) {
1843                         /* Remove variable */
1844                         LL_DELETE (parser->variables, new);
1845                         free (new->var);
1846                         free (new->value);
1847                         UCL_FREE (sizeof (struct ucl_variable), new);
1848                 }
1849                 else {
1850                         /* Do nothing */
1851                         return;
1852                 }
1853         }
1854         else {
1855                 if (new == NULL) {
1856                         new = UCL_ALLOC (sizeof (struct ucl_variable));
1857                         if (new == NULL) {
1858                                 return;
1859                         }
1860                         memset (new, 0, sizeof (struct ucl_variable));
1861                         new->var = strdup (var);
1862                         new->var_len = strlen (var);
1863                         new->value = strdup (value);
1864                         new->value_len = strlen (value);
1865
1866                         LL_PREPEND (parser->variables, new);
1867                 }
1868                 else {
1869                         free (new->value);
1870                         new->value = strdup (value);
1871                         new->value_len = strlen (value);
1872                 }
1873         }
1874 }
1875
1876 bool
1877 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1878                 size_t len)
1879 {
1880         struct ucl_chunk *chunk;
1881
1882         if (data == NULL || len == 0) {
1883                 ucl_create_err (&parser->err, "invalid chunk added");
1884                 return false;
1885         }
1886         if (parser->state != UCL_STATE_ERROR) {
1887                 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1888                 if (chunk == NULL) {
1889                         ucl_create_err (&parser->err, "cannot allocate chunk structure");
1890                         return false;
1891                 }
1892                 chunk->begin = data;
1893                 chunk->remain = len;
1894                 chunk->pos = chunk->begin;
1895                 chunk->end = chunk->begin + len;
1896                 chunk->line = 1;
1897                 chunk->column = 0;
1898                 LL_PREPEND (parser->chunks, chunk);
1899                 parser->recursion ++;
1900                 if (parser->recursion > UCL_MAX_RECURSION) {
1901                         ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1902                                         parser->recursion);
1903                         return false;
1904                 }
1905                 return ucl_state_machine (parser);
1906         }
1907
1908         ucl_create_err (&parser->err, "a parser is in an invalid state");
1909
1910         return false;
1911 }
1912
1913 bool
1914 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1915                 size_t len)
1916 {
1917         if (data == NULL) {
1918                 ucl_create_err (&parser->err, "invalid string added");
1919                 return false;
1920         }
1921         if (len == 0) {
1922                 len = strlen (data);
1923         }
1924
1925         return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1926 }