]> CyberLeo.Net >> Repos - FreeBSD/releng/9.3.git/blob - contrib/libucl/src/ucl_parser.c
Copy stable/9 to releng/9.3 as part of the 9.3-RELEASE cycle.
[FreeBSD/releng/9.3.git] / contrib / libucl / src / ucl_parser.c
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
32
/**
 * Snapshot of a chunk cursor: the four fields that
 * ucl_chunk_save_state() copies out and ucl_chunk_restore_state() copies back.
 */
struct ucl_parser_saved_state {
	unsigned int line;		/* saved line counter */
	unsigned int column;		/* saved column counter */
	size_t remain;			/* saved number of remaining bytes */
	const unsigned char *pos;	/* saved read position */
};
39
/**
 * Advance the cursor by exactly one character and keep the chunk
 * bookkeeping in sync: a newline increments the line counter and resets the
 * column, any other character increments the column; then the local cursor
 * and chunk->pos both move forward and chunk->remain is decremented.
 *
 * Both arguments are evaluated more than once, so they must not have side
 * effects. The macro does not check bounds; callers must ensure that the
 * cursor is dereferenceable.
 * @param chunk pointer to the chunk whose line/column/pos/remain are updated
 * @param p local cursor (a modifiable lvalue) that is incremented in place
 */
#define ucl_chunk_skipc(chunk, p)	do {	\
	if (*(p) == '\n') {	\
		(chunk)->line ++;	\
		(chunk)->column = 0;	\
	}	\
	else {	\
		(chunk)->column ++;	\
	}	\
	(p) ++;	\
	(chunk)->pos ++;	\
	(chunk)->remain --;	\
} while (0)
57
58 /**
59  * Save parser state
60  * @param chunk
61  * @param s
62  */
63 static inline void
64 ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
65 {
66         s->column = chunk->column;
67         s->pos = chunk->pos;
68         s->line = chunk->line;
69         s->remain = chunk->remain;
70 }
71
72 /**
73  * Restore parser state
74  * @param chunk
75  * @param s
76  */
77 static inline void
78 ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
79 {
80         chunk->column = s->column;
81         chunk->pos = s->pos;
82         chunk->line = s->line;
83         chunk->remain = s->remain;
84 }
85
86 static inline void
87 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
88 {
89         if (chunk->pos < chunk->end) {
90                 if (isgraph (*chunk->pos)) {
91                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
92                                         chunk->line, chunk->column, str, *chunk->pos);
93                 }
94                 else {
95                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
96                                         chunk->line, chunk->column, str, (int)*chunk->pos);
97                 }
98         }
99         else {
100                 ucl_create_err (err, "error at the end of chunk: %s", str);
101         }
102 }
103
104 /**
105  * Skip all comments from the current pos resolving nested and multiline comments
106  * @param parser
107  * @return
108  */
109 static bool
110 ucl_skip_comments (struct ucl_parser *parser)
111 {
112         struct ucl_chunk *chunk = parser->chunks;
113         const unsigned char *p;
114         int comments_nested = 0;
115
116         p = chunk->pos;
117
118 start:
119         if (*p == '#') {
120                 if (parser->state != UCL_STATE_SCOMMENT &&
121                                 parser->state != UCL_STATE_MCOMMENT) {
122                         while (p < chunk->end) {
123                                 if (*p == '\n') {
124                                         ucl_chunk_skipc (chunk, p);
125                                         goto start;
126                                 }
127                                 ucl_chunk_skipc (chunk, p);
128                         }
129                 }
130         }
131         else if (*p == '/' && chunk->remain >= 2) {
132                 if (p[1] == '*') {
133                         ucl_chunk_skipc (chunk, p);
134                         comments_nested ++;
135                         ucl_chunk_skipc (chunk, p);
136
137                         while (p < chunk->end) {
138                                 if (*p == '*') {
139                                         ucl_chunk_skipc (chunk, p);
140                                         if (*p == '/') {
141                                                 comments_nested --;
142                                                 if (comments_nested == 0) {
143                                                         ucl_chunk_skipc (chunk, p);
144                                                         goto start;
145                                                 }
146                                         }
147                                         ucl_chunk_skipc (chunk, p);
148                                 }
149                                 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
150                                         comments_nested ++;
151                                         ucl_chunk_skipc (chunk, p);
152                                         ucl_chunk_skipc (chunk, p);
153                                         continue;
154                                 }
155                                 ucl_chunk_skipc (chunk, p);
156                         }
157                         if (comments_nested != 0) {
158                                 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
159                                 return false;
160                         }
161                 }
162         }
163
164         return true;
165 }
166
/**
 * Map a size suffix character to its multiplier
 *
 * The suffix is matched case-insensitively: 'k', 'm' and 'g' select the
 * first, second and third power of the base, where the base is 1024 when
 * is_bytes is true and 1000 otherwise.
 * @param c suffix character
 * @param is_bytes if true use powers of 1024 instead of powers of 1000
 * @return the multiplier, or 1 for any other character
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	switch (tolower (c)) {
	case 'k':
		return is_bytes ? 1024 : 1000;
	case 'm':
		return is_bytes ? 1024 * 1024 : 1000 * 1000;
	case 'g':
		return is_bytes ? 1024 * 1024 * 1024 : 1000 * 1000 * 1000;
	default:
		return 1;
	}
}
197
198
/**
 * Map a time suffix character to the number of seconds it stands for
 *
 * The suffix is matched case-insensitively: 'm' is a minute, 'h' an hour,
 * 'd' a day, 'w' a week and 'y' a year of 365 days.
 * @param c suffix character
 * @return seconds per unit, or 1 for any other character
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* a year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	unsigned int i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
226
227 /**
228  * Return true if a character is a end of an atom
229  * @param c
230  * @return
231  */
232 static inline bool
233 ucl_lex_is_atom_end (const unsigned char c)
234 {
235         return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
236 }
237
/**
 * Tell whether two consecutive characters open a comment
 * @param c1 first character
 * @param c2 character that follows c1
 * @return true if c1 is '#', or if c1 is '/' and c2 is '*'
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		return true;
	case '/':
		return c2 == '*';
	default:
		return false;
	}
}
251
252 /**
253  * Check variable found
254  * @param parser
255  * @param ptr
256  * @param remain
257  * @param out_len
258  * @param strict
259  * @param found
260  * @return
261  */
262 static inline const char *
263 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
264                 size_t *out_len, bool strict, bool *found)
265 {
266         struct ucl_variable *var;
267
268         LL_FOREACH (parser->variables, var) {
269                 if (strict) {
270                         if (remain == var->var_len) {
271                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
272                                         *out_len += var->value_len;
273                                         *found = true;
274                                         return (ptr + var->var_len);
275                                 }
276                         }
277                 }
278                 else {
279                         if (remain >= var->var_len) {
280                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
281                                         *out_len += var->value_len;
282                                         *found = true;
283                                         return (ptr + var->var_len);
284                                 }
285                         }
286                 }
287         }
288
289         return ptr;
290 }
291
/**
 * Account for one '$' construct while measuring the expanded length of a string
 *
 * ptr points at the character that follows a '$'. Depending on what is
 * found there, *out_len is increased by:
 *  - the value length, for a known "{NAME}" (exact match) or a known NAME prefix;
 *  - 2, for a closed "{...}" that names no variable (the "${" is kept);
 *  - 1, for an unknown NAME, for a second '$' (escaped dollar), for a '{'
 *    that is never closed, and for a '$' that is the last input character.
 * In every case the '$' itself is accounted for, so the caller's buffer is
 * large enough for whatever is left in place.
 * @param parser parser object holding the variable list
 * @param ptr first character after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len running output length
 * @param vars_found set to true when a variable has been recognised
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (remain == 0) {
		/* '$' was the last character: count it without reading past the input */
		(*out_len) ++;
		return ptr;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
				if (found) {
					/* Step over the closing brace as well */
					ret ++;
					*vars_found = true;
				}
				else {
					/* Unknown name: "${" stays in the output */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * No closing brace: ret still points at '{', which the caller
			 * counts as an ordinary character, so only the '$' is added here.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			/* Unknown name: the '$' stays in the output */
			(*out_len) ++;
		}
	}
	else {
		/* "$$" produces a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
346
347 /**
348  * Expand a single variable
349  * @param parser
350  * @param ptr
351  * @param remain
352  * @param dest
353  * @return
354  */
355 static const char *
356 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
357                 size_t remain, unsigned char **dest)
358 {
359         unsigned char *d = *dest;
360         const char *p = ptr + 1, *ret;
361         struct ucl_variable *var;
362         bool found = false;
363
364         ret = ptr + 1;
365         remain --;
366
367         if (*p == '$') {
368                 *d++ = *p++;
369                 *dest = d;
370                 return p;
371         }
372         else if (*p == '{') {
373                 p ++;
374                 ret += 2;
375                 remain -= 2;
376         }
377
378         LL_FOREACH (parser->variables, var) {
379                 if (remain >= var->var_len) {
380                         if (memcmp (p, var->var, var->var_len) == 0) {
381                                 memcpy (d, var->value, var->value_len);
382                                 ret += var->var_len;
383                                 d += var->value_len;
384                                 found = true;
385                                 break;
386                         }
387                 }
388         }
389         if (!found) {
390                 memcpy (d, ptr, 2);
391                 d += 2;
392                 ret --;
393         }
394
395         *dest = d;
396         return ret;
397 }
398
/**
 * Expand variables in a string
 *
 * The input is scanned twice: the first pass computes the length of the
 * expanded text and detects whether any variable is present, the second
 * pass writes the expanded text into a freshly allocated, NUL terminated
 * buffer. Both passes stop as soon as the cursor reaches or passes the end
 * of the input, so a helper that advances too far cannot make them run away.
 * @param parser parser object holding the variable list
 * @param dst receives the new buffer, or NULL when no variable was found or
 * the allocation failed
 * @param src input string
 * @param in_len length of the input in bytes
 * @return length of the expanded string (excluding the terminator), or
 * in_len when *dst is set to NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *p, *end = src + in_len;
	unsigned char *d;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: measure */
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
		}
		else {
			p ++;
			out_len ++;
		}
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		/* Reported to the caller the same way as "nothing to expand" */
		return in_len;
	}

	/* Pass 2: write */
	d = *dst;
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_expand_single_variable (parser, p, end - p, &d);
		}
		else {
			*d++ = *p++;
		}
	}

	*d = '\0';

	return out_len;
}
453
454 /**
455  * Store or copy pointer to the trash stack
456  * @param parser parser object
457  * @param src src string
458  * @param dst destination buffer (trash stack pointer)
459  * @param dst_const const destination pointer (e.g. value of object)
460  * @param in_len input length
461  * @param need_unescape need to unescape source (and copy it)
462  * @param need_lowercase need to lowercase value (and copy)
463  * @param need_expand need to expand variables (and copy as well)
464  * @return output length (excluding \0 symbol)
465  */
466 static inline ssize_t
467 ucl_copy_or_store_ptr (struct ucl_parser *parser,
468                 const unsigned char *src, unsigned char **dst,
469                 const char **dst_const, size_t in_len,
470                 bool need_unescape, bool need_lowercase, bool need_expand)
471 {
472         ssize_t ret = -1, tret;
473         unsigned char *tmp;
474
475         if (need_unescape || need_lowercase ||
476                         (need_expand && parser->variables != NULL) ||
477                         !(parser->flags & UCL_PARSER_ZEROCOPY)) {
478                 /* Copy string */
479                 *dst = UCL_ALLOC (in_len + 1);
480                 if (*dst == NULL) {
481                         ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
482                         return false;
483                 }
484                 if (need_lowercase) {
485                         ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
486                 }
487                 else {
488                         ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
489                 }
490
491                 if (need_unescape) {
492                         ret = ucl_unescape_json_string (*dst, ret);
493                 }
494                 if (need_expand) {
495                         tmp = *dst;
496                         tret = ret;
497                         ret = ucl_expand_variable (parser, dst, tmp, ret);
498                         if (*dst == NULL) {
499                                 /* Nothing to expand */
500                                 *dst = tmp;
501                                 ret = tret;
502                         }
503                 }
504                 *dst_const = *dst;
505         }
506         else {
507                 *dst_const = src;
508                 ret = in_len;
509         }
510
511         return ret;
512 }
513
514 /**
515  * Create and append an object at the specified level
516  * @param parser
517  * @param is_array
518  * @param level
519  * @return
520  */
521 static inline ucl_object_t *
522 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
523 {
524         struct ucl_stack *st;
525
526         if (!is_array) {
527                 if (obj == NULL) {
528                         obj = ucl_object_typed_new (UCL_OBJECT);
529                 }
530                 else {
531                         obj->type = UCL_OBJECT;
532                 }
533                 obj->value.ov = ucl_hash_create ();
534                 parser->state = UCL_STATE_KEY;
535         }
536         else {
537                 if (obj == NULL) {
538                         obj = ucl_object_typed_new (UCL_ARRAY);
539                 }
540                 else {
541                         obj->type = UCL_ARRAY;
542                 }
543                 parser->state = UCL_STATE_VALUE;
544         }
545
546         st = UCL_ALLOC (sizeof (struct ucl_stack));
547         st->obj = obj;
548         st->level = level;
549         LL_PREPEND (parser->stack, st);
550         parser->cur_obj = obj;
551
552         return obj;
553 }
554
555 int
556 ucl_maybe_parse_number (ucl_object_t *obj,
557                 const char *start, const char *end, const char **pos, bool allow_double, bool number_bytes)
558 {
559         const char *p = start, *c = start;
560         char *endptr;
561         bool got_dot = false, got_exp = false, need_double = false,
562                         is_date = false, valid_start = false, is_hex = false,
563                         is_neg = false;
564         double dv = 0;
565         int64_t lv = 0;
566
567         if (*p == '-') {
568                 is_neg = true;
569                 c ++;
570                 p ++;
571         }
572         while (p < end) {
573                 if (is_hex && isxdigit (*p)) {
574                         p ++;
575                 }
576                 else if (isdigit (*p)) {
577                         valid_start = true;
578                         p ++;
579                 }
580                 else if (!is_hex && (*p == 'x' || *p == 'X')) {
581                         is_hex = true;
582                         allow_double = false;
583                         c = p + 1;
584                 }
585                 else if (allow_double) {
586                         if (p == c) {
587                                 /* Empty digits sequence, not a number */
588                                 *pos = start;
589                                 return EINVAL;
590                         }
591                         else if (*p == '.') {
592                                 if (got_dot) {
593                                         /* Double dots, not a number */
594                                         *pos = start;
595                                         return EINVAL;
596                                 }
597                                 else {
598                                         got_dot = true;
599                                         need_double = true;
600                                         p ++;
601                                 }
602                         }
603                         else if (*p == 'e' || *p == 'E') {
604                                 if (got_exp) {
605                                         /* Double exp, not a number */
606                                         *pos = start;
607                                         return EINVAL;
608                                 }
609                                 else {
610                                         got_exp = true;
611                                         need_double = true;
612                                         p ++;
613                                         if (p >= end) {
614                                                 *pos = start;
615                                                 return EINVAL;
616                                         }
617                                         if (!isdigit (*p) && *p != '+' && *p != '-') {
618                                                 /* Wrong exponent sign */
619                                                 *pos = start;
620                                                 return EINVAL;
621                                         }
622                                         else {
623                                                 p ++;
624                                         }
625                                 }
626                         }
627                         else {
628                                 /* Got the end of the number, need to check */
629                                 break;
630                         }
631                 }
632                 else {
633                         break;
634                 }
635         }
636
637         if (!valid_start) {
638                 *pos = start;
639                 return EINVAL;
640         }
641
642         errno = 0;
643         if (need_double) {
644                 dv = strtod (c, &endptr);
645         }
646         else {
647                 if (is_hex) {
648                         lv = strtoimax (c, &endptr, 16);
649                 }
650                 else {
651                         lv = strtoimax (c, &endptr, 10);
652                 }
653         }
654         if (errno == ERANGE) {
655                 *pos = start;
656                 return ERANGE;
657         }
658
659         /* Now check endptr */
660         if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
661                 p = endptr;
662                 goto set_obj;
663         }
664
665         if (endptr < end && endptr != start) {
666                 p = endptr;
667                 switch (*p) {
668                 case 'm':
669                 case 'M':
670                 case 'g':
671                 case 'G':
672                 case 'k':
673                 case 'K':
674                         if (end - p >= 2) {
675                                 if (p[1] == 's' || p[1] == 'S') {
676                                         /* Milliseconds */
677                                         if (!need_double) {
678                                                 need_double = true;
679                                                 dv = lv;
680                                         }
681                                         is_date = true;
682                                         if (p[0] == 'm' || p[0] == 'M') {
683                                                 dv /= 1000.;
684                                         }
685                                         else {
686                                                 dv *= ucl_lex_num_multiplier (*p, false);
687                                         }
688                                         p += 2;
689                                         goto set_obj;
690                                 }
691                                 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
692                                         /* Bytes */
693                                         if (need_double) {
694                                                 need_double = false;
695                                                 lv = dv;
696                                         }
697                                         lv *= ucl_lex_num_multiplier (*p, true);
698                                         p += 2;
699                                         goto set_obj;
700                                 }
701                                 else if (ucl_lex_is_atom_end (p[1])) {
702                                         if (need_double) {
703                                                 dv *= ucl_lex_num_multiplier (*p, false);
704                                         }
705                                         else {
706                                                 lv *= ucl_lex_num_multiplier (*p, number_bytes);
707                                         }
708                                         p ++;
709                                         goto set_obj;
710                                 }
711                                 else if (end - p >= 3) {
712                                         if (tolower (p[0]) == 'm' &&
713                                                         tolower (p[1]) == 'i' &&
714                                                         tolower (p[2]) == 'n') {
715                                                 /* Minutes */
716                                                 if (!need_double) {
717                                                         need_double = true;
718                                                         dv = lv;
719                                                 }
720                                                 is_date = true;
721                                                 dv *= 60.;
722                                                 p += 3;
723                                                 goto set_obj;
724                                         }
725                                 }
726                         }
727                         else {
728                                 if (need_double) {
729                                         dv *= ucl_lex_num_multiplier (*p, false);
730                                 }
731                                 else {
732                                         lv *= ucl_lex_num_multiplier (*p, number_bytes);
733                                 }
734                                 p ++;
735                                 goto set_obj;
736                         }
737                         break;
738                 case 'S':
739                 case 's':
740                         if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
741                                 if (!need_double) {
742                                         need_double = true;
743                                         dv = lv;
744                                 }
745                                 p ++;
746                                 is_date = true;
747                                 goto set_obj;
748                         }
749                         break;
750                 case 'h':
751                 case 'H':
752                 case 'd':
753                 case 'D':
754                 case 'w':
755                 case 'W':
756                 case 'Y':
757                 case 'y':
758                         if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
759                                 if (!need_double) {
760                                         need_double = true;
761                                         dv = lv;
762                                 }
763                                 is_date = true;
764                                 dv *= ucl_lex_time_multiplier (*p);
765                                 p ++;
766                                 goto set_obj;
767                         }
768                         break;
769                 }
770         }
771
772         *pos = c;
773         return EINVAL;
774
775         set_obj:
776         if (allow_double && (need_double || is_date)) {
777                 if (!is_date) {
778                         obj->type = UCL_FLOAT;
779                 }
780                 else {
781                         obj->type = UCL_TIME;
782                 }
783                 obj->value.dv = is_neg ? (-dv) : dv;
784         }
785         else {
786                 obj->type = UCL_INT;
787                 obj->value.iv = is_neg ? (-lv) : lv;
788         }
789         *pos = p;
790         return 0;
791 }
792
793 /**
794  * Parse possible number
795  * @param parser
796  * @param chunk
797  * @return true if a number has been parsed
798  */
799 static bool
800 ucl_lex_number (struct ucl_parser *parser,
801                 struct ucl_chunk *chunk, ucl_object_t *obj)
802 {
803         const unsigned char *pos;
804         int ret;
805
806         ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, true, false);
807
808         if (ret == 0) {
809                 chunk->remain -= pos - chunk->pos;
810                 chunk->column += pos - chunk->pos;
811                 chunk->pos = pos;
812                 return true;
813         }
814         else if (ret == ERANGE) {
815                 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
816         }
817
818         return false;
819 }
820
821 /**
822  * Parse quoted string with possible escapes
823  * @param parser
824  * @param chunk
825  * @return true if a string has been parsed
826  */
827 static bool
828 ucl_lex_json_string (struct ucl_parser *parser,
829                 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
830 {
831         const unsigned char *p = chunk->pos;
832         unsigned char c;
833         int i;
834
835         while (p < chunk->end) {
836                 c = *p;
837                 if (c < 0x1F) {
838                         /* Unmasked control character */
839                         if (c == '\n') {
840                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
841                         }
842                         else {
843                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
844                         }
845                         return false;
846                 }
847                 else if (c == '\\') {
848                         ucl_chunk_skipc (chunk, p);
849                         c = *p;
850                         if (p >= chunk->end) {
851                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
852                                 return false;
853                         }
854                         else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
855                                 if (c == 'u') {
856                                         ucl_chunk_skipc (chunk, p);
857                                         for (i = 0; i < 4 && p < chunk->end; i ++) {
858                                                 if (!isxdigit (*p)) {
859                                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
860                                                         return false;
861                                                 }
862                                                 ucl_chunk_skipc (chunk, p);
863                                         }
864                                         if (p >= chunk->end) {
865                                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
866                                                 return false;
867                                         }
868                                 }
869                                 else {
870                                         ucl_chunk_skipc (chunk, p);
871                                 }
872                         }
873                         *need_unescape = true;
874                         *ucl_escape = true;
875                         continue;
876                 }
877                 else if (c == '"') {
878                         ucl_chunk_skipc (chunk, p);
879                         return true;
880                 }
881                 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
882                         *ucl_escape = true;
883                 }
884                 else if (c == '$') {
885                         *var_expand = true;
886                 }
887                 ucl_chunk_skipc (chunk, p);
888         }
889
890         ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
891         return false;
892 }
893
894 /**
895  * Parse a key in an object
896  * @param parser
897  * @param chunk
898  * @return true if a key has been parsed
899  */
900 static bool
901 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
902 {
903         const unsigned char *p, *c = NULL, *end, *t;
904         const char *key = NULL;
905         bool got_quote = false, got_eq = false, got_semicolon = false,
906                         need_unescape = false, ucl_escape = false, var_expand = false,
907                         got_content = false, got_sep = false;
908         ucl_object_t *nobj, *tobj;
909         ucl_hash_t *container;
910         ssize_t keylen;
911
912         p = chunk->pos;
913
914         if (*p == '.') {
915                 /* It is macro actually */
916                 ucl_chunk_skipc (chunk, p);
917                 parser->prev_state = parser->state;
918                 parser->state = UCL_STATE_MACRO_NAME;
919                 return true;
920         }
921         while (p < chunk->end) {
922                 /*
923                  * A key must start with alpha, number, '/' or '_' and end with space character
924                  */
925                 if (c == NULL) {
926                         if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
927                                 if (!ucl_skip_comments (parser)) {
928                                         return false;
929                                 }
930                                 p = chunk->pos;
931                         }
932                         else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
933                                 ucl_chunk_skipc (chunk, p);
934                         }
935                         else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
936                                 /* The first symbol */
937                                 c = p;
938                                 ucl_chunk_skipc (chunk, p);
939                                 got_content = true;
940                         }
941                         else if (*p == '"') {
942                                 /* JSON style key */
943                                 c = p + 1;
944                                 got_quote = true;
945                                 got_content = true;
946                                 ucl_chunk_skipc (chunk, p);
947                         }
948                         else if (*p == '}') {
949                                 /* We have actually end of an object */
950                                 *end_of_object = true;
951                                 return true;
952                         }
953                         else if (*p == '.') {
954                                 ucl_chunk_skipc (chunk, p);
955                                 parser->prev_state = parser->state;
956                                 parser->state = UCL_STATE_MACRO_NAME;
957                                 return true;
958                         }
959                         else {
960                                 /* Invalid identifier */
961                                 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
962                                 return false;
963                         }
964                 }
965                 else {
966                         /* Parse the body of a key */
967                         if (!got_quote) {
968                                 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
969                                         got_content = true;
970                                         ucl_chunk_skipc (chunk, p);
971                                 }
972                                 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
973                                         end = p;
974                                         break;
975                                 }
976                                 else {
977                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
978                                         return false;
979                                 }
980                         }
981                         else {
982                                 /* We need to parse json like quoted string */
983                                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
984                                         return false;
985                                 }
986                                 /* Always escape keys obtained via json */
987                                 end = chunk->pos - 1;
988                                 p = chunk->pos;
989                                 break;
990                         }
991                 }
992         }
993
994         if (p >= chunk->end && got_content) {
995                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
996                 return false;
997         }
998         else if (!got_content) {
999                 return true;
1000         }
1001         *end_of_object = false;
1002         /* We are now at the end of the key, need to parse the rest */
1003         while (p < chunk->end) {
1004                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1005                         ucl_chunk_skipc (chunk, p);
1006                 }
1007                 else if (*p == '=') {
1008                         if (!got_eq && !got_semicolon) {
1009                                 ucl_chunk_skipc (chunk, p);
1010                                 got_eq = true;
1011                         }
1012                         else {
1013                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1014                                 return false;
1015                         }
1016                 }
1017                 else if (*p == ':') {
1018                         if (!got_eq && !got_semicolon) {
1019                                 ucl_chunk_skipc (chunk, p);
1020                                 got_semicolon = true;
1021                         }
1022                         else {
1023                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1024                                 return false;
1025                         }
1026                 }
1027                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1028                         /* Check for comment */
1029                         if (!ucl_skip_comments (parser)) {
1030                                 return false;
1031                         }
1032                         p = chunk->pos;
1033                 }
1034                 else {
1035                         /* Start value */
1036                         break;
1037                 }
1038         }
1039
1040         if (p >= chunk->end && got_content) {
1041                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1042                 return false;
1043         }
1044
1045         got_sep = got_semicolon || got_eq;
1046
1047         if (!got_sep) {
1048                 /*
1049                  * Maybe we have more keys nested, so search for termination character.
1050                  * Possible choices:
1051                  * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1052                  * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1053                  * 3) key1 value[;,\n] <- we treat that as linear object
1054                  */
1055                 t = p;
1056                 *next_key = false;
1057                 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1058                         t ++;
1059                 }
1060                 /* Check first non-space character after a key */
1061                 if (*t != '{' && *t != '[') {
1062                         while (t < chunk->end) {
1063                                 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1064                                         break;
1065                                 }
1066                                 else if (*t == '{' || *t == '[') {
1067                                         *next_key = true;
1068                                         break;
1069                                 }
1070                                 t ++;
1071                         }
1072                 }
1073         }
1074
1075         /* Create a new object */
1076         nobj = ucl_object_new ();
1077         keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1078                         &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1079         if (keylen == -1) {
1080                 ucl_object_free(nobj);
1081                 return false;
1082         }
1083         else if (keylen == 0) {
1084                 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1085                 ucl_object_free(nobj);
1086                 return false;
1087         }
1088
1089         container = parser->stack->obj->value.ov;
1090         nobj->key = key;
1091         nobj->keylen = keylen;
1092         tobj = ucl_hash_search_obj (container, nobj);
1093         if (tobj == NULL) {
1094                 container = ucl_hash_insert_object (container, nobj);
1095                 nobj->prev = nobj;
1096                 nobj->next = NULL;
1097                 parser->stack->obj->len ++;
1098         }
1099         else {
1100                 DL_APPEND (tobj, nobj);
1101         }
1102
1103         if (ucl_escape) {
1104                 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1105         }
1106         parser->stack->obj->value.ov = container;
1107
1108         parser->cur_obj = nobj;
1109
1110         return true;
1111 }
1112
1113 /**
1114  * Parse a cl string
1115  * @param parser
1116  * @param chunk
1117  * @return true if a key has been parsed
1118  */
1119 static bool
1120 ucl_parse_string_value (struct ucl_parser *parser,
1121                 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1122 {
1123         const unsigned char *p;
1124         enum {
1125                 UCL_BRACE_ROUND = 0,
1126                 UCL_BRACE_SQUARE,
1127                 UCL_BRACE_FIGURE
1128         };
1129         int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1130
1131         p = chunk->pos;
1132
1133         while (p < chunk->end) {
1134
1135                 /* Skip pairs of figure braces */
1136                 if (*p == '{') {
1137                         braces[UCL_BRACE_FIGURE][0] ++;
1138                 }
1139                 else if (*p == '}') {
1140                         braces[UCL_BRACE_FIGURE][1] ++;
1141                         if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1142                                 /* This is not a termination symbol, continue */
1143                                 ucl_chunk_skipc (chunk, p);
1144                                 continue;
1145                         }
1146                 }
1147                 /* Skip pairs of square braces */
1148                 else if (*p == '[') {
1149                         braces[UCL_BRACE_SQUARE][0] ++;
1150                 }
1151                 else if (*p == ']') {
1152                         braces[UCL_BRACE_SQUARE][1] ++;
1153                         if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1154                                 /* This is not a termination symbol, continue */
1155                                 ucl_chunk_skipc (chunk, p);
1156                                 continue;
1157                         }
1158                 }
1159                 else if (*p == '$') {
1160                         *var_expand = true;
1161                 }
1162                 else if (*p == '\\') {
1163                         *need_unescape = true;
1164                         ucl_chunk_skipc (chunk, p);
1165                         if (p < chunk->end) {
1166                                 ucl_chunk_skipc (chunk, p);
1167                         }
1168                         continue;
1169                 }
1170
1171                 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1172                         break;
1173                 }
1174                 ucl_chunk_skipc (chunk, p);
1175         }
1176
1177         if (p >= chunk->end) {
1178                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1179                 return false;
1180         }
1181
1182         return true;
1183 }
1184
1185 /**
1186  * Parse multiline string ending with \n{term}\n
1187  * @param parser
1188  * @param chunk
1189  * @param term
1190  * @param term_len
1191  * @return size of multiline string or 0 in case of error
1192  */
1193 static int
1194 ucl_parse_multiline_string (struct ucl_parser *parser,
1195                 struct ucl_chunk *chunk, const unsigned char *term,
1196                 int term_len, unsigned char const **beg,
1197                 bool *var_expand)
1198 {
1199         const unsigned char *p, *c;
1200         bool newline = false;
1201         int len = 0;
1202
1203         p = chunk->pos;
1204
1205         c = p;
1206
1207         while (p < chunk->end) {
1208                 if (newline) {
1209                         if (chunk->end - p < term_len) {
1210                                 return 0;
1211                         }
1212                         else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1213                                 len = p - c;
1214                                 chunk->remain -= term_len;
1215                                 chunk->pos = p + term_len;
1216                                 chunk->column = term_len;
1217                                 *beg = c;
1218                                 break;
1219                         }
1220                 }
1221                 if (*p == '\n') {
1222                         newline = true;
1223                 }
1224                 else {
1225                         if (*p == '$') {
1226                                 *var_expand = true;
1227                         }
1228                         newline = false;
1229                 }
1230                 ucl_chunk_skipc (chunk, p);
1231         }
1232
1233         return len;
1234 }
1235
1236 static ucl_object_t*
1237 ucl_get_value_object (struct ucl_parser *parser)
1238 {
1239         ucl_object_t *t, *obj = NULL;
1240
1241         if (parser->stack->obj->type == UCL_ARRAY) {
1242                 /* Object must be allocated */
1243                 obj = ucl_object_new ();
1244                 t = parser->stack->obj->value.av;
1245                 DL_APPEND (t, obj);
1246                 parser->cur_obj = obj;
1247                 parser->stack->obj->value.av = t;
1248                 parser->stack->obj->len ++;
1249         }
1250         else {
1251                 /* Object has been already allocated */
1252                 obj = parser->cur_obj;
1253         }
1254
1255         return obj;
1256 }
1257
1258 /**
1259  * Handle value data
1260  * @param parser
1261  * @param chunk
1262  * @return
1263  */
1264 static bool
1265 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1266 {
1267         const unsigned char *p, *c;
1268         ucl_object_t *obj = NULL;
1269         unsigned int stripped_spaces;
1270         int str_len;
1271         bool need_unescape = false, ucl_escape = false, var_expand = false;
1272
1273         p = chunk->pos;
1274
1275         /* Skip any spaces and comments */
1276         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1277                         (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1278                 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1279                         ucl_chunk_skipc (chunk, p);
1280                 }
1281                 if (!ucl_skip_comments (parser)) {
1282                         return false;
1283                 }
1284                 p = chunk->pos;
1285         }
1286
1287         while (p < chunk->end) {
1288                 c = p;
1289                 switch (*p) {
1290                 case '"':
1291                         obj = ucl_get_value_object (parser);
1292                         ucl_chunk_skipc (chunk, p);
1293                         if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1294                                 return false;
1295                         }
1296                         str_len = chunk->pos - c - 2;
1297                         obj->type = UCL_STRING;
1298                         if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1299                                         &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1300                                 return false;
1301                         }
1302                         obj->len = str_len;
1303                         parser->state = UCL_STATE_AFTER_VALUE;
1304                         p = chunk->pos;
1305                         return true;
1306                         break;
1307                 case '{':
1308                         obj = ucl_get_value_object (parser);
1309                         /* We have a new object */
1310                         obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1311
1312                         ucl_chunk_skipc (chunk, p);
1313                         return true;
1314                         break;
1315                 case '[':
1316                         obj = ucl_get_value_object (parser);
1317                         /* We have a new array */
1318                         obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1319
1320                         ucl_chunk_skipc (chunk, p);
1321                         return true;
1322                         break;
1323                 case ']':
1324                         /* We have the array ending */
1325                         if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1326                                 parser->state = UCL_STATE_AFTER_VALUE;
1327                                 return true;
1328                         }
1329                         else {
1330                                 goto parse_string;
1331                         }
1332                         break;
1333                 case '<':
1334                         obj = ucl_get_value_object (parser);
1335                         /* We have something like multiline value, which must be <<[A-Z]+\n */
1336                         if (chunk->end - p > 3) {
1337                                 if (memcmp (p, "<<", 2) == 0) {
1338                                         p += 2;
1339                                         /* We allow only uppercase characters in multiline definitions */
1340                                         while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1341                                                 p ++;
1342                                         }
1343                                         if (*p =='\n') {
1344                                                 /* Set chunk positions and start multiline parsing */
1345                                                 c += 2;
1346                                                 chunk->remain -= p - c;
1347                                                 chunk->pos = p + 1;
1348                                                 chunk->column = 0;
1349                                                 chunk->line ++;
1350                                                 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1351                                                                 p - c, &c, &var_expand)) == 0) {
1352                                                         ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1353                                                         return false;
1354                                                 }
1355                                                 obj->type = UCL_STRING;
1356                                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1357                                                         &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1358                                                         return false;
1359                                                 }
1360                                                 obj->len = str_len;
1361                                                 parser->state = UCL_STATE_AFTER_VALUE;
1362                                                 return true;
1363                                         }
1364                                 }
1365                         }
1366                         /* Fallback to ordinary strings */
1367                 default:
1368 parse_string:
1369                         if (obj == NULL) {
1370                                 obj = ucl_get_value_object (parser);
1371                         }
1372                         /* Parse atom */
1373                         if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1374                                 if (!ucl_lex_number (parser, chunk, obj)) {
1375                                         if (parser->state == UCL_STATE_ERROR) {
1376                                                 return false;
1377                                         }
1378                                 }
1379                                 else {
1380                                         parser->state = UCL_STATE_AFTER_VALUE;
1381                                         return true;
1382                                 }
1383                                 /* Fallback to normal string */
1384                         }
1385
1386                         if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1387                                 return false;
1388                         }
1389                         /* Cut trailing spaces */
1390                         stripped_spaces = 0;
1391                         while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1392                                         UCL_CHARACTER_WHITESPACE)) {
1393                                 stripped_spaces ++;
1394                         }
1395                         str_len = chunk->pos - c - stripped_spaces;
1396                         if (str_len <= 0) {
1397                                 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1398                                 return false;
1399                         }
1400                         else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1401                                 obj->len = 0;
1402                                 obj->type = UCL_NULL;
1403                         }
1404                         else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1405                                 obj->type = UCL_STRING;
1406                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1407                                                 &obj->value.sv, str_len, need_unescape,
1408                                                 false, var_expand)) == -1) {
1409                                         return false;
1410                                 }
1411                                 obj->len = str_len;
1412                         }
1413                         parser->state = UCL_STATE_AFTER_VALUE;
1414                         p = chunk->pos;
1415
1416                         return true;
1417                         break;
1418                 }
1419         }
1420
1421         return true;
1422 }
1423
1424 /**
1425  * Handle after value data
1426  * @param parser
1427  * @param chunk
1428  * @return
1429  */
1430 static bool
1431 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1432 {
1433         const unsigned char *p;
1434         bool got_sep = false;
1435         struct ucl_stack *st;
1436
1437         p = chunk->pos;
1438
1439         while (p < chunk->end) {
1440                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1441                         /* Skip whitespaces */
1442                         ucl_chunk_skipc (chunk, p);
1443                 }
1444                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1445                         /* Skip comment */
1446                         if (!ucl_skip_comments (parser)) {
1447                                 return false;
1448                         }
1449                         /* Treat comment as a separator */
1450                         got_sep = true;
1451                         p = chunk->pos;
1452                 }
1453                 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1454                         if (*p == '}' || *p == ']') {
1455                                 if (parser->stack == NULL) {
1456                                         ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1457                                         return false;
1458                                 }
1459                                 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1460                                                 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1461
1462                                         /* Pop all nested objects from a stack */
1463                                         st = parser->stack;
1464                                         parser->stack = st->next;
1465                                         UCL_FREE (sizeof (struct ucl_stack), st);
1466
1467                                         while (parser->stack != NULL) {
1468                                                 st = parser->stack;
1469                                                 if (st->next == NULL || st->next->level == st->level) {
1470                                                         break;
1471                                                 }
1472                                                 parser->stack = st->next;
1473                                                 UCL_FREE (sizeof (struct ucl_stack), st);
1474                                         }
1475                                 }
1476                                 else {
1477                                         ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1478                                         return false;
1479                                 }
1480
1481                                 if (parser->stack == NULL) {
1482                                         /* Ignore everything after a top object */
1483                                         return true;
1484                                 }
1485                                 else {
1486                                         ucl_chunk_skipc (chunk, p);
1487                                 }
1488                                 got_sep = true;
1489                         }
1490                         else {
1491                                 /* Got a separator */
1492                                 got_sep = true;
1493                                 ucl_chunk_skipc (chunk, p);
1494                         }
1495                 }
1496                 else {
1497                         /* Anything else */
1498                         if (!got_sep) {
1499                                 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1500                                 return false;
1501                         }
1502                         return true;
1503                 }
1504         }
1505
1506         return true;
1507 }
1508
1509 /**
1510  * Handle macro data
1511  * @param parser
1512  * @param chunk
1513  * @return
1514  */
1515 static bool
1516 ucl_parse_macro_value (struct ucl_parser *parser,
1517                 struct ucl_chunk *chunk, struct ucl_macro *macro,
1518                 unsigned char const **macro_start, size_t *macro_len)
1519 {
1520         const unsigned char *p, *c;
1521         bool need_unescape = false, ucl_escape = false, var_expand = false;
1522
1523         p = chunk->pos;
1524
1525         switch (*p) {
1526         case '"':
1527                 /* We have macro value encoded in quotes */
1528                 c = p;
1529                 ucl_chunk_skipc (chunk, p);
1530                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1531                         return false;
1532                 }
1533
1534                 *macro_start = c + 1;
1535                 *macro_len = chunk->pos - c - 2;
1536                 p = chunk->pos;
1537                 break;
1538         case '{':
1539                 /* We got a multiline macro body */
1540                 ucl_chunk_skipc (chunk, p);
1541                 /* Skip spaces at the beginning */
1542                 while (p < chunk->end) {
1543                         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1544                                 ucl_chunk_skipc (chunk, p);
1545                         }
1546                         else {
1547                                 break;
1548                         }
1549                 }
1550                 c = p;
1551                 while (p < chunk->end) {
1552                         if (*p == '}') {
1553                                 break;
1554                         }
1555                         ucl_chunk_skipc (chunk, p);
1556                 }
1557                 *macro_start = c;
1558                 *macro_len = p - c;
1559                 ucl_chunk_skipc (chunk, p);
1560                 break;
1561         default:
1562                 /* Macro is not enclosed in quotes or braces */
1563                 c = p;
1564                 while (p < chunk->end) {
1565                         if (ucl_lex_is_atom_end (*p)) {
1566                                 break;
1567                         }
1568                         ucl_chunk_skipc (chunk, p);
1569                 }
1570                 *macro_start = c;
1571                 *macro_len = p - c;
1572                 break;
1573         }
1574
1575         /* We are at the end of a macro */
1576         /* Skip ';' and space characters and return to previous state */
1577         while (p < chunk->end) {
1578                 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1579                         break;
1580                 }
1581                 ucl_chunk_skipc (chunk, p);
1582         }
1583         return true;
1584 }
1585
1586 /**
1587  * Handle the main states of rcl parser
1588  * @param parser parser structure
1589  * @param data the pointer to the beginning of a chunk
1590  * @param len the length of a chunk
1591  * @return true if chunk has been parsed and false in case of error
1592  */
1593 static bool
1594 ucl_state_machine (struct ucl_parser *parser)
1595 {
1596         ucl_object_t *obj;
1597         struct ucl_chunk *chunk = parser->chunks;
1598         const unsigned char *p, *c = NULL, *macro_start = NULL;
1599         unsigned char *macro_escaped;
1600         size_t macro_len = 0;
1601         struct ucl_macro *macro = NULL;
1602         bool next_key = false, end_of_object = false;
1603
1604         if (parser->top_obj == NULL) {
1605                 if (*chunk->pos == '[') {
1606                         obj = ucl_add_parser_stack (NULL, parser, true, 0);
1607                 }
1608                 else {
1609                         obj = ucl_add_parser_stack (NULL, parser, false, 0);
1610                 }
1611                 parser->top_obj = obj;
1612                 parser->cur_obj = obj;
1613                 parser->state = UCL_STATE_INIT;
1614         }
1615
1616         p = chunk->pos;
1617         while (chunk->pos < chunk->end) {
1618                 switch (parser->state) {
1619                 case UCL_STATE_INIT:
1620                         /*
1621                          * At the init state we can either go to the parse array or object
1622                          * if we got [ or { correspondingly or can just treat new data as
1623                          * a key of newly created object
1624                          */
1625                         obj = parser->cur_obj;
1626                         if (!ucl_skip_comments (parser)) {
1627                                 parser->prev_state = parser->state;
1628                                 parser->state = UCL_STATE_ERROR;
1629                                 return false;
1630                         }
1631                         else {
1632                                 p = chunk->pos;
1633                                 if (*p == '[') {
1634                                         parser->state = UCL_STATE_VALUE;
1635                                         ucl_chunk_skipc (chunk, p);
1636                                 }
1637                                 else {
1638                                         parser->state = UCL_STATE_KEY;
1639                                         if (*p == '{') {
1640                                                 ucl_chunk_skipc (chunk, p);
1641                                         }
1642                                 }
1643                         }
1644                         break;
1645                 case UCL_STATE_KEY:
1646                         /* Skip any spaces */
1647                         while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1648                                 ucl_chunk_skipc (chunk, p);
1649                         }
1650                         if (*p == '}') {
1651                                 /* We have the end of an object */
1652                                 parser->state = UCL_STATE_AFTER_VALUE;
1653                                 continue;
1654                         }
1655                         if (parser->stack == NULL) {
1656                                 /* No objects are on stack, but we want to parse a key */
1657                                 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1658                                                 "expects a key", &parser->err);
1659                                 parser->prev_state = parser->state;
1660                                 parser->state = UCL_STATE_ERROR;
1661                                 return false;
1662                         }
1663                         if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1664                                 parser->prev_state = parser->state;
1665                                 parser->state = UCL_STATE_ERROR;
1666                                 return false;
1667                         }
1668                         if (end_of_object) {
1669                                 p = chunk->pos;
1670                                 parser->state = UCL_STATE_AFTER_VALUE;
1671                                 continue;
1672                         }
1673                         else if (parser->state != UCL_STATE_MACRO_NAME) {
1674                                 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1675                                         /* Parse more keys and nest objects accordingly */
1676                                         obj = ucl_add_parser_stack (parser->cur_obj, parser, false, parser->stack->level + 1);
1677                                 }
1678                                 else {
1679                                         parser->state = UCL_STATE_VALUE;
1680                                 }
1681                         }
1682                         else {
1683                                 c = chunk->pos;
1684                         }
1685                         p = chunk->pos;
1686                         break;
1687                 case UCL_STATE_VALUE:
1688                         /* We need to check what we do have */
1689                         if (!ucl_parse_value (parser, chunk)) {
1690                                 parser->prev_state = parser->state;
1691                                 parser->state = UCL_STATE_ERROR;
1692                                 return false;
1693                         }
1694                         /* State is set in ucl_parse_value call */
1695                         p = chunk->pos;
1696                         break;
1697                 case UCL_STATE_AFTER_VALUE:
1698                         if (!ucl_parse_after_value (parser, chunk)) {
1699                                 parser->prev_state = parser->state;
1700                                 parser->state = UCL_STATE_ERROR;
1701                                 return false;
1702                         }
1703                         if (parser->stack != NULL) {
1704                                 if (parser->stack->obj->type == UCL_OBJECT) {
1705                                         parser->state = UCL_STATE_KEY;
1706                                 }
1707                                 else {
1708                                         /* Array */
1709                                         parser->state = UCL_STATE_VALUE;
1710                                 }
1711                         }
1712                         else {
1713                                 /* Skip everything at the end */
1714                                 return true;
1715                         }
1716                         p = chunk->pos;
1717                         break;
1718                 case UCL_STATE_MACRO_NAME:
1719                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1720                                 ucl_chunk_skipc (chunk, p);
1721                         }
1722                         else if (p - c > 0) {
1723                                 /* We got macro name */
1724                                 macro_len = (size_t)(p - c);
1725                                 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1726                                 if (macro == NULL) {
1727                                         ucl_create_err (&parser->err, "error on line %d at column %d: "
1728                                                         "unknown macro: '%.*s', character: '%c'",
1729                                                                 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1730                                         parser->state = UCL_STATE_ERROR;
1731                                         return false;
1732                                 }
1733                                 /* Now we need to skip all spaces */
1734                                 while (p < chunk->end) {
1735                                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1736                                                 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1737                                                         /* Skip comment */
1738                                                         if (!ucl_skip_comments (parser)) {
1739                                                                 return false;
1740                                                         }
1741                                                         p = chunk->pos;
1742                                                 }
1743                                                 break;
1744                                         }
1745                                         ucl_chunk_skipc (chunk, p);
1746                                 }
1747                                 parser->state = UCL_STATE_MACRO;
1748                         }
1749                         break;
1750                 case UCL_STATE_MACRO:
1751                         if (!ucl_parse_macro_value (parser, chunk, macro,
1752                                         &macro_start, &macro_len)) {
1753                                 parser->prev_state = parser->state;
1754                                 parser->state = UCL_STATE_ERROR;
1755                                 return false;
1756                         }
1757                         macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1758                         parser->state = parser->prev_state;
1759                         if (macro_escaped == NULL) {
1760                                 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1761                                         return false;
1762                                 }
1763                         }
1764                         else {
1765                                 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1766                                         UCL_FREE (macro_len + 1, macro_escaped);
1767                                         return false;
1768                                 }
1769                                 UCL_FREE (macro_len + 1, macro_escaped);
1770                         }
1771                         p = chunk->pos;
1772                         break;
1773                 default:
1774                         /* TODO: add all states */
1775                         ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1776                         parser->state = UCL_STATE_ERROR;
1777                         return false;
1778                 }
1779         }
1780
1781         return true;
1782 }
1783
1784 struct ucl_parser*
1785 ucl_parser_new (int flags)
1786 {
1787         struct ucl_parser *new;
1788
1789         new = UCL_ALLOC (sizeof (struct ucl_parser));
1790         memset (new, 0, sizeof (struct ucl_parser));
1791
1792         ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1793         ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1794         ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1795
1796         new->flags = flags;
1797
1798         /* Initial assumption about filevars */
1799         ucl_parser_set_filevars (new, NULL, false);
1800
1801         return new;
1802 }
1803
1804
1805 void
1806 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1807                 ucl_macro_handler handler, void* ud)
1808 {
1809         struct ucl_macro *new;
1810
1811         new = UCL_ALLOC (sizeof (struct ucl_macro));
1812         memset (new, 0, sizeof (struct ucl_macro));
1813         new->handler = handler;
1814         new->name = strdup (macro);
1815         new->ud = ud;
1816         HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1817 }
1818
1819 void
1820 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1821                 const char *value)
1822 {
1823         struct ucl_variable *new = NULL, *cur;
1824
1825         if (var == NULL) {
1826                 return;
1827         }
1828
1829         /* Find whether a variable already exists */
1830         LL_FOREACH (parser->variables, cur) {
1831                 if (strcmp (cur->var, var) == 0) {
1832                         new = cur;
1833                         break;
1834                 }
1835         }
1836
1837         if (value == NULL) {
1838
1839                 if (new != NULL) {
1840                         /* Remove variable */
1841                         LL_DELETE (parser->variables, new);
1842                         free (new->var);
1843                         free (new->value);
1844                         UCL_FREE (sizeof (struct ucl_variable), new);
1845                 }
1846                 else {
1847                         /* Do nothing */
1848                         return;
1849                 }
1850         }
1851         else {
1852                 if (new == NULL) {
1853                         new = UCL_ALLOC (sizeof (struct ucl_variable));
1854                         memset (new, 0, sizeof (struct ucl_variable));
1855                         new->var = strdup (var);
1856                         new->var_len = strlen (var);
1857                         new->value = strdup (value);
1858                         new->value_len = strlen (value);
1859
1860                         LL_PREPEND (parser->variables, new);
1861                 }
1862                 else {
1863                         free (new->value);
1864                         new->value = strdup (value);
1865                         new->value_len = strlen (value);
1866                 }
1867         }
1868 }
1869
1870 bool
1871 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1872                 size_t len)
1873 {
1874         struct ucl_chunk *chunk;
1875
1876         if (parser->state != UCL_STATE_ERROR) {
1877                 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1878                 chunk->begin = data;
1879                 chunk->remain = len;
1880                 chunk->pos = chunk->begin;
1881                 chunk->end = chunk->begin + len;
1882                 chunk->line = 1;
1883                 chunk->column = 0;
1884                 LL_PREPEND (parser->chunks, chunk);
1885                 parser->recursion ++;
1886                 if (parser->recursion > UCL_MAX_RECURSION) {
1887                         ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1888                                         parser->recursion);
1889                         return false;
1890                 }
1891                 return ucl_state_machine (parser);
1892         }
1893
1894         ucl_create_err (&parser->err, "a parser is in an invalid state");
1895
1896         return false;
1897 }