src/ucl_parser.c

   1 /* Copyright (c) 2013, Vsevolod Stakhov
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are met:
   6  *       * Redistributions of source code must retain the above copyright
   7  *         notice, this list of conditions and the following disclaimer.
   8  *       * Redistributions in binary form must reproduce the above copyright
   9  *         notice, this list of conditions and the following disclaimer in the
  10  *         documentation and/or other materials provided with the distribution.
  11  *
  12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22  */
  23
  24 #include "ucl.h"
  25 #include "ucl_internal.h"
  26 #include "ucl_chartable.h"
  27
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
  32
  33 struct ucl_parser_saved_state {
  34         unsigned int line;
  35         unsigned int column;
  36         size_t remain;
  37         const unsigned char *pos;
  38 };
  39
  40 /**
  41  * Move up to len characters
  42  * @param parser
  43  * @param begin
  44  * @param len
  45  * @return new position in chunk
  46  */
  47 #define ucl_chunk_skipc(chunk, p)    do{                                        \
  48     if (*(p) == '\n') {                                                                         \
  49         (chunk)->line ++;                                                                       \
  50         (chunk)->column = 0;                                                            \
  51     }                                                                                                           \
  52     else (chunk)->column ++;                                                            \
  53     (p++);                                                                                                      \
  54     (chunk)->pos ++;                                                                            \
  55     (chunk)->remain --;                                                                         \
  56     } while (0)
  57
  58 static inline void
  59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
  60 {
  61         if (chunk->pos < chunk->end) {
  62                 if (isgraph (*chunk->pos)) {
  63                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
  64                                         chunk->line, chunk->column, str, *chunk->pos);
  65                 }
  66                 else {
  67                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
  68                                         chunk->line, chunk->column, str, (int)*chunk->pos);
  69                 }
  70         }
  71         else {
  72                 ucl_create_err (err, "error at the end of chunk: %s", str);
  73         }
  74 }
  75
  76 /**
  77  * Skip all comments from the current pos resolving nested and multiline comments
  78  * @param parser
  79  * @return
  80  */
  81 static bool
  82 ucl_skip_comments (struct ucl_parser *parser)
  83 {
  84         struct ucl_chunk *chunk = parser->chunks;
  85         const unsigned char *p;
  86         int comments_nested = 0;
  87
  88         p = chunk->pos;
  89
  90 start:
  91         if (*p == '#') {
  92                 if (parser->state != UCL_STATE_SCOMMENT &&
  93                                 parser->state != UCL_STATE_MCOMMENT) {
  94                         while (p < chunk->end) {
  95                                 if (*p == '\n') {
  96                                         ucl_chunk_skipc (chunk, p);
  97                                         goto start;
  98                                 }
  99                                 ucl_chunk_skipc (chunk, p);
 100                         }
 101                 }
 102         }
 103         else if (*p == '/' && chunk->remain >= 2) {
 104                 if (p[1] == '*') {
 105                         ucl_chunk_skipc (chunk, p);
 106                         comments_nested ++;
 107                         ucl_chunk_skipc (chunk, p);
 108
 109                         while (p < chunk->end) {
 110                                 if (*p == '*') {
 111                                         ucl_chunk_skipc (chunk, p);
 112                                         if (*p == '/') {
 113                                                 comments_nested --;
 114                                                 if (comments_nested == 0) {
 115                                                         ucl_chunk_skipc (chunk, p);
 116                                                         goto start;
 117                                                 }
 118                                         }
 119                                         ucl_chunk_skipc (chunk, p);
 120                                 }
 121                                 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
 122                                         comments_nested ++;
 123                                         ucl_chunk_skipc (chunk, p);
 124                                         ucl_chunk_skipc (chunk, p);
 125                                         continue;
 126                                 }
 127                                 ucl_chunk_skipc (chunk, p);
 128                         }
 129                         if (comments_nested != 0) {
 130                                 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
 131                                 return false;
 132                         }
 133                 }
 134         }
 135
 136         return true;
 137 }
 138
 139 /**
 140  * Return multiplier for a character
 141  * @param c multiplier character
 142  * @param is_bytes if true use 1024 multiplier
 143  * @return multiplier
 144  */
 145 static inline unsigned long
 146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
 147         const struct {
 148                 char c;
 149                 long mult_normal;
 150                 long mult_bytes;
 151         } multipliers[] = {
 152                         {'m', 1000 * 1000, 1024 * 1024},
 153                         {'k', 1000, 1024},
 154                         {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
 155         };
 156         int i;
 157
 158         for (i = 0; i < 3; i ++) {
 159                 if (tolower (c) == multipliers[i].c) {
 160                         if (is_bytes) {
 161                                 return multipliers[i].mult_bytes;
 162                         }
 163                         return multipliers[i].mult_normal;
 164                 }
 165         }
 166
 167         return 1;
 168 }
 169
 170
 171 /**
 172  * Return multiplier for time scaling
 173  * @param c
 174  * @return
 175  */
 176 static inline double
 177 ucl_lex_time_multiplier (const unsigned char c) {
 178         const struct {
 179                 char c;
 180                 double mult;
 181         } multipliers[] = {
 182                         {'m', 60},
 183                         {'h', 60 * 60},
 184                         {'d', 60 * 60 * 24},
 185                         {'w', 60 * 60 * 24 * 7},
 186                         {'y', 60 * 60 * 24 * 7 * 365}
 187         };
 188         int i;
 189
 190         for (i = 0; i < 5; i ++) {
 191                 if (tolower (c) == multipliers[i].c) {
 192                         return multipliers[i].mult;
 193                 }
 194         }
 195
 196         return 1;
 197 }
 198
 199 /**
 200  * Return true if a character is a end of an atom
 201  * @param c
 202  * @return
 203  */
 204 static inline bool
 205 ucl_lex_is_atom_end (const unsigned char c)
 206 {
 207         return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
 208 }
 209
 210 static inline bool
 211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
 212 {
 213         if (c1 == '/') {
 214                 if (c2 == '*') {
 215                         return true;
 216                 }
 217         }
 218         else if (c1 == '#') {
 219                 return true;
 220         }
 221         return false;
 222 }
 223
 224 /**
 225  * Check variable found
 226  * @param parser
 227  * @param ptr
 228  * @param remain
 229  * @param out_len
 230  * @param strict
 231  * @param found
 232  * @return
 233  */
 234 static inline const char *
 235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
 236                 size_t *out_len, bool strict, bool *found)
 237 {
 238         struct ucl_variable *var;
 239
 240         LL_FOREACH (parser->variables, var) {
 241                 if (strict) {
 242                         if (remain == var->var_len) {
 243                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
 244                                         *out_len += var->value_len;
 245                                         *found = true;
 246                                         return (ptr + var->var_len);
 247                                 }
 248                         }
 249                 }
 250                 else {
 251                         if (remain >= var->var_len) {
 252                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
 253                                         *out_len += var->value_len;
 254                                         *found = true;
 255                                         return (ptr + var->var_len);
 256                                 }
 257                         }
 258                 }
 259         }
 260
 261         return ptr;
 262 }
 263
 264 /**
 265  * Check for a variable in a given string
 266  * @param parser
 267  * @param ptr
 268  * @param remain
 269  * @param out_len
 270  * @param vars_found
 271  * @return
 272  */
 273 static const char *
 274 ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
 275 {
 276         const char *p, *end, *ret = ptr;
 277         bool found = false;
 278
 279         if (*ptr == '{') {
 280                 /* We need to match the variable enclosed in braces */
 281                 p = ptr + 1;
 282                 end = ptr + remain;
 283                 while (p < end) {
 284                         if (*p == '}') {
 285                                 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
 286                                 if (found) {
 287                                         /* {} must be excluded actually */
 288                                         ret ++;
 289                                         if (!*vars_found) {
 290                                                 *vars_found = true;
 291                                         }
 292                                 }
 293                                 else {
 294                                         *out_len += 2;
 295                                 }
 296                                 break;
 297                         }
 298                         p ++;
 299                 }
 300         }
 301         else if (*ptr != '$') {
 302                 /* Not count escaped dollar sign */
 303                 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
 304                 if (found && !*vars_found) {
 305                         *vars_found = true;
 306                 }
 307                 if (!found) {
 308                         (*out_len) ++;
 309                 }
 310         }
 311         else {
 312                 ret ++;
 313                 (*out_len) ++;
 314         }
 315
 316         return ret;
 317 }
 318
 319 /**
 320  * Expand a single variable
 321  * @param parser
 322  * @param ptr
 323  * @param remain
 324  * @param dest
 325  * @return
 326  */
 327 static const char *
 328 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
 329                 size_t remain, unsigned char **dest)
 330 {
 331         unsigned char *d = *dest;
 332         const char *p = ptr + 1, *ret;
 333         struct ucl_variable *var;
 334         bool found = false;
 335
 336         ret = ptr + 1;
 337         remain --;
 338
 339         if (*p == '$') {
 340                 *d++ = *p++;
 341                 *dest = d;
 342                 return p;
 343         }
 344         else if (*p == '{') {
 345                 p ++;
 346                 ret += 2;
 347                 remain -= 2;
 348         }
 349
 350         LL_FOREACH (parser->variables, var) {
 351                 if (remain >= var->var_len) {
 352                         if (memcmp (p, var->var, var->var_len) == 0) {
 353                                 memcpy (d, var->value, var->value_len);
 354                                 ret += var->var_len;
 355                                 d += var->value_len;
 356                                 found = true;
 357                                 break;
 358                         }
 359                 }
 360         }
 361         if (!found) {
 362                 memcpy (d, ptr, 2);
 363                 d += 2;
 364                 ret --;
 365         }
 366
 367         *dest = d;
 368         return ret;
 369 }
 370
 371 /**
 372  * Expand variables in string
 373  * @param parser
 374  * @param dst
 375  * @param src
 376  * @param in_len
 377  * @return
 378  */
 379 static ssize_t
 380 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
 381                 const char *src, size_t in_len)
 382 {
 383         const char *p, *end = src + in_len;
 384         unsigned char *d;
 385         size_t out_len = 0;
 386         bool vars_found = false;
 387
 388         p = src;
 389         while (p != end) {
 390                 if (*p == '$') {
 391                         p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
 392                 }
 393                 else {
 394                         p ++;
 395                         out_len ++;
 396                 }
 397         }
 398
 399         if (!vars_found) {
 400                 /* Trivial case */
 401                 *dst = NULL;
 402                 return in_len;
 403         }
 404
 405         *dst = UCL_ALLOC (out_len + 1);
 406         if (*dst == NULL) {
 407                 return in_len;
 408         }
 409
 410         d = *dst;
 411         p = src;
 412         while (p != end) {
 413                 if (*p == '$') {
 414                         p = ucl_expand_single_variable (parser, p, end - p, &d);
 415                 }
 416                 else {
 417                         *d++ = *p++;
 418                 }
 419         }
 420
 421         *d = '\0';
 422
 423         return out_len;
 424 }
 425
 426 /**
 427  * Store or copy pointer to the trash stack
 428  * @param parser parser object
 429  * @param src src string
 430  * @param dst destination buffer (trash stack pointer)
 431  * @param dst_const const destination pointer (e.g. value of object)
 432  * @param in_len input length
 433  * @param need_unescape need to unescape source (and copy it)
 434  * @param need_lowercase need to lowercase value (and copy)
 435  * @param need_expand need to expand variables (and copy as well)
 436  * @return output length (excluding \0 symbol)
 437  */
 438 static inline ssize_t
 439 ucl_copy_or_store_ptr (struct ucl_parser *parser,
 440                 const unsigned char *src, unsigned char **dst,
 441                 const char **dst_const, size_t in_len,
 442                 bool need_unescape, bool need_lowercase, bool need_expand)
 443 {
 444         ssize_t ret = -1, tret;
 445         unsigned char *tmp;
 446
 447         if (need_unescape || need_lowercase ||
 448                         (need_expand && parser->variables != NULL) ||
 449                         !(parser->flags & UCL_PARSER_ZEROCOPY)) {
 450                 /* Copy string */
 451                 *dst = UCL_ALLOC (in_len + 1);
 452                 if (*dst == NULL) {
 453                         ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
 454                         return false;
 455                 }
 456                 if (need_lowercase) {
 457                         ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
 458                 }
 459                 else {
 460                         ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
 461                 }
 462
 463                 if (need_unescape) {
 464                         ret = ucl_unescape_json_string (*dst, ret);
 465                 }
 466                 if (need_expand) {
 467                         tmp = *dst;
 468                         tret = ret;
 469                         ret = ucl_expand_variable (parser, dst, tmp, ret);
 470                         if (*dst == NULL) {
 471                                 /* Nothing to expand */
 472                                 *dst = tmp;
 473                                 ret = tret;
 474                         }
 475                 }
 476                 *dst_const = *dst;
 477         }
 478         else {
 479                 *dst_const = src;
 480                 ret = in_len;
 481         }
 482
 483         return ret;
 484 }
 485
 486 /**
 487  * Create and append an object at the specified level
 488  * @param parser
 489  * @param is_array
 490  * @param level
 491  * @return
 492  */
 493 static inline ucl_object_t *
 494 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
 495 {
 496         struct ucl_stack *st;
 497
 498         if (!is_array) {
 499                 if (obj == NULL) {
 500                         obj = ucl_object_typed_new (UCL_OBJECT);
 501                 }
 502                 else {
 503                         obj->type = UCL_OBJECT;
 504                 }
 505                 obj->value.ov = ucl_hash_create ();
 506                 parser->state = UCL_STATE_KEY;
 507         }
 508         else {
 509                 if (obj == NULL) {
 510                         obj = ucl_object_typed_new (UCL_ARRAY);
 511                 }
 512                 else {
 513                         obj->type = UCL_ARRAY;
 514                 }
 515                 parser->state = UCL_STATE_VALUE;
 516         }
 517
 518         st = UCL_ALLOC (sizeof (struct ucl_stack));
 519         if (st == NULL) {
 520                 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
 521                 return NULL;
 522         }
 523         st->obj = obj;
 524         st->level = level;
 525         LL_PREPEND (parser->stack, st);
 526         parser->cur_obj = obj;
 527
 528         return obj;
 529 }
 530
 531 int
 532 ucl_maybe_parse_number (ucl_object_t *obj,
 533                 const char *start, const char *end, const char **pos,
 534                 bool allow_double, bool number_bytes, bool allow_time)
 535 {
 536         const char *p = start, *c = start;
 537         char *endptr;
 538         bool got_dot = false, got_exp = false, need_double = false,
 539                         is_time = false, valid_start = false, is_hex = false,
 540                         is_neg = false;
 541         double dv = 0;
 542         int64_t lv = 0;
 543
 544         if (*p == '-') {
 545                 is_neg = true;
 546                 c ++;
 547                 p ++;
 548         }
 549         while (p < end) {
 550                 if (is_hex && isxdigit (*p)) {
 551                         p ++;
 552                 }
 553                 else if (isdigit (*p)) {
 554                         valid_start = true;
 555                         p ++;
 556                 }
 557                 else if (!is_hex && (*p == 'x' || *p == 'X')) {
 558                         is_hex = true;
 559                         allow_double = false;
 560                         c = p + 1;
 561                 }
 562                 else if (allow_double) {
 563                         if (p == c) {
 564                                 /* Empty digits sequence, not a number */
 565                                 *pos = start;
 566                                 return EINVAL;
 567                         }
 568                         else if (*p == '.') {
 569                                 if (got_dot) {
 570                                         /* Double dots, not a number */
 571                                         *pos = start;
 572                                         return EINVAL;
 573                                 }
 574                                 else {
 575                                         got_dot = true;
 576                                         need_double = true;
 577                                         p ++;
 578                                 }
 579                         }
 580                         else if (*p == 'e' || *p == 'E') {
 581                                 if (got_exp) {
 582                                         /* Double exp, not a number */
 583                                         *pos = start;
 584                                         return EINVAL;
 585                                 }
 586                                 else {
 587                                         got_exp = true;
 588                                         need_double = true;
 589                                         p ++;
 590                                         if (p >= end) {
 591                                                 *pos = start;
 592                                                 return EINVAL;
 593                                         }
 594                                         if (!isdigit (*p) && *p != '+' && *p != '-') {
 595                                                 /* Wrong exponent sign */
 596                                                 *pos = start;
 597                                                 return EINVAL;
 598                                         }
 599                                         else {
 600                                                 p ++;
 601                                         }
 602                                 }
 603                         }
 604                         else {
 605                                 /* Got the end of the number, need to check */
 606                                 break;
 607                         }
 608                 }
 609                 else {
 610                         break;
 611                 }
 612         }
 613
 614         if (!valid_start) {
 615                 *pos = start;
 616                 return EINVAL;
 617         }
 618
 619         errno = 0;
 620         if (need_double) {
 621                 dv = strtod (c, &endptr);
 622         }
 623         else {
 624                 if (is_hex) {
 625                         lv = strtoimax (c, &endptr, 16);
 626                 }
 627                 else {
 628                         lv = strtoimax (c, &endptr, 10);
 629                 }
 630         }
 631         if (errno == ERANGE) {
 632                 *pos = start;
 633                 return ERANGE;
 634         }
 635
 636         /* Now check endptr */
 637         if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
 638                         ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
 639                 p = endptr;
 640                 goto set_obj;
 641         }
 642
 643         if (endptr < end && endptr != start) {
 644                 p = endptr;
 645                 switch (*p) {
 646                 case 'm':
 647                 case 'M':
 648                 case 'g':
 649                 case 'G':
 650                 case 'k':
 651                 case 'K':
 652                         if (end - p >= 2) {
 653                                 if (p[1] == 's' || p[1] == 'S') {
 654                                         /* Milliseconds */
 655                                         if (!need_double) {
 656                                                 need_double = true;
 657                                                 dv = lv;
 658                                         }
 659                                         is_time = true;
 660                                         if (p[0] == 'm' || p[0] == 'M') {
 661                                                 dv /= 1000.;
 662                                         }
 663                                         else {
 664                                                 dv *= ucl_lex_num_multiplier (*p, false);
 665                                         }
 666                                         p += 2;
 667                                         goto set_obj;
 668                                 }
 669                                 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
 670                                         /* Bytes */
 671                                         if (need_double) {
 672                                                 need_double = false;
 673                                                 lv = dv;
 674                                         }
 675                                         lv *= ucl_lex_num_multiplier (*p, true);
 676                                         p += 2;
 677                                         goto set_obj;
 678                                 }
 679                                 else if (ucl_lex_is_atom_end (p[1])) {
 680                                         if (need_double) {
 681                                                 dv *= ucl_lex_num_multiplier (*p, false);
 682                                         }
 683                                         else {
 684                                                 lv *= ucl_lex_num_multiplier (*p, number_bytes);
 685                                         }
 686                                         p ++;
 687                                         goto set_obj;
 688                                 }
 689                                 else if (allow_time && end - p >= 3) {
 690                                         if (tolower (p[0]) == 'm' &&
 691                                                         tolower (p[1]) == 'i' &&
 692                                                         tolower (p[2]) == 'n') {
 693                                                 /* Minutes */
 694                                                 if (!need_double) {
 695                                                         need_double = true;
 696                                                         dv = lv;
 697                                                 }
 698                                                 is_time = true;
 699                                                 dv *= 60.;
 700                                                 p += 3;
 701                                                 goto set_obj;
 702                                         }
 703                                 }
 704                         }
 705                         else {
 706                                 if (need_double) {
 707                                         dv *= ucl_lex_num_multiplier (*p, false);
 708                                 }
 709                                 else {
 710                                         lv *= ucl_lex_num_multiplier (*p, number_bytes);
 711                                 }
 712                                 p ++;
 713                                 goto set_obj;
 714                         }
 715                         break;
 716                 case 'S':
 717                 case 's':
 718                         if (allow_time &&
 719                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
 720                                 if (!need_double) {
 721                                         need_double = true;
 722                                         dv = lv;
 723                                 }
 724                                 p ++;
 725                                 is_time = true;
 726                                 goto set_obj;
 727                         }
 728                         break;
 729                 case 'h':
 730                 case 'H':
 731                 case 'd':
 732                 case 'D':
 733                 case 'w':
 734                 case 'W':
 735                 case 'Y':
 736                 case 'y':
 737                         if (allow_time &&
 738                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
 739                                 if (!need_double) {
 740                                         need_double = true;
 741                                         dv = lv;
 742                                 }
 743                                 is_time = true;
 744                                 dv *= ucl_lex_time_multiplier (*p);
 745                                 p ++;
 746                                 goto set_obj;
 747                         }
 748                         break;
 749                 }
 750         }
 751
 752         *pos = c;
 753         return EINVAL;
 754
 755         set_obj:
 756         if (allow_double && (need_double || is_time)) {
 757                 if (!is_time) {
 758                         obj->type = UCL_FLOAT;
 759                 }
 760                 else {
 761                         obj->type = UCL_TIME;
 762                 }
 763                 obj->value.dv = is_neg ? (-dv) : dv;
 764         }
 765         else {
 766                 obj->type = UCL_INT;
 767                 obj->value.iv = is_neg ? (-lv) : lv;
 768         }
 769         *pos = p;
 770         return 0;
 771 }
 772
 773 /**
 774  * Parse possible number
 775  * @param parser
 776  * @param chunk
 777  * @return true if a number has been parsed
 778  */
 779 static bool
 780 ucl_lex_number (struct ucl_parser *parser,
 781                 struct ucl_chunk *chunk, ucl_object_t *obj)
 782 {
 783         const unsigned char *pos;
 784         int ret;
 785
 786         ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
 787                         true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
 788
 789         if (ret == 0) {
 790                 chunk->remain -= pos - chunk->pos;
 791                 chunk->column += pos - chunk->pos;
 792                 chunk->pos = pos;
 793                 return true;
 794         }
 795         else if (ret == ERANGE) {
 796                 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
 797         }
 798
 799         return false;
 800 }
 801
 802 /**
 803  * Parse quoted string with possible escapes
 804  * @param parser
 805  * @param chunk
 806  * @return true if a string has been parsed
 807  */
 808 static bool
 809 ucl_lex_json_string (struct ucl_parser *parser,
 810                 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
 811 {
 812         const unsigned char *p = chunk->pos;
 813         unsigned char c;
 814         int i;
 815
 816         while (p < chunk->end) {
 817                 c = *p;
 818                 if (c < 0x1F) {
 819                         /* Unmasked control character */
 820                         if (c == '\n') {
 821                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
 822                         }
 823                         else {
 824                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
 825                         }
 826                         return false;
 827                 }
 828                 else if (c == '\\') {
 829                         ucl_chunk_skipc (chunk, p);
 830                         c = *p;
 831                         if (p >= chunk->end) {
 832                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
 833                                 return false;
 834                         }
 835                         else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
 836                                 if (c == 'u') {
 837                                         ucl_chunk_skipc (chunk, p);
 838                                         for (i = 0; i < 4 && p < chunk->end; i ++) {
 839                                                 if (!isxdigit (*p)) {
 840                                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
 841                                                         return false;
 842                                                 }
 843                                                 ucl_chunk_skipc (chunk, p);
 844                                         }
 845                                         if (p >= chunk->end) {
 846                                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
 847                                                 return false;
 848                                         }
 849                                 }
 850                                 else {
 851                                         ucl_chunk_skipc (chunk, p);
 852                                 }
 853                         }
 854                         *need_unescape = true;
 855                         *ucl_escape = true;
 856                         continue;
 857                 }
 858                 else if (c == '"') {
 859                         ucl_chunk_skipc (chunk, p);
 860                         return true;
 861                 }
 862                 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
 863                         *ucl_escape = true;
 864                 }
 865                 else if (c == '$') {
 866                         *var_expand = true;
 867                 }
 868                 ucl_chunk_skipc (chunk, p);
 869         }
 870
 871         ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
 872         return false;
 873 }
 874
 875 /**
 876  * Parse a key in an object
 877  * @param parser
 878  * @param chunk
 879  * @return true if a key has been parsed
 880  */
 881 static bool
 882 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
 883 {
 884         const unsigned char *p, *c = NULL, *end, *t;
 885         const char *key = NULL;
 886         bool got_quote = false, got_eq = false, got_semicolon = false,
 887                         need_unescape = false, ucl_escape = false, var_expand = false,
 888                         got_content = false, got_sep = false;
 889         ucl_object_t *nobj, *tobj;
 890         ucl_hash_t *container;
 891         ssize_t keylen;
 892
 893         p = chunk->pos;
 894
 895         if (*p == '.') {
 896                 /* It is macro actually */
 897                 ucl_chunk_skipc (chunk, p);
 898                 parser->prev_state = parser->state;
 899                 parser->state = UCL_STATE_MACRO_NAME;
 900                 return true;
 901         }
 902         while (p < chunk->end) {
 903                 /*
 904                  * A key must start with alpha, number, '/' or '_' and end with space character
 905                  */
 906                 if (c == NULL) {
 907                         if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
 908                                 if (!ucl_skip_comments (parser)) {
 909                                         return false;
 910                                 }
 911                                 p = chunk->pos;
 912                         }
 913                         else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
 914                                 ucl_chunk_skipc (chunk, p);
 915                         }
 916                         else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
 917                                 /* The first symbol */
 918                                 c = p;
 919                                 ucl_chunk_skipc (chunk, p);
 920                                 got_content = true;
 921                         }
 922                         else if (*p == '"') {
 923                                 /* JSON style key */
 924                                 c = p + 1;
 925                                 got_quote = true;
 926                                 got_content = true;
 927                                 ucl_chunk_skipc (chunk, p);
 928                         }
 929                         else if (*p == '}') {
 930                                 /* We have actually end of an object */
 931                                 *end_of_object = true;
 932                                 return true;
 933                         }
 934                         else if (*p == '.') {
 935                                 ucl_chunk_skipc (chunk, p);
 936                                 parser->prev_state = parser->state;
 937                                 parser->state = UCL_STATE_MACRO_NAME;
 938                                 return true;
 939                         }
 940                         else {
 941                                 /* Invalid identifier */
 942                                 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
 943                                 return false;
 944                         }
 945                 }
 946                 else {
 947                         /* Parse the body of a key */
 948                         if (!got_quote) {
 949                                 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
 950                                         got_content = true;
 951                                         ucl_chunk_skipc (chunk, p);
 952                                 }
 953                                 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
 954                                         end = p;
 955                                         break;
 956                                 }
 957                                 else {
 958                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
 959                                         return false;
 960                                 }
 961                         }
 962                         else {
 963                                 /* We need to parse json like quoted string */
 964                                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
 965                                         return false;
 966                                 }
 967                                 /* Always escape keys obtained via json */
 968                                 end = chunk->pos - 1;
 969                                 p = chunk->pos;
 970                                 break;
 971                         }
 972                 }
 973         }
 974
 975         if (p >= chunk->end && got_content) {
 976                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
 977                 return false;
 978         }
 979         else if (!got_content) {
 980                 return true;
 981         }
 982         *end_of_object = false;
 983         /* We are now at the end of the key, need to parse the rest */
 984         while (p < chunk->end) {
 985                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
 986                         ucl_chunk_skipc (chunk, p);
 987                 }
 988                 else if (*p == '=') {
 989                         if (!got_eq && !got_semicolon) {
 990                                 ucl_chunk_skipc (chunk, p);
 991                                 got_eq = true;
 992                         }
 993                         else {
 994                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
 995                                 return false;
 996                         }
 997                 }
 998                 else if (*p == ':') {
 999                         if (!got_eq && !got_semicolon) {
1000                                 ucl_chunk_skipc (chunk, p);
1001                                 got_semicolon = true;
1002                         }
1003                         else {
1004                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1005                                 return false;
1006                         }
1007                 }
1008                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1009                         /* Check for comment */
1010                         if (!ucl_skip_comments (parser)) {
1011                                 return false;
1012                         }
1013                         p = chunk->pos;
1014                 }
1015                 else {
1016                         /* Start value */
1017                         break;
1018                 }
1019         }
1020
1021         if (p >= chunk->end && got_content) {
1022                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1023                 return false;
1024         }
1025
1026         got_sep = got_semicolon || got_eq;
1027
1028         if (!got_sep) {
1029                 /*
1030                  * Maybe we have more keys nested, so search for termination character.
1031                  * Possible choices:
1032                  * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1033                  * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1034                  * 3) key1 value[;,\n] <- we treat that as linear object
1035                  */
1036                 t = p;
1037                 *next_key = false;
1038                 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1039                         t ++;
1040                 }
1041                 /* Check first non-space character after a key */
1042                 if (*t != '{' && *t != '[') {
1043                         while (t < chunk->end) {
1044                                 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1045                                         break;
1046                                 }
1047                                 else if (*t == '{' || *t == '[') {
1048                                         *next_key = true;
1049                                         break;
1050                                 }
1051                                 t ++;
1052                         }
1053                 }
1054         }
1055
1056         /* Create a new object */
1057         nobj = ucl_object_new ();
1058         keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1059                         &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1060         if (keylen == -1) {
1061                 ucl_object_unref (nobj);
1062                 return false;
1063         }
1064         else if (keylen == 0) {
1065                 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1066                 ucl_object_unref (nobj);
1067                 return false;
1068         }
1069
1070         container = parser->stack->obj->value.ov;
1071         nobj->key = key;
1072         nobj->keylen = keylen;
1073         tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1074         if (tobj == NULL) {
1075                 container = ucl_hash_insert_object (container, nobj);
1076                 nobj->prev = nobj;
1077                 nobj->next = NULL;
1078                 parser->stack->obj->len ++;
1079         }
1080         else {
1081                 DL_APPEND (tobj, nobj);
1082         }
1083
1084         if (ucl_escape) {
1085                 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1086         }
1087         parser->stack->obj->value.ov = container;
1088
1089         parser->cur_obj = nobj;
1090
1091         return true;
1092 }
1093
1094 /**
1095  * Parse a cl string
1096  * @param parser
1097  * @param chunk
1098  * @return true if a key has been parsed
1099  */
1100 static bool
1101 ucl_parse_string_value (struct ucl_parser *parser,
1102                 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1103 {
1104         const unsigned char *p;
1105         enum {
1106                 UCL_BRACE_ROUND = 0,
1107                 UCL_BRACE_SQUARE,
1108                 UCL_BRACE_FIGURE
1109         };
1110         int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1111
1112         p = chunk->pos;
1113
1114         while (p < chunk->end) {
1115
1116                 /* Skip pairs of figure braces */
1117                 if (*p == '{') {
1118                         braces[UCL_BRACE_FIGURE][0] ++;
1119                 }
1120                 else if (*p == '}') {
1121                         braces[UCL_BRACE_FIGURE][1] ++;
1122                         if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1123                                 /* This is not a termination symbol, continue */
1124                                 ucl_chunk_skipc (chunk, p);
1125                                 continue;
1126                         }
1127                 }
1128                 /* Skip pairs of square braces */
1129                 else if (*p == '[') {
1130                         braces[UCL_BRACE_SQUARE][0] ++;
1131                 }
1132                 else if (*p == ']') {
1133                         braces[UCL_BRACE_SQUARE][1] ++;
1134                         if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1135                                 /* This is not a termination symbol, continue */
1136                                 ucl_chunk_skipc (chunk, p);
1137                                 continue;
1138                         }
1139                 }
1140                 else if (*p == '$') {
1141                         *var_expand = true;
1142                 }
1143                 else if (*p == '\\') {
1144                         *need_unescape = true;
1145                         ucl_chunk_skipc (chunk, p);
1146                         if (p < chunk->end) {
1147                                 ucl_chunk_skipc (chunk, p);
1148                         }
1149                         continue;
1150                 }
1151
1152                 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1153                         break;
1154                 }
1155                 ucl_chunk_skipc (chunk, p);
1156         }
1157
1158         if (p >= chunk->end) {
1159                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1160                 return false;
1161         }
1162
1163         return true;
1164 }
1165
1166 /**
1167  * Parse multiline string ending with \n{term}\n
1168  * @param parser
1169  * @param chunk
1170  * @param term
1171  * @param term_len
1172  * @return size of multiline string or 0 in case of error
1173  */
1174 static int
1175 ucl_parse_multiline_string (struct ucl_parser *parser,
1176                 struct ucl_chunk *chunk, const unsigned char *term,
1177                 int term_len, unsigned char const **beg,
1178                 bool *var_expand)
1179 {
1180         const unsigned char *p, *c;
1181         bool newline = false;
1182         int len = 0;
1183
1184         p = chunk->pos;
1185
1186         c = p;
1187
1188         while (p < chunk->end) {
1189                 if (newline) {
1190                         if (chunk->end - p < term_len) {
1191                                 return 0;
1192                         }
1193                         else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1194                                 len = p - c;
1195                                 chunk->remain -= term_len;
1196                                 chunk->pos = p + term_len;
1197                                 chunk->column = term_len;
1198                                 *beg = c;
1199                                 break;
1200                         }
1201                 }
1202                 if (*p == '\n') {
1203                         newline = true;
1204                 }
1205                 else {
1206                         if (*p == '$') {
1207                                 *var_expand = true;
1208                         }
1209                         newline = false;
1210                 }
1211                 ucl_chunk_skipc (chunk, p);
1212         }
1213
1214         return len;
1215 }
1216
1217 static ucl_object_t*
1218 ucl_get_value_object (struct ucl_parser *parser)
1219 {
1220         ucl_object_t *t, *obj = NULL;
1221
1222         if (parser->stack->obj->type == UCL_ARRAY) {
1223                 /* Object must be allocated */
1224                 obj = ucl_object_new ();
1225                 t = parser->stack->obj->value.av;
1226                 DL_APPEND (t, obj);
1227                 parser->cur_obj = obj;
1228                 parser->stack->obj->value.av = t;
1229                 parser->stack->obj->len ++;
1230         }
1231         else {
1232                 /* Object has been already allocated */
1233                 obj = parser->cur_obj;
1234         }
1235
1236         return obj;
1237 }
1238
1239 /**
1240  * Handle value data
1241  * @param parser
1242  * @param chunk
1243  * @return
1244  */
1245 static bool
1246 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1247 {
1248         const unsigned char *p, *c;
1249         ucl_object_t *obj = NULL;
1250         unsigned int stripped_spaces;
1251         int str_len;
1252         bool need_unescape = false, ucl_escape = false, var_expand = false;
1253
1254         p = chunk->pos;
1255
1256         /* Skip any spaces and comments */
1257         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1258                         (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1259                 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1260                         ucl_chunk_skipc (chunk, p);
1261                 }
1262                 if (!ucl_skip_comments (parser)) {
1263                         return false;
1264                 }
1265                 p = chunk->pos;
1266         }
1267
1268         while (p < chunk->end) {
1269                 c = p;
1270                 switch (*p) {
1271                 case '"':
1272                         obj = ucl_get_value_object (parser);
1273                         ucl_chunk_skipc (chunk, p);
1274                         if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1275                                 return false;
1276                         }
1277                         str_len = chunk->pos - c - 2;
1278                         obj->type = UCL_STRING;
1279                         if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1280                                         &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1281                                 return false;
1282                         }
1283                         obj->len = str_len;
1284                         parser->state = UCL_STATE_AFTER_VALUE;
1285                         p = chunk->pos;
1286                         return true;
1287                         break;
1288                 case '{':
1289                         obj = ucl_get_value_object (parser);
1290                         /* We have a new object */
1291                         obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1292                         if (obj == NULL) {
1293                                 return false;
1294                         }
1295
1296                         ucl_chunk_skipc (chunk, p);
1297                         return true;
1298                         break;
1299                 case '[':
1300                         obj = ucl_get_value_object (parser);
1301                         /* We have a new array */
1302                         obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1303                         if (obj == NULL) {
1304                                 return false;
1305                         }
1306
1307                         ucl_chunk_skipc (chunk, p);
1308                         return true;
1309                         break;
1310                 case ']':
1311                         /* We have the array ending */
1312                         if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1313                                 parser->state = UCL_STATE_AFTER_VALUE;
1314                                 return true;
1315                         }
1316                         else {
1317                                 goto parse_string;
1318                         }
1319                         break;
1320                 case '<':
1321                         obj = ucl_get_value_object (parser);
1322                         /* We have something like multiline value, which must be <<[A-Z]+\n */
1323                         if (chunk->end - p > 3) {
1324                                 if (memcmp (p, "<<", 2) == 0) {
1325                                         p += 2;
1326                                         /* We allow only uppercase characters in multiline definitions */
1327                                         while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1328                                                 p ++;
1329                                         }
1330                                         if (*p =='\n') {
1331                                                 /* Set chunk positions and start multiline parsing */
1332                                                 c += 2;
1333                                                 chunk->remain -= p - c;
1334                                                 chunk->pos = p + 1;
1335                                                 chunk->column = 0;
1336                                                 chunk->line ++;
1337                                                 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1338                                                                 p - c, &c, &var_expand)) == 0) {
1339                                                         ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1340                                                         return false;
1341                                                 }
1342                                                 obj->type = UCL_STRING;
1343                                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1344                                                         &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1345                                                         return false;
1346                                                 }
1347                                                 obj->len = str_len;
1348                                                 parser->state = UCL_STATE_AFTER_VALUE;
1349                                                 return true;
1350                                         }
1351                                 }
1352                         }
1353                         /* Fallback to ordinary strings */
1354                 default:
1355 parse_string:
1356                         if (obj == NULL) {
1357                                 obj = ucl_get_value_object (parser);
1358                         }
1359                         /* Parse atom */
1360                         if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1361                                 if (!ucl_lex_number (parser, chunk, obj)) {
1362                                         if (parser->state == UCL_STATE_ERROR) {
1363                                                 return false;
1364                                         }
1365                                 }
1366                                 else {
1367                                         parser->state = UCL_STATE_AFTER_VALUE;
1368                                         return true;
1369                                 }
1370                                 /* Fallback to normal string */
1371                         }
1372
1373                         if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1374                                 return false;
1375                         }
1376                         /* Cut trailing spaces */
1377                         stripped_spaces = 0;
1378                         while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1379                                         UCL_CHARACTER_WHITESPACE)) {
1380                                 stripped_spaces ++;
1381                         }
1382                         str_len = chunk->pos - c - stripped_spaces;
1383                         if (str_len <= 0) {
1384                                 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1385                                 return false;
1386                         }
1387                         else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1388                                 obj->len = 0;
1389                                 obj->type = UCL_NULL;
1390                         }
1391                         else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1392                                 obj->type = UCL_STRING;
1393                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1394                                                 &obj->value.sv, str_len, need_unescape,
1395                                                 false, var_expand)) == -1) {
1396                                         return false;
1397                                 }
1398                                 obj->len = str_len;
1399                         }
1400                         parser->state = UCL_STATE_AFTER_VALUE;
1401                         p = chunk->pos;
1402
1403                         return true;
1404                         break;
1405                 }
1406         }
1407
1408         return true;
1409 }
1410
1411 /**
1412  * Handle after value data
1413  * @param parser
1414  * @param chunk
1415  * @return
1416  */
1417 static bool
1418 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1419 {
1420         const unsigned char *p;
1421         bool got_sep = false;
1422         struct ucl_stack *st;
1423
1424         p = chunk->pos;
1425
1426         while (p < chunk->end) {
1427                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1428                         /* Skip whitespaces */
1429                         ucl_chunk_skipc (chunk, p);
1430                 }
1431                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1432                         /* Skip comment */
1433                         if (!ucl_skip_comments (parser)) {
1434                                 return false;
1435                         }
1436                         /* Treat comment as a separator */
1437                         got_sep = true;
1438                         p = chunk->pos;
1439                 }
1440                 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1441                         if (*p == '}' || *p == ']') {
1442                                 if (parser->stack == NULL) {
1443                                         ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1444                                         return false;
1445                                 }
1446                                 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1447                                                 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1448
1449                                         /* Pop all nested objects from a stack */
1450                                         st = parser->stack;
1451                                         parser->stack = st->next;
1452                                         UCL_FREE (sizeof (struct ucl_stack), st);
1453
1454                                         while (parser->stack != NULL) {
1455                                                 st = parser->stack;
1456                                                 if (st->next == NULL || st->next->level == st->level) {
1457                                                         break;
1458                                                 }
1459                                                 parser->stack = st->next;
1460                                                 UCL_FREE (sizeof (struct ucl_stack), st);
1461                                         }
1462                                 }
1463                                 else {
1464                                         ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1465                                         return false;
1466                                 }
1467
1468                                 if (parser->stack == NULL) {
1469                                         /* Ignore everything after a top object */
1470                                         return true;
1471                                 }
1472                                 else {
1473                                         ucl_chunk_skipc (chunk, p);
1474                                 }
1475                                 got_sep = true;
1476                         }
1477                         else {
1478                                 /* Got a separator */
1479                                 got_sep = true;
1480                                 ucl_chunk_skipc (chunk, p);
1481                         }
1482                 }
1483                 else {
1484                         /* Anything else */
1485                         if (!got_sep) {
1486                                 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1487                                 return false;
1488                         }
1489                         return true;
1490                 }
1491         }
1492
1493         return true;
1494 }
1495
1496 /**
1497  * Handle macro data
1498  * @param parser
1499  * @param chunk
1500  * @return
1501  */
1502 static bool
1503 ucl_parse_macro_value (struct ucl_parser *parser,
1504                 struct ucl_chunk *chunk, struct ucl_macro *macro,
1505                 unsigned char const **macro_start, size_t *macro_len)
1506 {
1507         const unsigned char *p, *c;
1508         bool need_unescape = false, ucl_escape = false, var_expand = false;
1509
1510         p = chunk->pos;
1511
1512         switch (*p) {
1513         case '"':
1514                 /* We have macro value encoded in quotes */
1515                 c = p;
1516                 ucl_chunk_skipc (chunk, p);
1517                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1518                         return false;
1519                 }
1520
1521                 *macro_start = c + 1;
1522                 *macro_len = chunk->pos - c - 2;
1523                 p = chunk->pos;
1524                 break;
1525         case '{':
1526                 /* We got a multiline macro body */
1527                 ucl_chunk_skipc (chunk, p);
1528                 /* Skip spaces at the beginning */
1529                 while (p < chunk->end) {
1530                         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1531                                 ucl_chunk_skipc (chunk, p);
1532                         }
1533                         else {
1534                                 break;
1535                         }
1536                 }
1537                 c = p;
1538                 while (p < chunk->end) {
1539                         if (*p == '}') {
1540                                 break;
1541                         }
1542                         ucl_chunk_skipc (chunk, p);
1543                 }
1544                 *macro_start = c;
1545                 *macro_len = p - c;
1546                 ucl_chunk_skipc (chunk, p);
1547                 break;
1548         default:
1549                 /* Macro is not enclosed in quotes or braces */
1550                 c = p;
1551                 while (p < chunk->end) {
1552                         if (ucl_lex_is_atom_end (*p)) {
1553                                 break;
1554                         }
1555                         ucl_chunk_skipc (chunk, p);
1556                 }
1557                 *macro_start = c;
1558                 *macro_len = p - c;
1559                 break;
1560         }
1561
1562         /* We are at the end of a macro */
1563         /* Skip ';' and space characters and return to previous state */
1564         while (p < chunk->end) {
1565                 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1566                         break;
1567                 }
1568                 ucl_chunk_skipc (chunk, p);
1569         }
1570         return true;
1571 }
1572
1573 /**
1574  * Handle the main states of rcl parser
1575  * @param parser parser structure
1576  * @param data the pointer to the beginning of a chunk
1577  * @param len the length of a chunk
1578  * @return true if chunk has been parsed and false in case of error
1579  */
1580 static bool
1581 ucl_state_machine (struct ucl_parser *parser)
1582 {
1583         ucl_object_t *obj;
1584         struct ucl_chunk *chunk = parser->chunks;
1585         const unsigned char *p, *c = NULL, *macro_start = NULL;
1586         unsigned char *macro_escaped;
1587         size_t macro_len = 0;
1588         struct ucl_macro *macro = NULL;
1589         bool next_key = false, end_of_object = false;
1590
1591         if (parser->top_obj == NULL) {
1592                 if (*chunk->pos == '[') {
1593                         obj = ucl_add_parser_stack (NULL, parser, true, 0);
1594                 }
1595                 else {
1596                         obj = ucl_add_parser_stack (NULL, parser, false, 0);
1597                 }
1598                 if (obj == NULL) {
1599                         return false;
1600                 }
1601                 parser->top_obj = obj;
1602                 parser->cur_obj = obj;
1603                 parser->state = UCL_STATE_INIT;
1604         }
1605
1606         p = chunk->pos;
1607         while (chunk->pos < chunk->end) {
1608                 switch (parser->state) {
1609                 case UCL_STATE_INIT:
1610                         /*
1611                          * At the init state we can either go to the parse array or object
1612                          * if we got [ or { correspondingly or can just treat new data as
1613                          * a key of newly created object
1614                          */
1615                         obj = parser->cur_obj;
1616                         if (!ucl_skip_comments (parser)) {
1617                                 parser->prev_state = parser->state;
1618                                 parser->state = UCL_STATE_ERROR;
1619                                 return false;
1620                         }
1621                         else {
1622                                 p = chunk->pos;
1623                                 if (*p == '[') {
1624                                         parser->state = UCL_STATE_VALUE;
1625                                         ucl_chunk_skipc (chunk, p);
1626                                 }
1627                                 else {
1628                                         parser->state = UCL_STATE_KEY;
1629                                         if (*p == '{') {
1630                                                 ucl_chunk_skipc (chunk, p);
1631                                         }
1632                                 }
1633                         }
1634                         break;
1635                 case UCL_STATE_KEY:
1636                         /* Skip any spaces */
1637                         while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1638                                 ucl_chunk_skipc (chunk, p);
1639                         }
1640                         if (*p == '}') {
1641                                 /* We have the end of an object */
1642                                 parser->state = UCL_STATE_AFTER_VALUE;
1643                                 continue;
1644                         }
1645                         if (parser->stack == NULL) {
1646                                 /* No objects are on stack, but we want to parse a key */
1647                                 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1648                                                 "expects a key", &parser->err);
1649                                 parser->prev_state = parser->state;
1650                                 parser->state = UCL_STATE_ERROR;
1651                                 return false;
1652                         }
1653                         if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1654                                 parser->prev_state = parser->state;
1655                                 parser->state = UCL_STATE_ERROR;
1656                                 return false;
1657                         }
1658                         if (end_of_object) {
1659                                 p = chunk->pos;
1660                                 parser->state = UCL_STATE_AFTER_VALUE;
1661                                 continue;
1662                         }
1663                         else if (parser->state != UCL_STATE_MACRO_NAME) {
1664                                 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1665                                         /* Parse more keys and nest objects accordingly */
1666                                         obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1667                                                         parser->stack->level + 1);
1668                                         if (obj == NULL) {
1669                                                 return false;
1670                                         }
1671                                 }
1672                                 else {
1673                                         parser->state = UCL_STATE_VALUE;
1674                                 }
1675                         }
1676                         else {
1677                                 c = chunk->pos;
1678                         }
1679                         p = chunk->pos;
1680                         break;
1681                 case UCL_STATE_VALUE:
1682                         /* We need to check what we do have */
1683                         if (!ucl_parse_value (parser, chunk)) {
1684                                 parser->prev_state = parser->state;
1685                                 parser->state = UCL_STATE_ERROR;
1686                                 return false;
1687                         }
1688                         /* State is set in ucl_parse_value call */
1689                         p = chunk->pos;
1690                         break;
1691                 case UCL_STATE_AFTER_VALUE:
1692                         if (!ucl_parse_after_value (parser, chunk)) {
1693                                 parser->prev_state = parser->state;
1694                                 parser->state = UCL_STATE_ERROR;
1695                                 return false;
1696                         }
1697                         if (parser->stack != NULL) {
1698                                 if (parser->stack->obj->type == UCL_OBJECT) {
1699                                         parser->state = UCL_STATE_KEY;
1700                                 }
1701                                 else {
1702                                         /* Array */
1703                                         parser->state = UCL_STATE_VALUE;
1704                                 }
1705                         }
1706                         else {
1707                                 /* Skip everything at the end */
1708                                 return true;
1709                         }
1710                         p = chunk->pos;
1711                         break;
1712                 case UCL_STATE_MACRO_NAME:
1713                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1714                                 ucl_chunk_skipc (chunk, p);
1715                         }
1716                         else if (p - c > 0) {
1717                                 /* We got macro name */
1718                                 macro_len = (size_t)(p - c);
1719                                 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1720                                 if (macro == NULL) {
1721                                         ucl_create_err (&parser->err, "error on line %d at column %d: "
1722                                                         "unknown macro: '%.*s', character: '%c'",
1723                                                                 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1724                                         parser->state = UCL_STATE_ERROR;
1725                                         return false;
1726                                 }
1727                                 /* Now we need to skip all spaces */
1728                                 while (p < chunk->end) {
1729                                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1730                                                 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1731                                                         /* Skip comment */
1732                                                         if (!ucl_skip_comments (parser)) {
1733                                                                 return false;
1734                                                         }
1735                                                         p = chunk->pos;
1736                                                 }
1737                                                 break;
1738                                         }
1739                                         ucl_chunk_skipc (chunk, p);
1740                                 }
1741                                 parser->state = UCL_STATE_MACRO;
1742                         }
1743                         break;
1744                 case UCL_STATE_MACRO:
1745                         if (!ucl_parse_macro_value (parser, chunk, macro,
1746                                         &macro_start, &macro_len)) {
1747                                 parser->prev_state = parser->state;
1748                                 parser->state = UCL_STATE_ERROR;
1749                                 return false;
1750                         }
1751                         macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1752                         parser->state = parser->prev_state;
1753                         if (macro_escaped == NULL) {
1754                                 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1755                                         return false;
1756                                 }
1757                         }
1758                         else {
1759                                 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1760                                         UCL_FREE (macro_len + 1, macro_escaped);
1761                                         return false;
1762                                 }
1763                                 UCL_FREE (macro_len + 1, macro_escaped);
1764                         }
1765                         p = chunk->pos;
1766                         break;
1767                 default:
1768                         /* TODO: add all states */
1769                         ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1770                         parser->state = UCL_STATE_ERROR;
1771                         return false;
1772                 }
1773         }
1774
1775         return true;
1776 }
1777
1778 struct ucl_parser*
1779 ucl_parser_new (int flags)
1780 {
1781         struct ucl_parser *new;
1782
1783         new = UCL_ALLOC (sizeof (struct ucl_parser));
1784         if (new == NULL) {
1785                 return NULL;
1786         }
1787         memset (new, 0, sizeof (struct ucl_parser));
1788
1789         ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1790         ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1791         ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1792
1793         new->flags = flags;
1794
1795         /* Initial assumption about filevars */
1796         ucl_parser_set_filevars (new, NULL, false);
1797
1798         return new;
1799 }
1800
1801
1802 void
1803 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1804                 ucl_macro_handler handler, void* ud)
1805 {
1806         struct ucl_macro *new;
1807
1808         if (macro == NULL || handler == NULL) {
1809                 return;
1810         }
1811         new = UCL_ALLOC (sizeof (struct ucl_macro));
1812         if (new == NULL) {
1813                 return;
1814         }
1815         memset (new, 0, sizeof (struct ucl_macro));
1816         new->handler = handler;
1817         new->name = strdup (macro);
1818         new->ud = ud;
1819         HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1820 }
1821
1822 void
1823 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1824                 const char *value)
1825 {
1826         struct ucl_variable *new = NULL, *cur;
1827
1828         if (var == NULL) {
1829                 return;
1830         }
1831
1832         /* Find whether a variable already exists */
1833         LL_FOREACH (parser->variables, cur) {
1834                 if (strcmp (cur->var, var) == 0) {
1835                         new = cur;
1836                         break;
1837                 }
1838         }
1839
1840         if (value == NULL) {
1841
1842                 if (new != NULL) {
1843                         /* Remove variable */
1844                         LL_DELETE (parser->variables, new);
1845                         free (new->var);
1846                         free (new->value);
1847                         UCL_FREE (sizeof (struct ucl_variable), new);
1848                 }
1849                 else {
1850                         /* Do nothing */
1851                         return;
1852                 }
1853         }
1854         else {
1855                 if (new == NULL) {
1856                         new = UCL_ALLOC (sizeof (struct ucl_variable));
1857                         if (new == NULL) {
1858                                 return;
1859                         }
1860                         memset (new, 0, sizeof (struct ucl_variable));
1861                         new->var = strdup (var);
1862                         new->var_len = strlen (var);
1863                         new->value = strdup (value);
1864                         new->value_len = strlen (value);
1865
1866                         LL_PREPEND (parser->variables, new);
1867                 }
1868                 else {
1869                         free (new->value);
1870                         new->value = strdup (value);
1871                         new->value_len = strlen (value);
1872                 }
1873         }
1874 }
1875
1876 bool
1877 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1878                 size_t len)
1879 {
1880         struct ucl_chunk *chunk;
1881
1882         if (data == NULL || len == 0) {
1883                 ucl_create_err (&parser->err, "invalid chunk added");
1884                 return false;
1885         }
1886         if (parser->state != UCL_STATE_ERROR) {
1887                 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1888                 if (chunk == NULL) {
1889                         ucl_create_err (&parser->err, "cannot allocate chunk structure");
1890                         return false;
1891                 }
1892                 chunk->begin = data;
1893                 chunk->remain = len;
1894                 chunk->pos = chunk->begin;
1895                 chunk->end = chunk->begin + len;
1896                 chunk->line = 1;
1897                 chunk->column = 0;
1898                 LL_PREPEND (parser->chunks, chunk);
1899                 parser->recursion ++;
1900                 if (parser->recursion > UCL_MAX_RECURSION) {
1901                         ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1902                                         parser->recursion);
1903                         return false;
1904                 }
1905                 return ucl_state_machine (parser);
1906         }
1907
1908         ucl_create_err (&parser->err, "a parser is in an invalid state");
1909
1910         return false;
1911 }
1912
1913 bool
1914 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1915                 size_t len)
1916 {
1917         if (data == NULL) {
1918                 ucl_create_err (&parser->err, "invalid string added");
1919                 return false;
1920         }
1921         if (len == 0) {
1922                 len = strlen (data);
1923         }
1924
1925         return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1926 }