contrib/libucl/src/ucl_parser.c

   1 /* Copyright (c) 2013, Vsevolod Stakhov
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are met:
   6  *       * Redistributions of source code must retain the above copyright
   7  *         notice, this list of conditions and the following disclaimer.
   8  *       * Redistributions in binary form must reproduce the above copyright
   9  *         notice, this list of conditions and the following disclaimer in the
  10  *         documentation and/or other materials provided with the distribution.
  11  *
  12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22  */
  23
  24 #include "ucl.h"
  25 #include "ucl_internal.h"
  26 #include "ucl_chartable.h"
  27
  28 /**
 * @file ucl_parser.c
 * The implementation of the UCL parser
  31  */
  32
/**
 * Snapshot of a parsing position.
 * NOTE(review): the field names match the chunk fields that
 * ucl_chunk_skipc () advances (line, column, remain, pos); no user of this
 * structure is visible in this part of the file - confirm the intent.
 */
struct ucl_parser_saved_state {
        /* Line counter at the saved position */
        unsigned int line;
        /* Column counter at the saved position */
        unsigned int column;
        /* Presumably the number of bytes left to parse - confirm */
        size_t remain;
        /* Presumably the read pointer at the saved position - confirm */
        const unsigned char *pos;
};
  39
  40 /**
  41  * Move up to len characters
  42  * @param parser
  43  * @param begin
  44  * @param len
  45  * @return new position in chunk
  46  */
  47 #define ucl_chunk_skipc(chunk, p)    do{                                        \
  48     if (*(p) == '\n') {                                                                         \
  49         (chunk)->line ++;                                                                       \
  50         (chunk)->column = 0;                                                            \
  51     }                                                                                                           \
  52     else (chunk)->column ++;                                                            \
  53     (p++);                                                                                                      \
  54     (chunk)->pos ++;                                                                            \
  55     (chunk)->remain --;                                                                         \
  56     } while (0)
  57
  58 static inline void
  59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
  60 {
  61         if (chunk->pos < chunk->end) {
  62                 if (isgraph (*chunk->pos)) {
  63                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
  64                                         chunk->line, chunk->column, str, *chunk->pos);
  65                 }
  66                 else {
  67                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
  68                                         chunk->line, chunk->column, str, (int)*chunk->pos);
  69                 }
  70         }
  71         else {
  72                 ucl_create_err (err, "error at the end of chunk: %s", str);
  73         }
  74 }
  75
  76 /**
  77  * Skip all comments from the current pos resolving nested and multiline comments
  78  * @param parser
  79  * @return
  80  */
  81 static bool
  82 ucl_skip_comments (struct ucl_parser *parser)
  83 {
  84         struct ucl_chunk *chunk = parser->chunks;
  85         const unsigned char *p;
  86         int comments_nested = 0;
  87
  88         p = chunk->pos;
  89
  90 start:
  91         if (*p == '#') {
  92                 if (parser->state != UCL_STATE_SCOMMENT &&
  93                                 parser->state != UCL_STATE_MCOMMENT) {
  94                         while (p < chunk->end) {
  95                                 if (*p == '\n') {
  96                                         ucl_chunk_skipc (chunk, p);
  97                                         goto start;
  98                                 }
  99                                 ucl_chunk_skipc (chunk, p);
 100                         }
 101                 }
 102         }
 103         else if (*p == '/' && chunk->remain >= 2) {
 104                 if (p[1] == '*') {
 105                         ucl_chunk_skipc (chunk, p);
 106                         comments_nested ++;
 107                         ucl_chunk_skipc (chunk, p);
 108
 109                         while (p < chunk->end) {
 110                                 if (*p == '*') {
 111                                         ucl_chunk_skipc (chunk, p);
 112                                         if (*p == '/') {
 113                                                 comments_nested --;
 114                                                 if (comments_nested == 0) {
 115                                                         ucl_chunk_skipc (chunk, p);
 116                                                         goto start;
 117                                                 }
 118                                         }
 119                                         ucl_chunk_skipc (chunk, p);
 120                                 }
 121                                 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
 122                                         comments_nested ++;
 123                                         ucl_chunk_skipc (chunk, p);
 124                                         ucl_chunk_skipc (chunk, p);
 125                                         continue;
 126                                 }
 127                                 ucl_chunk_skipc (chunk, p);
 128                         }
 129                         if (comments_nested != 0) {
 130                                 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
 131                                 return false;
 132                         }
 133                 }
 134         }
 135
 136         return true;
 137 }
 138
 139 /**
 140  * Return multiplier for a character
 141  * @param c multiplier character
 142  * @param is_bytes if true use 1024 multiplier
 143  * @return multiplier
 144  */
 145 static inline unsigned long
 146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
 147         const struct {
 148                 char c;
 149                 long mult_normal;
 150                 long mult_bytes;
 151         } multipliers[] = {
 152                         {'m', 1000 * 1000, 1024 * 1024},
 153                         {'k', 1000, 1024},
 154                         {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
 155         };
 156         int i;
 157
 158         for (i = 0; i < 3; i ++) {
 159                 if (tolower (c) == multipliers[i].c) {
 160                         if (is_bytes) {
 161                                 return multipliers[i].mult_bytes;
 162                         }
 163                         return multipliers[i].mult_normal;
 164                 }
 165         }
 166
 167         return 1;
 168 }
 169
 170
 171 /**
 172  * Return multiplier for time scaling
 173  * @param c
 174  * @return
 175  */
 176 static inline double
 177 ucl_lex_time_multiplier (const unsigned char c) {
 178         const struct {
 179                 char c;
 180                 double mult;
 181         } multipliers[] = {
 182                         {'m', 60},
 183                         {'h', 60 * 60},
 184                         {'d', 60 * 60 * 24},
 185                         {'w', 60 * 60 * 24 * 7},
 186                         {'y', 60 * 60 * 24 * 7 * 365}
 187         };
 188         int i;
 189
 190         for (i = 0; i < 5; i ++) {
 191                 if (tolower (c) == multipliers[i].c) {
 192                         return multipliers[i].mult;
 193                 }
 194         }
 195
 196         return 1;
 197 }
 198
 199 /**
 200  * Return true if a character is a end of an atom
 201  * @param c
 202  * @return
 203  */
 204 static inline bool
 205 ucl_lex_is_atom_end (const unsigned char c)
 206 {
 207         return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
 208 }
 209
 210 static inline bool
 211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
 212 {
 213         if (c1 == '/') {
 214                 if (c2 == '*') {
 215                         return true;
 216                 }
 217         }
 218         else if (c1 == '#') {
 219                 return true;
 220         }
 221         return false;
 222 }
 223
 224 /**
 225  * Check variable found
 226  * @param parser
 227  * @param ptr
 228  * @param remain
 229  * @param out_len
 230  * @param strict
 231  * @param found
 232  * @return
 233  */
 234 static inline const char *
 235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
 236                 size_t *out_len, bool strict, bool *found)
 237 {
 238         struct ucl_variable *var;
 239         unsigned char *dst;
 240         size_t dstlen;
 241         bool need_free = false;
 242
 243         LL_FOREACH (parser->variables, var) {
 244                 if (strict) {
 245                         if (remain == var->var_len) {
 246                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
 247                                         *out_len += var->value_len;
 248                                         *found = true;
 249                                         return (ptr + var->var_len);
 250                                 }
 251                         }
 252                 }
 253                 else {
 254                         if (remain >= var->var_len) {
 255                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
 256                                         *out_len += var->value_len;
 257                                         *found = true;
 258                                         return (ptr + var->var_len);
 259                                 }
 260                         }
 261                 }
 262         }
 263
 264         /* XXX: can only handle ${VAR} */
 265         if (!(*found) && parser->var_handler != NULL && strict) {
 266                 /* Call generic handler */
 267                 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
 268                                 parser->var_data)) {
 269                         *found = true;
 270                         if (need_free) {
 271                                 free (dst);
 272                         }
 273                         return (ptr + remain);
 274                 }
 275         }
 276
 277         return ptr;
 278 }
 279
 280 /**
 281  * Check for a variable in a given string
 282  * @param parser
 283  * @param ptr
 284  * @param remain
 285  * @param out_len
 286  * @param vars_found
 287  * @return
 288  */
 289 static const char *
 290 ucl_check_variable (struct ucl_parser *parser, const char *ptr,
 291                 size_t remain, size_t *out_len, bool *vars_found)
 292 {
 293         const char *p, *end, *ret = ptr;
 294         bool found = false;
 295
 296         if (*ptr == '{') {
 297                 /* We need to match the variable enclosed in braces */
 298                 p = ptr + 1;
 299                 end = ptr + remain;
 300                 while (p < end) {
 301                         if (*p == '}') {
 302                                 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
 303                                                 out_len, true, &found);
 304                                 if (found) {
 305                                         /* {} must be excluded actually */
 306                                         ret ++;
 307                                         if (!*vars_found) {
 308                                                 *vars_found = true;
 309                                         }
 310                                 }
 311                                 else {
 312                                         *out_len += 2;
 313                                 }
 314                                 break;
 315                         }
 316                         p ++;
 317                 }
 318         }
 319         else if (*ptr != '$') {
 320                 /* Not count escaped dollar sign */
 321                 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
 322                 if (found && !*vars_found) {
 323                         *vars_found = true;
 324                 }
 325                 if (!found) {
 326                         (*out_len) ++;
 327                 }
 328         }
 329         else {
 330                 ret ++;
 331                 (*out_len) ++;
 332         }
 333
 334         return ret;
 335 }
 336
 337 /**
 338  * Expand a single variable
 339  * @param parser
 340  * @param ptr
 341  * @param remain
 342  * @param dest
 343  * @return
 344  */
 345 static const char *
 346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
 347                 size_t remain, unsigned char **dest)
 348 {
 349         unsigned char *d = *dest, *dst;
 350         const char *p = ptr + 1, *ret;
 351         struct ucl_variable *var;
 352         size_t dstlen;
 353         bool need_free = false;
 354         bool found = false;
 355         bool strict = false;
 356
 357         ret = ptr + 1;
 358         remain --;
 359
 360         if (*p == '$') {
 361                 *d++ = *p++;
 362                 *dest = d;
 363                 return p;
 364         }
 365         else if (*p == '{') {
 366                 p ++;
 367                 strict = true;
 368                 ret += 2;
 369                 remain -= 2;
 370         }
 371
 372         LL_FOREACH (parser->variables, var) {
 373                 if (remain >= var->var_len) {
 374                         if (memcmp (p, var->var, var->var_len) == 0) {
 375                                 memcpy (d, var->value, var->value_len);
 376                                 ret += var->var_len;
 377                                 d += var->value_len;
 378                                 found = true;
 379                                 break;
 380                         }
 381                 }
 382         }
 383         if (!found) {
 384                 if (strict && parser->var_handler != NULL) {
 385                         if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
 386                                                         parser->var_data)) {
 387                                 memcpy (d, dst, dstlen);
 388                                 ret += dstlen;
 389                                 d += remain;
 390                                 found = true;
 391                         }
 392                 }
 393
 394                 /* Leave variable as is */
 395                 if (!found) {
 396                         memcpy (d, ptr, 2);
 397                         d += 2;
 398                         ret --;
 399                 }
 400         }
 401
 402         *dest = d;
 403         return ret;
 404 }
 405
 406 /**
 407  * Expand variables in string
 408  * @param parser
 409  * @param dst
 410  * @param src
 411  * @param in_len
 412  * @return
 413  */
 414 static ssize_t
 415 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
 416                 const char *src, size_t in_len)
 417 {
 418         const char *p, *end = src + in_len;
 419         unsigned char *d;
 420         size_t out_len = 0;
 421         bool vars_found = false;
 422
 423         p = src;
 424         while (p != end) {
 425                 if (*p == '$') {
 426                         p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
 427                 }
 428                 else {
 429                         p ++;
 430                         out_len ++;
 431                 }
 432         }
 433
 434         if (!vars_found) {
 435                 /* Trivial case */
 436                 *dst = NULL;
 437                 return in_len;
 438         }
 439
 440         *dst = UCL_ALLOC (out_len + 1);
 441         if (*dst == NULL) {
 442                 return in_len;
 443         }
 444
 445         d = *dst;
 446         p = src;
 447         while (p != end) {
 448                 if (*p == '$') {
 449                         p = ucl_expand_single_variable (parser, p, end - p, &d);
 450                 }
 451                 else {
 452                         *d++ = *p++;
 453                 }
 454         }
 455
 456         *d = '\0';
 457
 458         return out_len;
 459 }
 460
 461 /**
 462  * Store or copy pointer to the trash stack
 463  * @param parser parser object
 464  * @param src src string
 465  * @param dst destination buffer (trash stack pointer)
 466  * @param dst_const const destination pointer (e.g. value of object)
 467  * @param in_len input length
 468  * @param need_unescape need to unescape source (and copy it)
 469  * @param need_lowercase need to lowercase value (and copy)
 470  * @param need_expand need to expand variables (and copy as well)
 471  * @return output length (excluding \0 symbol)
 472  */
 473 static inline ssize_t
 474 ucl_copy_or_store_ptr (struct ucl_parser *parser,
 475                 const unsigned char *src, unsigned char **dst,
 476                 const char **dst_const, size_t in_len,
 477                 bool need_unescape, bool need_lowercase, bool need_expand)
 478 {
 479         ssize_t ret = -1, tret;
 480         unsigned char *tmp;
 481
 482         if (need_unescape || need_lowercase ||
 483                         (need_expand && parser->variables != NULL) ||
 484                         !(parser->flags & UCL_PARSER_ZEROCOPY)) {
 485                 /* Copy string */
 486                 *dst = UCL_ALLOC (in_len + 1);
 487                 if (*dst == NULL) {
 488                         ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
 489                         return false;
 490                 }
 491                 if (need_lowercase) {
 492                         ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
 493                 }
 494                 else {
 495                         ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
 496                 }
 497
 498                 if (need_unescape) {
 499                         ret = ucl_unescape_json_string (*dst, ret);
 500                 }
 501                 if (need_expand) {
 502                         tmp = *dst;
 503                         tret = ret;
 504                         ret = ucl_expand_variable (parser, dst, tmp, ret);
 505                         if (*dst == NULL) {
 506                                 /* Nothing to expand */
 507                                 *dst = tmp;
 508                                 ret = tret;
 509                         }
 510                 }
 511                 *dst_const = *dst;
 512         }
 513         else {
 514                 *dst_const = src;
 515                 ret = in_len;
 516         }
 517
 518         return ret;
 519 }
 520
 521 /**
 522  * Create and append an object at the specified level
 523  * @param parser
 524  * @param is_array
 525  * @param level
 526  * @return
 527  */
 528 static inline ucl_object_t *
 529 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
 530 {
 531         struct ucl_stack *st;
 532
 533         if (!is_array) {
 534                 if (obj == NULL) {
 535                         obj = ucl_object_typed_new (UCL_OBJECT);
 536                 }
 537                 else {
 538                         obj->type = UCL_OBJECT;
 539                 }
 540                 obj->value.ov = ucl_hash_create ();
 541                 parser->state = UCL_STATE_KEY;
 542         }
 543         else {
 544                 if (obj == NULL) {
 545                         obj = ucl_object_typed_new (UCL_ARRAY);
 546                 }
 547                 else {
 548                         obj->type = UCL_ARRAY;
 549                 }
 550                 parser->state = UCL_STATE_VALUE;
 551         }
 552
 553         st = UCL_ALLOC (sizeof (struct ucl_stack));
 554         if (st == NULL) {
 555                 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
 556                 return NULL;
 557         }
 558         st->obj = obj;
 559         st->level = level;
 560         LL_PREPEND (parser->stack, st);
 561         parser->cur_obj = obj;
 562
 563         return obj;
 564 }
 565
 566 int
 567 ucl_maybe_parse_number (ucl_object_t *obj,
 568                 const char *start, const char *end, const char **pos,
 569                 bool allow_double, bool number_bytes, bool allow_time)
 570 {
 571         const char *p = start, *c = start;
 572         char *endptr;
 573         bool got_dot = false, got_exp = false, need_double = false,
 574                         is_time = false, valid_start = false, is_hex = false,
 575                         is_neg = false;
 576         double dv = 0;
 577         int64_t lv = 0;
 578
 579         if (*p == '-') {
 580                 is_neg = true;
 581                 c ++;
 582                 p ++;
 583         }
 584         while (p < end) {
 585                 if (is_hex && isxdigit (*p)) {
 586                         p ++;
 587                 }
 588                 else if (isdigit (*p)) {
 589                         valid_start = true;
 590                         p ++;
 591                 }
 592                 else if (!is_hex && (*p == 'x' || *p == 'X')) {
 593                         is_hex = true;
 594                         allow_double = false;
 595                         c = p + 1;
 596                 }
 597                 else if (allow_double) {
 598                         if (p == c) {
 599                                 /* Empty digits sequence, not a number */
 600                                 *pos = start;
 601                                 return EINVAL;
 602                         }
 603                         else if (*p == '.') {
 604                                 if (got_dot) {
 605                                         /* Double dots, not a number */
 606                                         *pos = start;
 607                                         return EINVAL;
 608                                 }
 609                                 else {
 610                                         got_dot = true;
 611                                         need_double = true;
 612                                         p ++;
 613                                 }
 614                         }
 615                         else if (*p == 'e' || *p == 'E') {
 616                                 if (got_exp) {
 617                                         /* Double exp, not a number */
 618                                         *pos = start;
 619                                         return EINVAL;
 620                                 }
 621                                 else {
 622                                         got_exp = true;
 623                                         need_double = true;
 624                                         p ++;
 625                                         if (p >= end) {
 626                                                 *pos = start;
 627                                                 return EINVAL;
 628                                         }
 629                                         if (!isdigit (*p) && *p != '+' && *p != '-') {
 630                                                 /* Wrong exponent sign */
 631                                                 *pos = start;
 632                                                 return EINVAL;
 633                                         }
 634                                         else {
 635                                                 p ++;
 636                                         }
 637                                 }
 638                         }
 639                         else {
 640                                 /* Got the end of the number, need to check */
 641                                 break;
 642                         }
 643                 }
 644                 else {
 645                         break;
 646                 }
 647         }
 648
 649         if (!valid_start) {
 650                 *pos = start;
 651                 return EINVAL;
 652         }
 653
 654         errno = 0;
 655         if (need_double) {
 656                 dv = strtod (c, &endptr);
 657         }
 658         else {
 659                 if (is_hex) {
 660                         lv = strtoimax (c, &endptr, 16);
 661                 }
 662                 else {
 663                         lv = strtoimax (c, &endptr, 10);
 664                 }
 665         }
 666         if (errno == ERANGE) {
 667                 *pos = start;
 668                 return ERANGE;
 669         }
 670
 671         /* Now check endptr */
 672         if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
 673                         ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
 674                 p = endptr;
 675                 goto set_obj;
 676         }
 677
 678         if (endptr < end && endptr != start) {
 679                 p = endptr;
 680                 switch (*p) {
 681                 case 'm':
 682                 case 'M':
 683                 case 'g':
 684                 case 'G':
 685                 case 'k':
 686                 case 'K':
 687                         if (end - p >= 2) {
 688                                 if (p[1] == 's' || p[1] == 'S') {
 689                                         /* Milliseconds */
 690                                         if (!need_double) {
 691                                                 need_double = true;
 692                                                 dv = lv;
 693                                         }
 694                                         is_time = true;
 695                                         if (p[0] == 'm' || p[0] == 'M') {
 696                                                 dv /= 1000.;
 697                                         }
 698                                         else {
 699                                                 dv *= ucl_lex_num_multiplier (*p, false);
 700                                         }
 701                                         p += 2;
 702                                         goto set_obj;
 703                                 }
 704                                 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
 705                                         /* Bytes */
 706                                         if (need_double) {
 707                                                 need_double = false;
 708                                                 lv = dv;
 709                                         }
 710                                         lv *= ucl_lex_num_multiplier (*p, true);
 711                                         p += 2;
 712                                         goto set_obj;
 713                                 }
 714                                 else if (ucl_lex_is_atom_end (p[1])) {
 715                                         if (need_double) {
 716                                                 dv *= ucl_lex_num_multiplier (*p, false);
 717                                         }
 718                                         else {
 719                                                 lv *= ucl_lex_num_multiplier (*p, number_bytes);
 720                                         }
 721                                         p ++;
 722                                         goto set_obj;
 723                                 }
 724                                 else if (allow_time && end - p >= 3) {
 725                                         if (tolower (p[0]) == 'm' &&
 726                                                         tolower (p[1]) == 'i' &&
 727                                                         tolower (p[2]) == 'n') {
 728                                                 /* Minutes */
 729                                                 if (!need_double) {
 730                                                         need_double = true;
 731                                                         dv = lv;
 732                                                 }
 733                                                 is_time = true;
 734                                                 dv *= 60.;
 735                                                 p += 3;
 736                                                 goto set_obj;
 737                                         }
 738                                 }
 739                         }
 740                         else {
 741                                 if (need_double) {
 742                                         dv *= ucl_lex_num_multiplier (*p, false);
 743                                 }
 744                                 else {
 745                                         lv *= ucl_lex_num_multiplier (*p, number_bytes);
 746                                 }
 747                                 p ++;
 748                                 goto set_obj;
 749                         }
 750                         break;
 751                 case 'S':
 752                 case 's':
 753                         if (allow_time &&
 754                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
 755                                 if (!need_double) {
 756                                         need_double = true;
 757                                         dv = lv;
 758                                 }
 759                                 p ++;
 760                                 is_time = true;
 761                                 goto set_obj;
 762                         }
 763                         break;
 764                 case 'h':
 765                 case 'H':
 766                 case 'd':
 767                 case 'D':
 768                 case 'w':
 769                 case 'W':
 770                 case 'Y':
 771                 case 'y':
 772                         if (allow_time &&
 773                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
 774                                 if (!need_double) {
 775                                         need_double = true;
 776                                         dv = lv;
 777                                 }
 778                                 is_time = true;
 779                                 dv *= ucl_lex_time_multiplier (*p);
 780                                 p ++;
 781                                 goto set_obj;
 782                         }
 783                         break;
 784                 }
 785         }
 786
 787         *pos = c;
 788         return EINVAL;
 789
 790         set_obj:
 791         if (allow_double && (need_double || is_time)) {
 792                 if (!is_time) {
 793                         obj->type = UCL_FLOAT;
 794                 }
 795                 else {
 796                         obj->type = UCL_TIME;
 797                 }
 798                 obj->value.dv = is_neg ? (-dv) : dv;
 799         }
 800         else {
 801                 obj->type = UCL_INT;
 802                 obj->value.iv = is_neg ? (-lv) : lv;
 803         }
 804         *pos = p;
 805         return 0;
 806 }
 807
 808 /**
 809  * Parse possible number
 810  * @param parser
 811  * @param chunk
 812  * @return true if a number has been parsed
 813  */
 814 static bool
 815 ucl_lex_number (struct ucl_parser *parser,
 816                 struct ucl_chunk *chunk, ucl_object_t *obj)
 817 {
 818         const unsigned char *pos;
 819         int ret;
 820
 821         ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
 822                         true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
 823
 824         if (ret == 0) {
 825                 chunk->remain -= pos - chunk->pos;
 826                 chunk->column += pos - chunk->pos;
 827                 chunk->pos = pos;
 828                 return true;
 829         }
 830         else if (ret == ERANGE) {
 831                 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
 832         }
 833
 834         return false;
 835 }
 836
 837 /**
 838  * Parse quoted string with possible escapes
 839  * @param parser
 840  * @param chunk
 841  * @return true if a string has been parsed
 842  */
 843 static bool
 844 ucl_lex_json_string (struct ucl_parser *parser,
 845                 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
 846 {
 847         const unsigned char *p = chunk->pos;
 848         unsigned char c;
 849         int i;
 850
 851         while (p < chunk->end) {
 852                 c = *p;
 853                 if (c < 0x1F) {
 854                         /* Unmasked control character */
 855                         if (c == '\n') {
 856                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
 857                         }
 858                         else {
 859                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
 860                         }
 861                         return false;
 862                 }
 863                 else if (c == '\\') {
 864                         ucl_chunk_skipc (chunk, p);
 865                         c = *p;
 866                         if (p >= chunk->end) {
 867                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
 868                                 return false;
 869                         }
 870                         else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
 871                                 if (c == 'u') {
 872                                         ucl_chunk_skipc (chunk, p);
 873                                         for (i = 0; i < 4 && p < chunk->end; i ++) {
 874                                                 if (!isxdigit (*p)) {
 875                                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
 876                                                         return false;
 877                                                 }
 878                                                 ucl_chunk_skipc (chunk, p);
 879                                         }
 880                                         if (p >= chunk->end) {
 881                                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
 882                                                 return false;
 883                                         }
 884                                 }
 885                                 else {
 886                                         ucl_chunk_skipc (chunk, p);
 887                                 }
 888                         }
 889                         *need_unescape = true;
 890                         *ucl_escape = true;
 891                         continue;
 892                 }
 893                 else if (c == '"') {
 894                         ucl_chunk_skipc (chunk, p);
 895                         return true;
 896                 }
 897                 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
 898                         *ucl_escape = true;
 899                 }
 900                 else if (c == '$') {
 901                         *var_expand = true;
 902                 }
 903                 ucl_chunk_skipc (chunk, p);
 904         }
 905
 906         ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
 907         return false;
 908 }
 909
 910 /**
 911  * Parse a key in an object
 912  * @param parser
 913  * @param chunk
 914  * @return true if a key has been parsed
 915  */
 916 static bool
 917 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
 918 {
 919         const unsigned char *p, *c = NULL, *end, *t;
 920         const char *key = NULL;
 921         bool got_quote = false, got_eq = false, got_semicolon = false,
 922                         need_unescape = false, ucl_escape = false, var_expand = false,
 923                         got_content = false, got_sep = false;
 924         ucl_object_t *nobj, *tobj;
 925         ucl_hash_t *container;
 926         ssize_t keylen;
 927
 928         p = chunk->pos;
 929
 930         if (*p == '.') {
 931                 /* It is macro actually */
 932                 ucl_chunk_skipc (chunk, p);
 933                 parser->prev_state = parser->state;
 934                 parser->state = UCL_STATE_MACRO_NAME;
 935                 return true;
 936         }
 937         while (p < chunk->end) {
 938                 /*
 939                  * A key must start with alpha, number, '/' or '_' and end with space character
 940                  */
 941                 if (c == NULL) {
 942                         if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
 943                                 if (!ucl_skip_comments (parser)) {
 944                                         return false;
 945                                 }
 946                                 p = chunk->pos;
 947                         }
 948                         else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
 949                                 ucl_chunk_skipc (chunk, p);
 950                         }
 951                         else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
 952                                 /* The first symbol */
 953                                 c = p;
 954                                 ucl_chunk_skipc (chunk, p);
 955                                 got_content = true;
 956                         }
 957                         else if (*p == '"') {
 958                                 /* JSON style key */
 959                                 c = p + 1;
 960                                 got_quote = true;
 961                                 got_content = true;
 962                                 ucl_chunk_skipc (chunk, p);
 963                         }
 964                         else if (*p == '}') {
 965                                 /* We have actually end of an object */
 966                                 *end_of_object = true;
 967                                 return true;
 968                         }
 969                         else if (*p == '.') {
 970                                 ucl_chunk_skipc (chunk, p);
 971                                 parser->prev_state = parser->state;
 972                                 parser->state = UCL_STATE_MACRO_NAME;
 973                                 return true;
 974                         }
 975                         else {
 976                                 /* Invalid identifier */
 977                                 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
 978                                 return false;
 979                         }
 980                 }
 981                 else {
 982                         /* Parse the body of a key */
 983                         if (!got_quote) {
 984                                 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
 985                                         got_content = true;
 986                                         ucl_chunk_skipc (chunk, p);
 987                                 }
 988                                 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
 989                                         end = p;
 990                                         break;
 991                                 }
 992                                 else {
 993                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
 994                                         return false;
 995                                 }
 996                         }
 997                         else {
 998                                 /* We need to parse json like quoted string */
 999                                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1000                                         return false;
1001                                 }
1002                                 /* Always escape keys obtained via json */
1003                                 end = chunk->pos - 1;
1004                                 p = chunk->pos;
1005                                 break;
1006                         }
1007                 }
1008         }
1009
1010         if (p >= chunk->end && got_content) {
1011                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1012                 return false;
1013         }
1014         else if (!got_content) {
1015                 return true;
1016         }
1017         *end_of_object = false;
1018         /* We are now at the end of the key, need to parse the rest */
1019         while (p < chunk->end) {
1020                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1021                         ucl_chunk_skipc (chunk, p);
1022                 }
1023                 else if (*p == '=') {
1024                         if (!got_eq && !got_semicolon) {
1025                                 ucl_chunk_skipc (chunk, p);
1026                                 got_eq = true;
1027                         }
1028                         else {
1029                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1030                                 return false;
1031                         }
1032                 }
1033                 else if (*p == ':') {
1034                         if (!got_eq && !got_semicolon) {
1035                                 ucl_chunk_skipc (chunk, p);
1036                                 got_semicolon = true;
1037                         }
1038                         else {
1039                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1040                                 return false;
1041                         }
1042                 }
1043                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1044                         /* Check for comment */
1045                         if (!ucl_skip_comments (parser)) {
1046                                 return false;
1047                         }
1048                         p = chunk->pos;
1049                 }
1050                 else {
1051                         /* Start value */
1052                         break;
1053                 }
1054         }
1055
1056         if (p >= chunk->end && got_content) {
1057                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1058                 return false;
1059         }
1060
1061         got_sep = got_semicolon || got_eq;
1062
1063         if (!got_sep) {
1064                 /*
1065                  * Maybe we have more keys nested, so search for termination character.
1066                  * Possible choices:
1067                  * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1068                  * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1069                  * 3) key1 value[;,\n] <- we treat that as linear object
1070                  */
1071                 t = p;
1072                 *next_key = false;
1073                 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1074                         t ++;
1075                 }
1076                 /* Check first non-space character after a key */
1077                 if (*t != '{' && *t != '[') {
1078                         while (t < chunk->end) {
1079                                 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1080                                         break;
1081                                 }
1082                                 else if (*t == '{' || *t == '[') {
1083                                         *next_key = true;
1084                                         break;
1085                                 }
1086                                 t ++;
1087                         }
1088                 }
1089         }
1090
1091         /* Create a new object */
1092         nobj = ucl_object_new ();
1093         keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1094                         &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1095         if (keylen == -1) {
1096                 ucl_object_unref (nobj);
1097                 return false;
1098         }
1099         else if (keylen == 0) {
1100                 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1101                 ucl_object_unref (nobj);
1102                 return false;
1103         }
1104
1105         container = parser->stack->obj->value.ov;
1106         nobj->key = key;
1107         nobj->keylen = keylen;
1108         tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1109         if (tobj == NULL) {
1110                 container = ucl_hash_insert_object (container, nobj);
1111                 nobj->prev = nobj;
1112                 nobj->next = NULL;
1113                 parser->stack->obj->len ++;
1114         }
1115         else {
1116                 DL_APPEND (tobj, nobj);
1117         }
1118
1119         if (ucl_escape) {
1120                 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1121         }
1122         parser->stack->obj->value.ov = container;
1123
1124         parser->cur_obj = nobj;
1125
1126         return true;
1127 }
1128
1129 /**
1130  * Parse a cl string
1131  * @param parser
1132  * @param chunk
1133  * @return true if a key has been parsed
1134  */
1135 static bool
1136 ucl_parse_string_value (struct ucl_parser *parser,
1137                 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1138 {
1139         const unsigned char *p;
1140         enum {
1141                 UCL_BRACE_ROUND = 0,
1142                 UCL_BRACE_SQUARE,
1143                 UCL_BRACE_FIGURE
1144         };
1145         int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1146
1147         p = chunk->pos;
1148
1149         while (p < chunk->end) {
1150
1151                 /* Skip pairs of figure braces */
1152                 if (*p == '{') {
1153                         braces[UCL_BRACE_FIGURE][0] ++;
1154                 }
1155                 else if (*p == '}') {
1156                         braces[UCL_BRACE_FIGURE][1] ++;
1157                         if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1158                                 /* This is not a termination symbol, continue */
1159                                 ucl_chunk_skipc (chunk, p);
1160                                 continue;
1161                         }
1162                 }
1163                 /* Skip pairs of square braces */
1164                 else if (*p == '[') {
1165                         braces[UCL_BRACE_SQUARE][0] ++;
1166                 }
1167                 else if (*p == ']') {
1168                         braces[UCL_BRACE_SQUARE][1] ++;
1169                         if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1170                                 /* This is not a termination symbol, continue */
1171                                 ucl_chunk_skipc (chunk, p);
1172                                 continue;
1173                         }
1174                 }
1175                 else if (*p == '$') {
1176                         *var_expand = true;
1177                 }
1178                 else if (*p == '\\') {
1179                         *need_unescape = true;
1180                         ucl_chunk_skipc (chunk, p);
1181                         if (p < chunk->end) {
1182                                 ucl_chunk_skipc (chunk, p);
1183                         }
1184                         continue;
1185                 }
1186
1187                 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1188                         break;
1189                 }
1190                 ucl_chunk_skipc (chunk, p);
1191         }
1192
1193         if (p >= chunk->end) {
1194                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1195                 return false;
1196         }
1197
1198         return true;
1199 }
1200
1201 /**
1202  * Parse multiline string ending with \n{term}\n
1203  * @param parser
1204  * @param chunk
1205  * @param term
1206  * @param term_len
1207  * @return size of multiline string or 0 in case of error
1208  */
1209 static int
1210 ucl_parse_multiline_string (struct ucl_parser *parser,
1211                 struct ucl_chunk *chunk, const unsigned char *term,
1212                 int term_len, unsigned char const **beg,
1213                 bool *var_expand)
1214 {
1215         const unsigned char *p, *c;
1216         bool newline = false;
1217         int len = 0;
1218
1219         p = chunk->pos;
1220
1221         c = p;
1222
1223         while (p < chunk->end) {
1224                 if (newline) {
1225                         if (chunk->end - p < term_len) {
1226                                 return 0;
1227                         }
1228                         else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1229                                 len = p - c;
1230                                 chunk->remain -= term_len;
1231                                 chunk->pos = p + term_len;
1232                                 chunk->column = term_len;
1233                                 *beg = c;
1234                                 break;
1235                         }
1236                 }
1237                 if (*p == '\n') {
1238                         newline = true;
1239                 }
1240                 else {
1241                         if (*p == '$') {
1242                                 *var_expand = true;
1243                         }
1244                         newline = false;
1245                 }
1246                 ucl_chunk_skipc (chunk, p);
1247         }
1248
1249         return len;
1250 }
1251
1252 static ucl_object_t*
1253 ucl_get_value_object (struct ucl_parser *parser)
1254 {
1255         ucl_object_t *t, *obj = NULL;
1256
1257         if (parser->stack->obj->type == UCL_ARRAY) {
1258                 /* Object must be allocated */
1259                 obj = ucl_object_new ();
1260                 t = parser->stack->obj->value.av;
1261                 DL_APPEND (t, obj);
1262                 parser->cur_obj = obj;
1263                 parser->stack->obj->value.av = t;
1264                 parser->stack->obj->len ++;
1265         }
1266         else {
1267                 /* Object has been already allocated */
1268                 obj = parser->cur_obj;
1269         }
1270
1271         return obj;
1272 }
1273
1274 /**
1275  * Handle value data
1276  * @param parser
1277  * @param chunk
1278  * @return
1279  */
1280 static bool
1281 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1282 {
1283         const unsigned char *p, *c;
1284         ucl_object_t *obj = NULL;
1285         unsigned int stripped_spaces;
1286         int str_len;
1287         bool need_unescape = false, ucl_escape = false, var_expand = false;
1288
1289         p = chunk->pos;
1290
1291         /* Skip any spaces and comments */
1292         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1293                         (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1294                 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1295                         ucl_chunk_skipc (chunk, p);
1296                 }
1297                 if (!ucl_skip_comments (parser)) {
1298                         return false;
1299                 }
1300                 p = chunk->pos;
1301         }
1302
1303         while (p < chunk->end) {
1304                 c = p;
1305                 switch (*p) {
1306                 case '"':
1307                         obj = ucl_get_value_object (parser);
1308                         ucl_chunk_skipc (chunk, p);
1309                         if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1310                                 return false;
1311                         }
1312                         str_len = chunk->pos - c - 2;
1313                         obj->type = UCL_STRING;
1314                         if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1315                                         &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1316                                 return false;
1317                         }
1318                         obj->len = str_len;
1319                         parser->state = UCL_STATE_AFTER_VALUE;
1320                         p = chunk->pos;
1321                         return true;
1322                         break;
1323                 case '{':
1324                         obj = ucl_get_value_object (parser);
1325                         /* We have a new object */
1326                         obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1327                         if (obj == NULL) {
1328                                 return false;
1329                         }
1330
1331                         ucl_chunk_skipc (chunk, p);
1332                         return true;
1333                         break;
1334                 case '[':
1335                         obj = ucl_get_value_object (parser);
1336                         /* We have a new array */
1337                         obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1338                         if (obj == NULL) {
1339                                 return false;
1340                         }
1341
1342                         ucl_chunk_skipc (chunk, p);
1343                         return true;
1344                         break;
1345                 case ']':
1346                         /* We have the array ending */
1347                         if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1348                                 parser->state = UCL_STATE_AFTER_VALUE;
1349                                 return true;
1350                         }
1351                         else {
1352                                 goto parse_string;
1353                         }
1354                         break;
1355                 case '<':
1356                         obj = ucl_get_value_object (parser);
1357                         /* We have something like multiline value, which must be <<[A-Z]+\n */
1358                         if (chunk->end - p > 3) {
1359                                 if (memcmp (p, "<<", 2) == 0) {
1360                                         p += 2;
1361                                         /* We allow only uppercase characters in multiline definitions */
1362                                         while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1363                                                 p ++;
1364                                         }
1365                                         if (*p =='\n') {
1366                                                 /* Set chunk positions and start multiline parsing */
1367                                                 c += 2;
1368                                                 chunk->remain -= p - c;
1369                                                 chunk->pos = p + 1;
1370                                                 chunk->column = 0;
1371                                                 chunk->line ++;
1372                                                 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1373                                                                 p - c, &c, &var_expand)) == 0) {
1374                                                         ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1375                                                         return false;
1376                                                 }
1377                                                 obj->type = UCL_STRING;
1378                                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1379                                                         &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1380                                                         return false;
1381                                                 }
1382                                                 obj->len = str_len;
1383                                                 parser->state = UCL_STATE_AFTER_VALUE;
1384                                                 return true;
1385                                         }
1386                                 }
1387                         }
1388                         /* Fallback to ordinary strings */
1389                 default:
1390 parse_string:
1391                         if (obj == NULL) {
1392                                 obj = ucl_get_value_object (parser);
1393                         }
1394                         /* Parse atom */
1395                         if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1396                                 if (!ucl_lex_number (parser, chunk, obj)) {
1397                                         if (parser->state == UCL_STATE_ERROR) {
1398                                                 return false;
1399                                         }
1400                                 }
1401                                 else {
1402                                         parser->state = UCL_STATE_AFTER_VALUE;
1403                                         return true;
1404                                 }
1405                                 /* Fallback to normal string */
1406                         }
1407
1408                         if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1409                                 return false;
1410                         }
1411                         /* Cut trailing spaces */
1412                         stripped_spaces = 0;
1413                         while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1414                                         UCL_CHARACTER_WHITESPACE)) {
1415                                 stripped_spaces ++;
1416                         }
1417                         str_len = chunk->pos - c - stripped_spaces;
1418                         if (str_len <= 0) {
1419                                 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1420                                 return false;
1421                         }
1422                         else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1423                                 obj->len = 0;
1424                                 obj->type = UCL_NULL;
1425                         }
1426                         else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1427                                 obj->type = UCL_STRING;
1428                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1429                                                 &obj->value.sv, str_len, need_unescape,
1430                                                 false, var_expand)) == -1) {
1431                                         return false;
1432                                 }
1433                                 obj->len = str_len;
1434                         }
1435                         parser->state = UCL_STATE_AFTER_VALUE;
1436                         p = chunk->pos;
1437
1438                         return true;
1439                         break;
1440                 }
1441         }
1442
1443         return true;
1444 }
1445
1446 /**
1447  * Handle after value data
1448  * @param parser
1449  * @param chunk
1450  * @return
1451  */
1452 static bool
1453 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1454 {
1455         const unsigned char *p;
1456         bool got_sep = false;
1457         struct ucl_stack *st;
1458
1459         p = chunk->pos;
1460
1461         while (p < chunk->end) {
1462                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1463                         /* Skip whitespaces */
1464                         ucl_chunk_skipc (chunk, p);
1465                 }
1466                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1467                         /* Skip comment */
1468                         if (!ucl_skip_comments (parser)) {
1469                                 return false;
1470                         }
1471                         /* Treat comment as a separator */
1472                         got_sep = true;
1473                         p = chunk->pos;
1474                 }
1475                 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1476                         if (*p == '}' || *p == ']') {
1477                                 if (parser->stack == NULL) {
1478                                         ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1479                                         return false;
1480                                 }
1481                                 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1482                                                 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1483
1484                                         /* Pop all nested objects from a stack */
1485                                         st = parser->stack;
1486                                         parser->stack = st->next;
1487                                         UCL_FREE (sizeof (struct ucl_stack), st);
1488
1489                                         while (parser->stack != NULL) {
1490                                                 st = parser->stack;
1491                                                 if (st->next == NULL || st->next->level == st->level) {
1492                                                         break;
1493                                                 }
1494                                                 parser->stack = st->next;
1495                                                 UCL_FREE (sizeof (struct ucl_stack), st);
1496                                         }
1497                                 }
1498                                 else {
1499                                         ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1500                                         return false;
1501                                 }
1502
1503                                 if (parser->stack == NULL) {
1504                                         /* Ignore everything after a top object */
1505                                         return true;
1506                                 }
1507                                 else {
1508                                         ucl_chunk_skipc (chunk, p);
1509                                 }
1510                                 got_sep = true;
1511                         }
1512                         else {
1513                                 /* Got a separator */
1514                                 got_sep = true;
1515                                 ucl_chunk_skipc (chunk, p);
1516                         }
1517                 }
1518                 else {
1519                         /* Anything else */
1520                         if (!got_sep) {
1521                                 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1522                                 return false;
1523                         }
1524                         return true;
1525                 }
1526         }
1527
1528         return true;
1529 }
1530
1531 /**
1532  * Handle macro data
1533  * @param parser
1534  * @param chunk
1535  * @return
1536  */
1537 static bool
1538 ucl_parse_macro_value (struct ucl_parser *parser,
1539                 struct ucl_chunk *chunk, struct ucl_macro *macro,
1540                 unsigned char const **macro_start, size_t *macro_len)
1541 {
1542         const unsigned char *p, *c;
1543         bool need_unescape = false, ucl_escape = false, var_expand = false;
1544
1545         p = chunk->pos;
1546
1547         switch (*p) {
1548         case '"':
1549                 /* We have macro value encoded in quotes */
1550                 c = p;
1551                 ucl_chunk_skipc (chunk, p);
1552                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1553                         return false;
1554                 }
1555
1556                 *macro_start = c + 1;
1557                 *macro_len = chunk->pos - c - 2;
1558                 p = chunk->pos;
1559                 break;
1560         case '{':
1561                 /* We got a multiline macro body */
1562                 ucl_chunk_skipc (chunk, p);
1563                 /* Skip spaces at the beginning */
1564                 while (p < chunk->end) {
1565                         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1566                                 ucl_chunk_skipc (chunk, p);
1567                         }
1568                         else {
1569                                 break;
1570                         }
1571                 }
1572                 c = p;
1573                 while (p < chunk->end) {
1574                         if (*p == '}') {
1575                                 break;
1576                         }
1577                         ucl_chunk_skipc (chunk, p);
1578                 }
1579                 *macro_start = c;
1580                 *macro_len = p - c;
1581                 ucl_chunk_skipc (chunk, p);
1582                 break;
1583         default:
1584                 /* Macro is not enclosed in quotes or braces */
1585                 c = p;
1586                 while (p < chunk->end) {
1587                         if (ucl_lex_is_atom_end (*p)) {
1588                                 break;
1589                         }
1590                         ucl_chunk_skipc (chunk, p);
1591                 }
1592                 *macro_start = c;
1593                 *macro_len = p - c;
1594                 break;
1595         }
1596
1597         /* We are at the end of a macro */
1598         /* Skip ';' and space characters and return to previous state */
1599         while (p < chunk->end) {
1600                 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1601                         break;
1602                 }
1603                 ucl_chunk_skipc (chunk, p);
1604         }
1605         return true;
1606 }
1607
1608 /**
1609  * Handle the main states of rcl parser
1610  * @param parser parser structure
1611  * @param data the pointer to the beginning of a chunk
1612  * @param len the length of a chunk
1613  * @return true if chunk has been parsed and false in case of error
1614  */
1615 static bool
1616 ucl_state_machine (struct ucl_parser *parser)
1617 {
1618         ucl_object_t *obj;
1619         struct ucl_chunk *chunk = parser->chunks;
1620         const unsigned char *p, *c = NULL, *macro_start = NULL;
1621         unsigned char *macro_escaped;
1622         size_t macro_len = 0;
1623         struct ucl_macro *macro = NULL;
1624         bool next_key = false, end_of_object = false;
1625
1626         if (parser->top_obj == NULL) {
1627                 if (*chunk->pos == '[') {
1628                         obj = ucl_add_parser_stack (NULL, parser, true, 0);
1629                 }
1630                 else {
1631                         obj = ucl_add_parser_stack (NULL, parser, false, 0);
1632                 }
1633                 if (obj == NULL) {
1634                         return false;
1635                 }
1636                 parser->top_obj = obj;
1637                 parser->cur_obj = obj;
1638                 parser->state = UCL_STATE_INIT;
1639         }
1640
1641         p = chunk->pos;
1642         while (chunk->pos < chunk->end) {
1643                 switch (parser->state) {
1644                 case UCL_STATE_INIT:
1645                         /*
1646                          * At the init state we can either go to the parse array or object
1647                          * if we got [ or { correspondingly or can just treat new data as
1648                          * a key of newly created object
1649                          */
1650                         obj = parser->cur_obj;
1651                         if (!ucl_skip_comments (parser)) {
1652                                 parser->prev_state = parser->state;
1653                                 parser->state = UCL_STATE_ERROR;
1654                                 return false;
1655                         }
1656                         else {
1657                                 p = chunk->pos;
1658                                 if (*p == '[') {
1659                                         parser->state = UCL_STATE_VALUE;
1660                                         ucl_chunk_skipc (chunk, p);
1661                                 }
1662                                 else {
1663                                         parser->state = UCL_STATE_KEY;
1664                                         if (*p == '{') {
1665                                                 ucl_chunk_skipc (chunk, p);
1666                                         }
1667                                 }
1668                         }
1669                         break;
1670                 case UCL_STATE_KEY:
1671                         /* Skip any spaces */
1672                         while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1673                                 ucl_chunk_skipc (chunk, p);
1674                         }
1675                         if (*p == '}') {
1676                                 /* We have the end of an object */
1677                                 parser->state = UCL_STATE_AFTER_VALUE;
1678                                 continue;
1679                         }
1680                         if (parser->stack == NULL) {
1681                                 /* No objects are on stack, but we want to parse a key */
1682                                 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1683                                                 "expects a key", &parser->err);
1684                                 parser->prev_state = parser->state;
1685                                 parser->state = UCL_STATE_ERROR;
1686                                 return false;
1687                         }
1688                         if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1689                                 parser->prev_state = parser->state;
1690                                 parser->state = UCL_STATE_ERROR;
1691                                 return false;
1692                         }
1693                         if (end_of_object) {
1694                                 p = chunk->pos;
1695                                 parser->state = UCL_STATE_AFTER_VALUE;
1696                                 continue;
1697                         }
1698                         else if (parser->state != UCL_STATE_MACRO_NAME) {
1699                                 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1700                                         /* Parse more keys and nest objects accordingly */
1701                                         obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1702                                                         parser->stack->level + 1);
1703                                         if (obj == NULL) {
1704                                                 return false;
1705                                         }
1706                                 }
1707                                 else {
1708                                         parser->state = UCL_STATE_VALUE;
1709                                 }
1710                         }
1711                         else {
1712                                 c = chunk->pos;
1713                         }
1714                         p = chunk->pos;
1715                         break;
1716                 case UCL_STATE_VALUE:
1717                         /* We need to check what we do have */
1718                         if (!ucl_parse_value (parser, chunk)) {
1719                                 parser->prev_state = parser->state;
1720                                 parser->state = UCL_STATE_ERROR;
1721                                 return false;
1722                         }
1723                         /* State is set in ucl_parse_value call */
1724                         p = chunk->pos;
1725                         break;
1726                 case UCL_STATE_AFTER_VALUE:
1727                         if (!ucl_parse_after_value (parser, chunk)) {
1728                                 parser->prev_state = parser->state;
1729                                 parser->state = UCL_STATE_ERROR;
1730                                 return false;
1731                         }
1732                         if (parser->stack != NULL) {
1733                                 if (parser->stack->obj->type == UCL_OBJECT) {
1734                                         parser->state = UCL_STATE_KEY;
1735                                 }
1736                                 else {
1737                                         /* Array */
1738                                         parser->state = UCL_STATE_VALUE;
1739                                 }
1740                         }
1741                         else {
1742                                 /* Skip everything at the end */
1743                                 return true;
1744                         }
1745                         p = chunk->pos;
1746                         break;
1747                 case UCL_STATE_MACRO_NAME:
1748                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1749                                 ucl_chunk_skipc (chunk, p);
1750                         }
1751                         else if (p - c > 0) {
1752                                 /* We got macro name */
1753                                 macro_len = (size_t)(p - c);
1754                                 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1755                                 if (macro == NULL) {
1756                                         ucl_create_err (&parser->err, "error on line %d at column %d: "
1757                                                         "unknown macro: '%.*s', character: '%c'",
1758                                                                 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1759                                         parser->state = UCL_STATE_ERROR;
1760                                         return false;
1761                                 }
1762                                 /* Now we need to skip all spaces */
1763                                 while (p < chunk->end) {
1764                                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1765                                                 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1766                                                         /* Skip comment */
1767                                                         if (!ucl_skip_comments (parser)) {
1768                                                                 return false;
1769                                                         }
1770                                                         p = chunk->pos;
1771                                                 }
1772                                                 break;
1773                                         }
1774                                         ucl_chunk_skipc (chunk, p);
1775                                 }
1776                                 parser->state = UCL_STATE_MACRO;
1777                         }
1778                         break;
1779                 case UCL_STATE_MACRO:
1780                         if (!ucl_parse_macro_value (parser, chunk, macro,
1781                                         &macro_start, &macro_len)) {
1782                                 parser->prev_state = parser->state;
1783                                 parser->state = UCL_STATE_ERROR;
1784                                 return false;
1785                         }
1786                         macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1787                         parser->state = parser->prev_state;
1788                         if (macro_escaped == NULL) {
1789                                 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1790                                         return false;
1791                                 }
1792                         }
1793                         else {
1794                                 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1795                                         UCL_FREE (macro_len + 1, macro_escaped);
1796                                         return false;
1797                                 }
1798                                 UCL_FREE (macro_len + 1, macro_escaped);
1799                         }
1800                         p = chunk->pos;
1801                         break;
1802                 default:
1803                         /* TODO: add all states */
1804                         ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1805                         parser->state = UCL_STATE_ERROR;
1806                         return false;
1807                 }
1808         }
1809
1810         return true;
1811 }
1812
1813 struct ucl_parser*
1814 ucl_parser_new (int flags)
1815 {
1816         struct ucl_parser *new;
1817
1818         new = UCL_ALLOC (sizeof (struct ucl_parser));
1819         if (new == NULL) {
1820                 return NULL;
1821         }
1822         memset (new, 0, sizeof (struct ucl_parser));
1823
1824         ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1825         ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1826         ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1827
1828         new->flags = flags;
1829
1830         /* Initial assumption about filevars */
1831         ucl_parser_set_filevars (new, NULL, false);
1832
1833         return new;
1834 }
1835
1836
1837 void
1838 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1839                 ucl_macro_handler handler, void* ud)
1840 {
1841         struct ucl_macro *new;
1842
1843         if (macro == NULL || handler == NULL) {
1844                 return;
1845         }
1846         new = UCL_ALLOC (sizeof (struct ucl_macro));
1847         if (new == NULL) {
1848                 return;
1849         }
1850         memset (new, 0, sizeof (struct ucl_macro));
1851         new->handler = handler;
1852         new->name = strdup (macro);
1853         new->ud = ud;
1854         HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1855 }
1856
1857 void
1858 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1859                 const char *value)
1860 {
1861         struct ucl_variable *new = NULL, *cur;
1862
1863         if (var == NULL) {
1864                 return;
1865         }
1866
1867         /* Find whether a variable already exists */
1868         LL_FOREACH (parser->variables, cur) {
1869                 if (strcmp (cur->var, var) == 0) {
1870                         new = cur;
1871                         break;
1872                 }
1873         }
1874
1875         if (value == NULL) {
1876
1877                 if (new != NULL) {
1878                         /* Remove variable */
1879                         LL_DELETE (parser->variables, new);
1880                         free (new->var);
1881                         free (new->value);
1882                         UCL_FREE (sizeof (struct ucl_variable), new);
1883                 }
1884                 else {
1885                         /* Do nothing */
1886                         return;
1887                 }
1888         }
1889         else {
1890                 if (new == NULL) {
1891                         new = UCL_ALLOC (sizeof (struct ucl_variable));
1892                         if (new == NULL) {
1893                                 return;
1894                         }
1895                         memset (new, 0, sizeof (struct ucl_variable));
1896                         new->var = strdup (var);
1897                         new->var_len = strlen (var);
1898                         new->value = strdup (value);
1899                         new->value_len = strlen (value);
1900
1901                         LL_PREPEND (parser->variables, new);
1902                 }
1903                 else {
1904                         free (new->value);
1905                         new->value = strdup (value);
1906                         new->value_len = strlen (value);
1907                 }
1908         }
1909 }
1910
1911 void
1912 ucl_parser_set_variables_handler (struct ucl_parser *parser,
1913                 ucl_variable_handler handler, void *ud)
1914 {
1915         parser->var_handler = handler;
1916         parser->var_data = ud;
1917 }
1918
1919 bool
1920 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1921                 size_t len)
1922 {
1923         struct ucl_chunk *chunk;
1924
1925         if (data == NULL || len == 0) {
1926                 ucl_create_err (&parser->err, "invalid chunk added");
1927                 return false;
1928         }
1929         if (parser->state != UCL_STATE_ERROR) {
1930                 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1931                 if (chunk == NULL) {
1932                         ucl_create_err (&parser->err, "cannot allocate chunk structure");
1933                         return false;
1934                 }
1935                 chunk->begin = data;
1936                 chunk->remain = len;
1937                 chunk->pos = chunk->begin;
1938                 chunk->end = chunk->begin + len;
1939                 chunk->line = 1;
1940                 chunk->column = 0;
1941                 LL_PREPEND (parser->chunks, chunk);
1942                 parser->recursion ++;
1943                 if (parser->recursion > UCL_MAX_RECURSION) {
1944                         ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1945                                         parser->recursion);
1946                         return false;
1947                 }
1948                 return ucl_state_machine (parser);
1949         }
1950
1951         ucl_create_err (&parser->err, "a parser is in an invalid state");
1952
1953         return false;
1954 }
1955
1956 bool
1957 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1958                 size_t len)
1959 {
1960         if (data == NULL) {
1961                 ucl_create_err (&parser->err, "invalid string added");
1962                 return false;
1963         }
1964         if (len == 0) {
1965                 len = strlen (data);
1966         }
1967
1968         return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1969 }