1 /* Copyright (c) 2013, Vsevolod Stakhov
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
30 * The implementation of the UCL parser
/*
 * Snapshot of a chunk's parse position; it is filled by
 * ucl_chunk_save_state() and applied by ucl_chunk_restore_state().
 * NOTE(review): only the `pos` member is visible here, while the two helpers
 * also access `column`, `line` and `remain` - confirm the full member list.
 */
33 struct ucl_parser_saved_state {
37 const unsigned char *pos;
/*
 * ucl_chunk_skipc(chunk, p): statement-like macro (do-block) used throughout
 * the parser to step over the current character. The visible branches keep
 * (chunk)->column in step: one resets it to 0, the other increments it.
 * NOTE(review): the condition selecting the reset branch and the updates to
 * `p` and to other chunk fields are on lines not shown - presumably a
 * newline test; confirm. The legacy description below mentions a length and
 * a return value, which does not match a (chunk, p) statement macro - TODO
 * refresh it.
 */
41 * Move up to len characters
45 * @return new position in chunk
47 #define ucl_chunk_skipc(chunk, p) do{ \
50 (chunk)->column = 0; \
52 else (chunk)->column ++; \
/*
 * Record the chunk's current `column`, `line` and `remain` counters in *s.
 * @param chunk chunk whose position is captured
 * @param s destination snapshot
 * NOTE(review): further members may be saved on lines not shown - confirm.
 */
64 ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
66 s->column = chunk->column;
68 s->line = chunk->line;
69 s->remain = chunk->remain;
/*
 * Inverse of ucl_chunk_save_state(): copy `column`, `line` and `remain`
 * from the snapshot *s back into the chunk.
 * @param chunk chunk to rewind
 * @param s snapshot taken earlier
 * NOTE(review): further members may be restored on lines not shown - confirm.
 */
73 * Restore parser state
78 ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
80 chunk->column = s->column;
82 chunk->line = s->line;
83 chunk->remain = s->remain;
/*
 * Format a parse error into *err through ucl_create_err().
 * While the chunk position is still inside the chunk the message carries
 * the line, the column, the caller's text and the offending character; the
 * character is printed verbatim when isgraph() accepts it and as a
 * two-digit hex code otherwise. Past the end of the chunk a shorter
 * "end of chunk" message is produced.
 * @param chunk chunk supplying position information
 * @param code error code (NOTE(review): not used on any visible line - confirm)
 * @param str human-readable description
 * @param err destination for the formatted error
 */
87 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
89 if (chunk->pos < chunk->end) {
90 if (isgraph (*chunk->pos)) {
91 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
92 chunk->line, chunk->column, str, *chunk->pos);
95 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
96 chunk->line, chunk->column, str, (int)*chunk->pos);
100 ucl_create_err (err, "error at the end of chunk: %s", str);
/*
 * Advance the first chunk of the parser (parser->chunks) past comments.
 * The scan only runs when the parser is not already in the SCOMMENT or
 * MCOMMENT state. Multiline comments may nest: the depth is kept in
 * comments_nested, and reaching the end of input with a non-zero depth is
 * reported as UCL_ENESTED ("unfinished multiline comment").
 * NOTE(review): the branch tests for single-line comments and for the
 * closing sequence, and the return statements, are on lines not shown.
 */
105 * Skip all comments from the current pos resolving nested and multiline comments
110 ucl_skip_comments (struct ucl_parser *parser)
112 struct ucl_chunk *chunk = parser->chunks;
113 const unsigned char *p;
114 int comments_nested = 0;
120 if (parser->state != UCL_STATE_SCOMMENT &&
121 parser->state != UCL_STATE_MCOMMENT) {
122 while (p < chunk->end) {
124 ucl_chunk_skipc (chunk, p);
127 ucl_chunk_skipc (chunk, p);
/* A slash with at least two bytes left: presumably the start of a multiline comment */
131 else if (*p == '/' && chunk->remain >= 2) {
133 ucl_chunk_skipc (chunk, p);
135 ucl_chunk_skipc (chunk, p);
137 while (p < chunk->end) {
139 ucl_chunk_skipc (chunk, p);
142 if (comments_nested == 0) {
143 ucl_chunk_skipc (chunk, p);
147 ucl_chunk_skipc (chunk, p);
/* A further slash-star opener inside the comment body (nested comment) */
149 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
151 ucl_chunk_skipc (chunk, p);
152 ucl_chunk_skipc (chunk, p);
155 ucl_chunk_skipc (chunk, p);
/* Input ran out while a multiline comment was still open */
157 if (comments_nested != 0) {
158 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
/*
 * Map a size suffix character to its scale factor.
 * The lookup is case-insensitive (tolower) over a three-entry table; each
 * entry holds a decimal factor (mult_normal) and a binary factor
 * (mult_bytes). The rows for 'm' and 'g' are visible below.
 * NOTE(review): the first table row, the test that chooses between
 * mult_bytes and mult_normal (presumably is_bytes) and the value returned
 * for an unknown suffix are on lines not shown - confirm.
 */
168 * Return multiplier for a character
169 * @param c multiplier character
170 * @param is_bytes if true use 1024 multiplier
173 static inline unsigned long
174 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
180 {'m', 1000 * 1000, 1024 * 1024},
182 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
186 for (i = 0; i < 3; i ++) {
187 if (tolower (c) == multipliers[i].c) {
189 return multipliers[i].mult_bytes;
191 return multipliers[i].mult_normal;
/*
 * Map a time-unit suffix character to a multiplier expressed in seconds.
 * The lookup is case-insensitive (tolower) over a five-entry table; the
 * rows for 'w' and 'y' are visible below.
 * NOTE(review): the return type, the first three rows and the value
 * returned for an unknown suffix are on lines not shown - confirm.
 */
200 * Return multiplier for time scaling
205 ucl_lex_time_multiplier (const unsigned char c) {
213 {'w', 60 * 60 * 24 * 7},
/*
 * NOTE(review): this row scales the week factor by 365, i.e. 365 weeks
 * (about seven years) rather than 365 days. A year is presumably meant to
 * be 60 * 60 * 24 * 365 - confirm and drop the extra "* 7".
 */
214 {'y', 60 * 60 * 24 * 7 * 365}
218 for (i = 0; i < 5; i ++) {
219 if (tolower (c) == multipliers[i].c) {
220 return multipliers[i].mult;
/*
 * Thin wrapper: reports whether `c` belongs to the UCL_CHARACTER_VALUE_END
 * class by delegating to ucl_test_character().
 * @param c character to classify
 */
228 * Return true if a character is a end of an atom
233 ucl_lex_is_atom_end (const unsigned char c)
235 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
/*
 * Decide from a two-character lookahead (c1, c2) whether a comment starts
 * here. The visible branch handles `#` as the first character.
 * NOTE(review): the preceding branch (presumably the slash-star opener that
 * needs c2) and the return statements are on lines not shown - confirm.
 */
239 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
246 else if (c1 == '#') {
/*
 * Look for a registered variable whose name matches the text at `ptr`.
 * Walks parser->variables and compares names with memcmp(). Two matching
 * modes are visible: an exact-length match (remain == var_len) and a prefix
 * match (remain >= var_len); presumably `strict` selects between them. On a
 * match the variable's value length is added to *out_len and the returned
 * pointer is just past the variable name.
 * NOTE(review): the writes to *found and the no-match return value are on
 * lines not shown - confirm.
 */
253 * Check variable found
262 static inline const char *
263 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
264 size_t *out_len, bool strict, bool *found)
266 struct ucl_variable *var;
268 LL_FOREACH (parser->variables, var) {
/* Exact-length comparison */
270 if (remain == var->var_len) {
271 if (memcmp (ptr, var->var, var->var_len) == 0) {
272 *out_len += var->value_len;
274 return (ptr + var->var_len);
/* Prefix comparison: the name may be followed by more text */
279 if (remain >= var->var_len) {
280 if (memcmp (ptr, var->var, var->var_len) == 0) {
281 *out_len += var->value_len;
283 return (ptr + var->var_len);
/*
 * Sizing pass for one variable reference; `ptr` is positioned after the
 * introducing dollar sign (the caller passes p + 1).
 * Two forms are visible: a brace-enclosed name, matched in strict mode
 * against the text between the braces, and a bare name, matched in prefix
 * mode unless the next character is another dollar sign (an escaped
 * dollar). Both delegate to ucl_check_variable_safe(), which accumulates
 * the expanded length in *out_len.
 * NOTE(review): the update of *vars_found and the return statement are on
 * lines not shown - confirm.
 */
293 * Check for a variable in a given string
302 ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
304 const char *p, *end, *ret = ptr;
308 /* We need to match the variable enclosed in braces */
313 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
315 /* {} must be excluded actually */
329 else if (*ptr != '$') {
330 /* Not count escaped dollar sign */
331 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
332 if (found && !*vars_found) {
/*
 * Write the expansion of one variable reference into the output buffer.
 * `ptr` points at the introducing character, so scanning starts at ptr + 1.
 * A brace after it gets separate handling; otherwise the registered
 * variables are searched and the value of a matching name is copied with
 * memcpy() to the cursor taken from *dest.
 * NOTE(review): `remain` is passed relative to `ptr` while the comparison
 * starts at ptr + 1 - confirm that it is adjusted on lines not shown before
 * the memcmp() below, otherwise the compare can read one byte too far.
 */
348 * Expand a single variable
356 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
357 size_t remain, unsigned char **dest)
359 unsigned char *d = *dest;
360 const char *p = ptr + 1, *ret;
361 struct ucl_variable *var;
372 else if (*p == '{') {
378 LL_FOREACH (parser->variables, var) {
379 if (remain >= var->var_len) {
380 if (memcmp (p, var->var, var->var_len) == 0) {
381 memcpy (d, var->value, var->value_len);
/*
 * Expand variable references found in src[0 .. in_len).
 * Works in two passes: ucl_check_variable() is called first to accumulate
 * the expanded length in out_len, then a buffer of out_len + 1 bytes is
 * allocated into *dst and ucl_expand_single_variable() fills it.
 * NOTE(review): the loop conditions, the handling of vars_found == false
 * and the return value are on lines not shown - confirm.
 */
400 * Expand variables in string
408 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
409 const char *src, size_t in_len)
411 const char *p, *end = src + in_len;
414 bool vars_found = false;
/* Pass 1: measure */
419 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
433 *dst = UCL_ALLOC (out_len + 1);
/* Pass 2: copy with substitution */
442 p = ucl_expand_single_variable (parser, p, end - p, &d);
/*
 * Produce the final form of a key or value string.
 * A private copy of in_len + 1 bytes is allocated into *dst when any of the
 * following holds: the string needs unescaping, it needs lowercasing, it
 * needs variable expansion and variables are registered, or the parser was
 * not created with UCL_PARSER_ZEROCOPY. The copy is made with
 * ucl_strlcpy_tolower() or ucl_strlcpy_unsafe(), then optionally passed
 * through ucl_unescape_json_string() and ucl_expand_variable(). Allocation
 * failure is reported through ucl_set_err().
 * NOTE(review): the zero-copy branch that stores into *dst_const is on
 * lines not shown - confirm.
 */
455 * Store or copy pointer to the trash stack
456 * @param parser parser object
457 * @param src src string
458 * @param dst destination buffer (trash stack pointer)
459 * @param dst_const const destination pointer (e.g. value of object)
460 * @param in_len input length
461 * @param need_unescape need to unescape source (and copy it)
462 * @param need_lowercase need to lowercase value (and copy)
463 * @param need_expand need to expand variables (and copy as well)
464 * @return output length (excluding \0 symbol)
466 static inline ssize_t
467 ucl_copy_or_store_ptr (struct ucl_parser *parser,
468 const unsigned char *src, unsigned char **dst,
469 const char **dst_const, size_t in_len,
470 bool need_unescape, bool need_lowercase, bool need_expand)
472 ssize_t ret = -1, tret;
/* Any transformation, or a non-zero-copy parser, forces a private copy */
475 if (need_unescape || need_lowercase ||
476 (need_expand && parser->variables != NULL) ||
477 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
479 *dst = UCL_ALLOC (in_len + 1);
481 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
484 if (need_lowercase) {
485 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
488 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
492 ret = ucl_unescape_json_string (*dst, ret);
497 ret = ucl_expand_variable (parser, dst, tmp, ret);
499 /* Nothing to expand */
/*
 * Open a new container (object or array) and push it on the parser stack.
 * For an object the value hash is created and the parser moves to
 * UCL_STATE_KEY; for an array the parser moves to UCL_STATE_VALUE. A
 * ucl_stack entry is allocated and prepended to parser->stack, and
 * parser->cur_obj is pointed at the container.
 * NOTE(review): the conditions choosing between creating a new object and
 * retyping the one passed in (presumably obj == NULL and is_array), the
 * initialisation of the stack entry and the return value are on lines not
 * shown - confirm.
 */
515 * Create and append an object at the specified level
521 static inline ucl_object_t *
522 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
524 struct ucl_stack *st;
528 obj = ucl_object_typed_new (UCL_OBJECT);
531 obj->type = UCL_OBJECT;
533 obj->value.ov = ucl_hash_create ();
534 parser->state = UCL_STATE_KEY;
538 obj = ucl_object_typed_new (UCL_ARRAY);
541 obj->type = UCL_ARRAY;
543 parser->state = UCL_STATE_VALUE;
546 st = UCL_ALLOC (sizeof (struct ucl_stack));
549 LL_PREPEND (parser->stack, st);
550 parser->cur_obj = obj;
/*
 * Try to read a number (integer, hex integer, double or time value) from
 * [start, end) into obj.
 * The first part scans characters and tracks what was seen (dot, exponent,
 * hex prefix); the second converts with strtod() or strtoimax() and then
 * inspects an optional suffix: size multipliers via
 * ucl_lex_num_multiplier(), a "min" suffix, and time units via
 * ucl_lex_time_multiplier(). The result is stored as UCL_FLOAT, UCL_TIME
 * (obj->value.dv) or as an integer (obj->value.iv), negated when is_neg is
 * set.
 * NOTE(review): `p` is a plain `const char *` and is passed directly to
 * isdigit()/isxdigit()/tolower(); values outside the unsigned char range
 * are undefined behaviour there - confirm the input or cast to unsigned char.
 * NOTE(review): strtod()/strtoimax() read until the first non-numeric
 * character and ignore `end` - confirm the buffer is NUL-terminated.
 */
556 ucl_maybe_parse_number (ucl_object_t *obj,
557 const char *start, const char *end, const char **pos, bool allow_double, bool number_bytes)
559 const char *p = start, *c = start;
561 bool got_dot = false, got_exp = false, need_double = false,
562 is_date = false, valid_start = false, is_hex = false,
573 if (is_hex && isxdigit (*p)) {
576 else if (isdigit (*p)) {
580 else if (!is_hex && (*p == 'x' || *p == 'X')) {
/* A hex prefix rules out floating point */
582 allow_double = false;
585 else if (allow_double) {
587 /* Empty digits sequence, not a number */
591 else if (*p == '.') {
593 /* Double dots, not a number */
603 else if (*p == 'e' || *p == 'E') {
605 /* Double exp, not a number */
617 if (!isdigit (*p) && *p != '+' && *p != '-') {
618 /* Wrong exponent sign */
628 /* Got the end of the number, need to check */
644 dv = strtod (c, &endptr);
648 lv = strtoimax (c, &endptr, 16);
651 lv = strtoimax (c, &endptr, 10);
/* NOTE(review): this test is only reliable if errno is cleared before the conversion - confirm on the lines not shown */
654 if (errno == ERANGE) {
659 /* Now check endptr */
660 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
665 if (endptr < end && endptr != start) {
675 if (p[1] == 's' || p[1] == 'S') {
682 if (p[0] == 'm' || p[0] == 'M') {
686 dv *= ucl_lex_num_multiplier (*p, false);
/* Binary multiplier: requested by the caller or spelled with a trailing b */
691 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
697 lv *= ucl_lex_num_multiplier (*p, true);
701 else if (ucl_lex_is_atom_end (p[1])) {
703 dv *= ucl_lex_num_multiplier (*p, false);
706 lv *= ucl_lex_num_multiplier (*p, number_bytes);
/* Three-letter "min" suffix, matched case-insensitively */
711 else if (end - p >= 3) {
712 if (tolower (p[0]) == 'm' &&
713 tolower (p[1]) == 'i' &&
714 tolower (p[2]) == 'n') {
729 dv *= ucl_lex_num_multiplier (*p, false);
732 lv *= ucl_lex_num_multiplier (*p, number_bytes);
740 if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
758 if (p == end - 1 || ucl_lex_is_atom_end (p[1])) {
764 dv *= ucl_lex_time_multiplier (*p);
/* Store the result in the representation that matches what was parsed */
776 if (allow_double && (need_double || is_date)) {
778 obj->type = UCL_FLOAT;
781 obj->type = UCL_TIME;
783 obj->value.dv = is_neg ? (-dv) : dv;
787 obj->value.iv = is_neg ? (-lv) : lv;
/*
 * Chunk-level wrapper around ucl_maybe_parse_number(): parses at
 * chunk->pos with doubles allowed and byte multipliers off, then moves the
 * chunk's `remain` and `column` counters forward by the number of
 * characters consumed. An ERANGE result is turned into a
 * "numeric value out of range" parser error.
 * NOTE(review): the success test and the update of chunk->pos are on lines
 * not shown - confirm.
 */
794 * Parse possible number
797 * @return true if a number has been parsed
800 ucl_lex_number (struct ucl_parser *parser,
801 struct ucl_chunk *chunk, ucl_object_t *obj)
803 const unsigned char *pos;
806 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, true, false);
809 chunk->remain -= pos - chunk->pos;
810 chunk->column += pos - chunk->pos;
814 else if (ret == ERANGE) {
815 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
/*
 * Scan a JSON-style quoted string starting at chunk->pos.
 * Visible behaviour: raw control characters are rejected ("unexpected
 * newline" / "unexpected control character"); a backslash must be followed
 * by a character of the UCL_CHARACTER_ESCAPE class, and one escape form is
 * followed by up to four hex digits, each checked with isxdigit(); any
 * escape sets *need_unescape. Characters of the UCL_CHARACTER_UCL_UNSAFE
 * class get separate handling. Running out of input yields
 * "no quote at the end of json string".
 * NOTE(review): how *ucl_escape and *var_expand are set, and the handling
 * of the closing quote, are on lines not shown - confirm.
 */
822 * Parse quoted string with possible escapes
825 * @return true if a string has been parsed
828 ucl_lex_json_string (struct ucl_parser *parser,
829 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
831 const unsigned char *p = chunk->pos;
835 while (p < chunk->end) {
838 /* Unmasked control character */
840 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
843 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
847 else if (c == '\\') {
848 ucl_chunk_skipc (chunk, p);
/* A backslash as the very last byte cannot form an escape */
850 if (p >= chunk->end) {
851 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
854 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
856 ucl_chunk_skipc (chunk, p);
/* Up to four hex digits follow this escape form */
857 for (i = 0; i < 4 && p < chunk->end; i ++) {
858 if (!isxdigit (*p)) {
859 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
862 ucl_chunk_skipc (chunk, p);
864 if (p >= chunk->end) {
865 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
870 ucl_chunk_skipc (chunk, p);
873 *need_unescape = true;
878 ucl_chunk_skipc (chunk, p);
881 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
887 ucl_chunk_skipc (chunk, p);
890 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
/*
 * Parse one key of the object on top of the parser stack and attach a new
 * child object for it.
 * Visible phases:
 *  1. find the key start, skipping whitespace and comments; a quote starts
 *     a JSON-style key, a closing brace reports *end_of_object, and a dot
 *     switches the parser to UCL_STATE_MACRO_NAME;
 *  2. scan the key body (bare characters of the UCL_CHARACTER_KEY class, or
 *     a quoted string via ucl_lex_json_string());
 *  3. consume what follows the key: whitespace, at most one `=` or `:`
 *     separator, and comments;
 *  4. look ahead to decide whether another key or a container follows;
 *  5. create the object, copy or store the key with
 *     ucl_copy_or_store_ptr(), reject empty keys, and insert it in the
 *     container hash, or append it to an existing entry found under the
 *     same key.
 * NOTE(review): the writes to *next_key and the return statements are on
 * lines not shown - confirm.
 */
895 * Parse a key in an object
898 * @return true if a key has been parsed
901 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
903 const unsigned char *p, *c = NULL, *end, *t;
904 const char *key = NULL;
905 bool got_quote = false, got_eq = false, got_semicolon = false,
906 need_unescape = false, ucl_escape = false, var_expand = false,
907 got_content = false, got_sep = false;
908 ucl_object_t *nobj, *tobj;
909 ucl_hash_t *container;
915 /* It is macro actually */
916 ucl_chunk_skipc (chunk, p);
917 parser->prev_state = parser->state;
918 parser->state = UCL_STATE_MACRO_NAME;
921 while (p < chunk->end) {
923 * A key must start with alpha, number, '/' or '_' and end with space character
926 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
927 if (!ucl_skip_comments (parser)) {
932 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
933 ucl_chunk_skipc (chunk, p);
935 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
936 /* The first symbol */
938 ucl_chunk_skipc (chunk, p);
941 else if (*p == '"') {
946 ucl_chunk_skipc (chunk, p);
948 else if (*p == '}') {
949 /* We have actually end of an object */
950 *end_of_object = true;
953 else if (*p == '.') {
954 ucl_chunk_skipc (chunk, p);
955 parser->prev_state = parser->state;
956 parser->state = UCL_STATE_MACRO_NAME;
960 /* Invalid identifier */
961 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
966 /* Parse the body of a key */
968 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
970 ucl_chunk_skipc (chunk, p);
972 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
977 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
982 /* We need to parse json like quoted string */
983 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
986 /* Always escape keys obtained via json */
/* The chunk position is just past the closing quote, so the key ends one byte earlier */
987 end = chunk->pos - 1;
994 if (p >= chunk->end && got_content) {
995 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
998 else if (!got_content) {
1001 *end_of_object = false;
1002 /* We are now at the end of the key, need to parse the rest */
1003 while (p < chunk->end) {
1004 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1005 ucl_chunk_skipc (chunk, p);
1007 else if (*p == '=') {
/* Only one separator is accepted per key */
1008 if (!got_eq && !got_semicolon) {
1009 ucl_chunk_skipc (chunk, p);
1013 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1017 else if (*p == ':') {
1018 if (!got_eq && !got_semicolon) {
1019 ucl_chunk_skipc (chunk, p);
/* Despite its name this flag records a colon separator */
1020 got_semicolon = true;
1023 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1027 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1028 /* Check for comment */
1029 if (!ucl_skip_comments (parser)) {
1040 if (p >= chunk->end && got_content) {
1041 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1045 got_sep = got_semicolon || got_eq;
1049 * Maybe we have more keys nested, so search for termination character.
1051 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1052 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1053 * 3) key1 value[;,\n] <- we treat that as linear object
/*
 * NOTE(review): unlike the scan a few lines below, this whitespace skip
 * has no visible `t < chunk->end` bound - confirm it cannot read past the
 * end of the chunk when the input ends in whitespace.
 */
1057 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1060 /* Check first non-space character after a key */
1061 if (*t != '{' && *t != '[') {
1062 while (t < chunk->end) {
1063 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1066 else if (*t == '{' || *t == '[') {
1075 /* Create a new object */
1076 nobj = ucl_object_new ();
1077 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1078 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1080 ucl_object_free(nobj);
1083 else if (keylen == 0) {
1084 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1085 ucl_object_free(nobj);
1089 container = parser->stack->obj->value.ov;
1091 nobj->keylen = keylen;
/* Look for an existing entry stored under the same key */
1092 tobj = ucl_hash_search_obj (container, nobj);
1094 container = ucl_hash_insert_object (container, nobj);
1097 parser->stack->obj->len ++;
1100 DL_APPEND (tobj, nobj);
1104 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1106 parser->stack->obj->value.ov = container;
1108 parser->cur_obj = nobj;
/*
 * Scan an unquoted string value and stop at its terminating character.
 * Opening and closing counts are kept per brace kind in braces[kind][0] and
 * braces[kind][1]; a closing brace or bracket whose count does not exceed
 * the matching opening count is treated as part of the value rather than
 * as a terminator. A dollar sign gets separate handling (presumably
 * setting *var_expand), a backslash sets *need_unescape and skips the next
 * character, and the scan ends at an atom-end character or at the start of
 * a comment. Running out of input is reported as "unfinished value".
 * NOTE(review): the legacy @return text below says "key"; this function
 * handles values - TODO refresh it.
 */
1117 * @return true if a key has been parsed
1120 ucl_parse_string_value (struct ucl_parser *parser,
1121 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1123 const unsigned char *p;
1125 UCL_BRACE_ROUND = 0,
1129 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1133 while (p < chunk->end) {
1135 /* Skip pairs of figure braces */
1137 braces[UCL_BRACE_FIGURE][0] ++;
1139 else if (*p == '}') {
1140 braces[UCL_BRACE_FIGURE][1] ++;
1141 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1142 /* This is not a termination symbol, continue */
1143 ucl_chunk_skipc (chunk, p);
1147 /* Skip pairs of square braces */
1148 else if (*p == '[') {
1149 braces[UCL_BRACE_SQUARE][0] ++;
1151 else if (*p == ']') {
1152 braces[UCL_BRACE_SQUARE][1] ++;
1153 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1154 /* This is not a termination symbol, continue */
1155 ucl_chunk_skipc (chunk, p);
1159 else if (*p == '$') {
1162 else if (*p == '\\') {
1163 *need_unescape = true;
1164 ucl_chunk_skipc (chunk, p);
/* Step over the escaped character as well, if there is one */
1165 if (p < chunk->end) {
1166 ucl_chunk_skipc (chunk, p);
1171 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1174 ucl_chunk_skipc (chunk, p);
1177 if (p >= chunk->end) {
1178 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
/*
 * Scan a heredoc-style multiline string until the terminator `term`
 * (term_len bytes) is found followed by a line break. On a match the chunk
 * is advanced past the terminator: `remain` shrinks by term_len, `pos`
 * moves to p + term_len and `column` is set to term_len.
 * NOTE(review): the use of the `newline` flag, the writes through `beg`
 * and the returned length are on lines not shown - confirm.
 */
1186 * Parse multiline string ending with \n{term}\n
1191 * @return size of multiline string or 0 in case of error
1194 ucl_parse_multiline_string (struct ucl_parser *parser,
1195 struct ucl_chunk *chunk, const unsigned char *term,
1196 int term_len, unsigned char const **beg,
1199 const unsigned char *p, *c;
1200 bool newline = false;
1207 while (p < chunk->end) {
1209 if (chunk->end - p < term_len) {
/*
 * NOTE(review): the guard above only guarantees term_len readable bytes,
 * yet p[term_len] is read below; with exactly term_len bytes left that is
 * the byte at chunk->end. Confirm the buffer is NUL-terminated or tighten
 * the guard to `<=`.
 */
1212 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1214 chunk->remain -= term_len;
1215 chunk->pos = p + term_len;
1216 chunk->column = term_len;
1230 ucl_chunk_skipc (chunk, p);
/*
 * Return the object that should receive the value being parsed.
 * Inside an array a fresh object is allocated, made the parser's current
 * object and counted in the array's length; otherwise the object already
 * stored in parser->cur_obj is reused.
 * NOTE(review): the statement that links the new object into the array
 * list `t`, and the return, are on lines not shown - confirm.
 */
1236 static ucl_object_t*
1237 ucl_get_value_object (struct ucl_parser *parser)
1239 ucl_object_t *t, *obj = NULL;
1241 if (parser->stack->obj->type == UCL_ARRAY) {
1242 /* Object must be allocated */
1243 obj = ucl_object_new ();
1244 t = parser->stack->obj->value.av;
1246 parser->cur_obj = obj;
1247 parser->stack->obj->value.av = t;
1248 parser->stack->obj->len ++;
1251 /* Object has been already allocated */
1252 obj = parser->cur_obj;
/*
 * Parse one value at the current chunk position and store it in the object
 * returned by ucl_get_value_object().
 * Visible cases: a quoted JSON string; the start of a nested object or
 * array (pushed with ucl_add_parser_stack()); the end of an array; a
 * `<<TERM` multiline string whose terminator consists of uppercase
 * letters; a number (ucl_lex_number()); and finally a bare string, which
 * is also checked for the literal `null` and for boolean spellings. Every
 * completed scalar moves the parser to UCL_STATE_AFTER_VALUE.
 * NOTE(review): the switch/case labels and the return statements are on
 * lines not shown - confirm.
 */
1265 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1267 const unsigned char *p, *c;
1268 ucl_object_t *obj = NULL;
1269 unsigned int stripped_spaces;
1271 bool need_unescape = false, ucl_escape = false, var_expand = false;
1275 /* Skip any spaces and comments */
1276 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1277 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1278 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1279 ucl_chunk_skipc (chunk, p);
1281 if (!ucl_skip_comments (parser)) {
1287 while (p < chunk->end) {
1291 obj = ucl_get_value_object (parser);
1292 ucl_chunk_skipc (chunk, p);
1293 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
/* Length between the quotes: both quote characters are excluded */
1296 str_len = chunk->pos - c - 2;
1297 obj->type = UCL_STRING;
1298 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1299 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1303 parser->state = UCL_STATE_AFTER_VALUE;
1308 obj = ucl_get_value_object (parser);
1309 /* We have a new object */
1310 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1312 ucl_chunk_skipc (chunk, p);
1316 obj = ucl_get_value_object (parser);
1317 /* We have a new array */
1318 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1320 ucl_chunk_skipc (chunk, p);
1324 /* We have the array ending */
1325 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1326 parser->state = UCL_STATE_AFTER_VALUE;
1334 obj = ucl_get_value_object (parser);
1335 /* We have something like multiline value, which must be <<[A-Z]+\n */
1336 if (chunk->end - p > 3) {
1337 if (memcmp (p, "<<", 2) == 0) {
1339 /* We allow only uppercase characters in multiline definitions */
1340 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1344 /* Set chunk positions and start multiline parsing */
1346 chunk->remain -= p - c;
1350 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1351 p - c, &c, &var_expand)) == 0) {
1352 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1355 obj->type = UCL_STRING;
1356 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1357 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1361 parser->state = UCL_STATE_AFTER_VALUE;
1366 /* Fallback to ordinary strings */
1370 obj = ucl_get_value_object (parser);
1373 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1374 if (!ucl_lex_number (parser, chunk, obj)) {
1375 if (parser->state == UCL_STATE_ERROR) {
1380 parser->state = UCL_STATE_AFTER_VALUE;
1383 /* Fallback to normal string */
1386 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1389 /* Cut trailing spaces */
1390 stripped_spaces = 0;
/* NOTE(review): this backward scan has no visible lower bound at `c` - confirm it cannot step before the start of the value */
1391 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1392 UCL_CHARACTER_WHITESPACE)) {
1395 str_len = chunk->pos - c - stripped_spaces;
1397 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1400 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1402 obj->type = UCL_NULL;
1404 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1405 obj->type = UCL_STRING;
1406 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1407 &obj->value.sv, str_len, need_unescape,
1408 false, var_expand)) == -1) {
1413 parser->state = UCL_STATE_AFTER_VALUE;
/*
 * Consume what follows a value: whitespace, comments (which count as a
 * separator), explicit separators, and closing braces or brackets.
 * A closing character must match the type of the container on top of the
 * stack; the matching entry is popped and freed, followed by further
 * entries while the visible level test allows it. A closer with an empty
 * stack, or of the wrong kind, is a syntax error, and anything else that
 * is not a separator yields "delimiter is missing".
 * NOTE(review): the statements that set got_sep and the return statements
 * are on lines not shown - confirm.
 */
1425 * Handle after value data
1431 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1433 const unsigned char *p;
1434 bool got_sep = false;
1435 struct ucl_stack *st;
1439 while (p < chunk->end) {
1440 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1441 /* Skip whitespaces */
1442 ucl_chunk_skipc (chunk, p);
1444 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1446 if (!ucl_skip_comments (parser)) {
1449 /* Treat comment as a separator */
1453 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1454 if (*p == '}' || *p == ']') {
1455 if (parser->stack == NULL) {
1456 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
/* The closing character has to match the container on top of the stack */
1459 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1460 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1462 /* Pop all nested objects from a stack */
1464 parser->stack = st->next;
1465 UCL_FREE (sizeof (struct ucl_stack), st);
1467 while (parser->stack != NULL) {
/* Stop at the bottom of the stack or when the next entry is on the same level */
1469 if (st->next == NULL || st->next->level == st->level) {
1472 parser->stack = st->next;
1473 UCL_FREE (sizeof (struct ucl_stack), st);
1477 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1481 if (parser->stack == NULL) {
1482 /* Ignore everything after a top object */
1486 ucl_chunk_skipc (chunk, p);
1491 /* Got a separator */
1493 ucl_chunk_skipc (chunk, p);
1499 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
/*
 * Locate the argument of a macro and report it through *macro_start and
 * *macro_len without copying it.
 * Three shapes are visible: a quoted string (scanned with
 * ucl_lex_json_string(); the reported span excludes both quotes), a
 * multiline body with leading whitespace skipped, and a bare token that
 * ends at the first atom-end character. Afterwards trailing whitespace and
 * semicolons are skipped.
 * NOTE(review): the `macro` parameter is not used on any visible line, and
 * the delimiter tests and return statements are on lines not shown -
 * confirm.
 */
1516 ucl_parse_macro_value (struct ucl_parser *parser,
1517 struct ucl_chunk *chunk, struct ucl_macro *macro,
1518 unsigned char const **macro_start, size_t *macro_len)
1520 const unsigned char *p, *c;
1521 bool need_unescape = false, ucl_escape = false, var_expand = false;
1527 /* We have macro value encoded in quotes */
1529 ucl_chunk_skipc (chunk, p);
1530 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
/* Report the text between the quotes */
1534 *macro_start = c + 1;
1535 *macro_len = chunk->pos - c - 2;
1539 /* We got a multiline macro body */
1540 ucl_chunk_skipc (chunk, p);
1541 /* Skip spaces at the beginning */
1542 while (p < chunk->end) {
1543 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1544 ucl_chunk_skipc (chunk, p);
1551 while (p < chunk->end) {
1555 ucl_chunk_skipc (chunk, p);
1559 ucl_chunk_skipc (chunk, p);
1562 /* Macro is not enclosed in quotes or braces */
1564 while (p < chunk->end) {
1565 if (ucl_lex_is_atom_end (*p)) {
1568 ucl_chunk_skipc (chunk, p);
1575 /* We are at the end of a macro */
1576 /* Skip ';' and space characters and return to previous state */
1577 while (p < chunk->end) {
1578 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1581 ucl_chunk_skipc (chunk, p);
/*
 * Main driver: runs the parser over the first chunk in parser->chunks
 * until the chunk is exhausted or an error state is reached.
 * On first use the top-level container is created - an array when the
 * input starts with an opening bracket, an object otherwise - and the
 * state is set to UCL_STATE_INIT. Each loop iteration dispatches on
 * parser->state to the key, value, after-value and macro handlers; a
 * handler failure stores the current state in prev_state and switches to
 * UCL_STATE_ERROR.
 * NOTE(review): the legacy @param lines below mention `data` and `len`,
 * which are not parameters of this function - TODO refresh them.
 * NOTE(review): *chunk->pos is read before any visible check that the
 * chunk is non-empty - confirm zero-length chunks are rejected earlier.
 */
1587 * Handle the main states of rcl parser
1588 * @param parser parser structure
1589 * @param data the pointer to the beginning of a chunk
1590 * @param len the length of a chunk
1591 * @return true if chunk has been parsed and false in case of error
1594 ucl_state_machine (struct ucl_parser *parser)
1597 struct ucl_chunk *chunk = parser->chunks;
1598 const unsigned char *p, *c = NULL, *macro_start = NULL;
1599 unsigned char *macro_escaped;
1600 size_t macro_len = 0;
1601 struct ucl_macro *macro = NULL;
1602 bool next_key = false, end_of_object = false;
/* First chunk for this parser: create the top-level container */
1604 if (parser->top_obj == NULL) {
1605 if (*chunk->pos == '[') {
1606 obj = ucl_add_parser_stack (NULL, parser, true, 0);
1609 obj = ucl_add_parser_stack (NULL, parser, false, 0);
1611 parser->top_obj = obj;
1612 parser->cur_obj = obj;
1613 parser->state = UCL_STATE_INIT;
1617 while (chunk->pos < chunk->end) {
1618 switch (parser->state) {
1619 case UCL_STATE_INIT:
1621 * At the init state we can either go to the parse array or object
1622 * if we got [ or { correspondingly or can just treat new data as
1623 * a key of newly created object
1625 obj = parser->cur_obj;
1626 if (!ucl_skip_comments (parser)) {
1627 parser->prev_state = parser->state;
1628 parser->state = UCL_STATE_ERROR;
1634 parser->state = UCL_STATE_VALUE;
1635 ucl_chunk_skipc (chunk, p);
1638 parser->state = UCL_STATE_KEY;
1640 ucl_chunk_skipc (chunk, p);
1646 /* Skip any spaces */
1647 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1648 ucl_chunk_skipc (chunk, p);
1651 /* We have the end of an object */
1652 parser->state = UCL_STATE_AFTER_VALUE;
1655 if (parser->stack == NULL) {
1656 /* No objects are on stack, but we want to parse a key */
1657 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1658 "expects a key", &parser->err);
1659 parser->prev_state = parser->state;
1660 parser->state = UCL_STATE_ERROR;
1663 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1664 parser->prev_state = parser->state;
1665 parser->state = UCL_STATE_ERROR;
1668 if (end_of_object) {
1670 parser->state = UCL_STATE_AFTER_VALUE;
/* ucl_parse_key() may have switched to the macro-name state by itself */
1673 else if (parser->state != UCL_STATE_MACRO_NAME) {
1674 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1675 /* Parse more keys and nest objects accordingly */
1676 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, parser->stack->level + 1);
1679 parser->state = UCL_STATE_VALUE;
1687 case UCL_STATE_VALUE:
1688 /* We need to check what we do have */
1689 if (!ucl_parse_value (parser, chunk)) {
1690 parser->prev_state = parser->state;
1691 parser->state = UCL_STATE_ERROR;
1694 /* State is set in ucl_parse_value call */
1697 case UCL_STATE_AFTER_VALUE:
1698 if (!ucl_parse_after_value (parser, chunk)) {
1699 parser->prev_state = parser->state;
1700 parser->state = UCL_STATE_ERROR;
/* Next state depends on the container now on top of the stack */
1703 if (parser->stack != NULL) {
1704 if (parser->stack->obj->type == UCL_OBJECT) {
1705 parser->state = UCL_STATE_KEY;
1709 parser->state = UCL_STATE_VALUE;
1713 /* Skip everything at the end */
1718 case UCL_STATE_MACRO_NAME:
1719 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1720 ucl_chunk_skipc (chunk, p);
1722 else if (p - c > 0) {
1723 /* We got macro name */
1724 macro_len = (size_t)(p - c);
1725 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1726 if (macro == NULL) {
1727 ucl_create_err (&parser->err, "error on line %d at column %d: "
1728 "unknown macro: '%.*s', character: '%c'",
1729 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1730 parser->state = UCL_STATE_ERROR;
1733 /* Now we need to skip all spaces */
1734 while (p < chunk->end) {
1735 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1736 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1738 if (!ucl_skip_comments (parser)) {
1745 ucl_chunk_skipc (chunk, p);
1747 parser->state = UCL_STATE_MACRO;
1750 case UCL_STATE_MACRO:
/*
 * NOTE(review): the macron sequences in the two calls below look like
 * address-of expressions on macro_start, macro_len and macro_escaped that
 * were damaged by HTML-entity decoding - restore the ampersand form when
 * repairing the file.
 */
1751 if (!ucl_parse_macro_value (parser, chunk, macro,
1752 ¯o_start, ¯o_len)) {
1753 parser->prev_state = parser->state;
1754 parser->state = UCL_STATE_ERROR;
1757 macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len);
1758 parser->state = parser->prev_state;
/* With no expanded copy the handler receives the original span; otherwise the copy is passed and freed */
1759 if (macro_escaped == NULL) {
1760 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1765 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1766 UCL_FREE (macro_len + 1, macro_escaped);
1769 UCL_FREE (macro_len + 1, macro_escaped);
1774 /* TODO: add all states */
1775 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1776 parser->state = UCL_STATE_ERROR;
/*
 * Allocate and zero a parser, register the built-in macros `include`,
 * `try_include` and `includes` (each receives the parser itself as user
 * data) and initialise the file variables with ucl_parser_set_filevars().
 * @param flags parser flags (NOTE(review): stored on a line not shown - confirm)
 * NOTE(review): the allocation result is passed straight to memset() with
 * no NULL check in between - confirm UCL_ALLOC cannot fail or add a check.
 */
1785 ucl_parser_new (int flags)
1787 struct ucl_parser *new;
1789 new = UCL_ALLOC (sizeof (struct ucl_parser));
1790 memset (new, 0, sizeof (struct ucl_parser));
1792 ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1793 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1794 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1798 /* Initial assumption about filevars */
1799 ucl_parser_set_filevars (new, NULL, false);
/*
 * Register a macro handler under `macro`.
 * A zeroed ucl_macro record is allocated, the handler is stored, the name
 * is duplicated with strdup() and the record is added to parser->macroes
 * keyed by that copy.
 * @param parser parser to extend
 * @param macro macro name (copied)
 * @param handler callback invoked for the macro
 * @param ud user data (NOTE(review): stored on a line not shown - confirm)
 * NOTE(review): the allocation result is used by memset() without a NULL
 * check, and the strdup() result is passed to strlen() unchecked - confirm.
 */
1806 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1807 ucl_macro_handler handler, void* ud)
1809 struct ucl_macro *new;
1811 new = UCL_ALLOC (sizeof (struct ucl_macro));
1812 memset (new, 0, sizeof (struct ucl_macro));
1813 new->handler = handler;
1814 new->name = strdup (macro);
1816 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
/*
 * Create, update or remove a parser variable.
 * The variable list is searched by name with strcmp(). A NULL value
 * removes an existing entry; otherwise a missing entry is allocated,
 * filled with copies of the name and value and prepended to the list,
 * while an existing entry gets a fresh copy of the value.
 * NOTE(review): the third parameter (presumably `const char *value`) is
 * declared on a line not shown.
 */
1820 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1823 struct ucl_variable *new = NULL, *cur;
1829 /* Find whether a variable already exists */
1830 LL_FOREACH (parser->variables, cur) {
1831 if (strcmp (cur->var, var) == 0) {
1837 if (value == NULL) {
1840 /* Remove variable */
1841 LL_DELETE (parser->variables, new);
1844 UCL_FREE (sizeof (struct ucl_variable), new);
/* NOTE(review): the allocation below is used by memset() without a NULL check - confirm */
1853 new = UCL_ALLOC (sizeof (struct ucl_variable));
1854 memset (new, 0, sizeof (struct ucl_variable));
1855 new->var = strdup (var);
1856 new->var_len = strlen (var);
1857 new->value = strdup (value);
1858 new->value_len = strlen (value);
1860 LL_PREPEND (parser->variables, new);
/* NOTE(review): confirm the previous value is released on the line not shown before it is overwritten here */
1864 new->value = strdup (value);
1865 new->value_len = strlen (value);
/*
 * Feed a buffer to the parser.
 * Unless the parser is already in the error state, a chunk descriptor is
 * allocated for data[0 .. len), prepended to parser->chunks, the recursion
 * counter is incremented and checked against UCL_MAX_RECURSION, and
 * ucl_state_machine() is run; its result is returned. A parser in the
 * error state only gets an "invalid state" error recorded.
 * NOTE(review): the chunk only stores pointers into `data` (begin, pos,
 * end), so the buffer presumably has to outlive parsing - confirm.
 * NOTE(review): the allocation result is dereferenced on the next line
 * with no NULL check - confirm UCL_ALLOC cannot fail or add a check.
 */
1871 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1874 struct ucl_chunk *chunk;
1876 if (parser->state != UCL_STATE_ERROR) {
1877 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1878 chunk->begin = data;
1879 chunk->remain = len;
1880 chunk->pos = chunk->begin;
1881 chunk->end = chunk->begin + len;
1884 LL_PREPEND (parser->chunks, chunk);
1885 parser->recursion ++;
1886 if (parser->recursion > UCL_MAX_RECURSION) {
1887 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1891 return ucl_state_machine (parser);
1894 ucl_create_err (&parser->err, "a parser is in an invalid state");