1 /* Copyright (c) 2013, Vsevolod Stakhov
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
30 * The implementation of ucl parser
/*
 * Saved chunk position. ucl_parse_macro_arguments copies chunk->column,
 * chunk->line, chunk->pos and chunk->remain into one of these and writes
 * them back to the chunk later. Only the `pos` member is visible in this
 * extract.
 */
33 struct ucl_parser_saved_state {
37 const unsigned char *pos;
/*
 * Position-tracking step over the chunk. The visible lines reset
 * (chunk)->column to 0 on one branch and increment it on the other; the
 * branch condition and any pointer/remain updates are not visible in this
 * extract.
 * NOTE(review): the doc fragment below mentions `len` and a return value,
 * neither of which this macro has - it looks stale; confirm.
 */
41 * Move up to len characters
45 * @return new position in chunk
47 #define ucl_chunk_skipc(chunk, p) do{ \
50 (chunk)->column = 0; \
52 else (chunk)->column ++; \
/*
 * Format a parse error message into *err through ucl_create_err.
 * The message names parser->cur_file (or `<unknown>` when it is not set).
 * While chunk->pos is still inside the chunk it also carries the current
 * line, column and the character at the position; otherwise an
 * end-of-chunk message is produced.
 * NOTE(review): no use of `code` is visible in this extract.
 */
59 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
61 const char *fmt_string, *filename;
62 struct ucl_chunk *chunk = parser->chunks;
64 if (parser->cur_file) {
65 filename = parser->cur_file;
68 filename = "<unknown>";
70 if (chunk->pos < chunk->end) {
/* Printable characters are shown verbatim, anything else as a hex code. */
71 if (isgraph (*chunk->pos)) {
72 fmt_string = "error while parsing %s: "
73 "line: %d, column: %d - '%s', character: '%c'";
76 fmt_string = "error while parsing %s: "
77 "line: %d, column: %d - '%s', character: '0x%02x'";
79 ucl_create_err (err, fmt_string,
80 filename, chunk->line, chunk->column,
/* Position is at or past chunk->end: there is no character to report. */
84 ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
/*
 * Skip comments at the current chunk position. The visible code recognises
 * a comment starting with '#' (handled only while parser->state is neither
 * UCL_STATE_SCOMMENT nor UCL_STATE_MCOMMENT) and a comment starting with
 * '/' when at least two bytes remain. For the second form a nesting counter
 * is kept: a slash-star pair inside the body is presumably counted as a
 * nested opener, and a counter that is still non-zero after the scan is
 * reported as UCL_ENESTED.
 */
90 * Skip all comments from the current pos resolving nested and multiline comments
95 ucl_skip_comments (struct ucl_parser *parser)
97 struct ucl_chunk *chunk = parser->chunks;
98 const unsigned char *p;
99 int comments_nested = 0;
105 if (chunk->remain > 0 && *p == '#') {
106 if (parser->state != UCL_STATE_SCOMMENT &&
107 parser->state != UCL_STATE_MCOMMENT) {
108 while (p < chunk->end) {
110 ucl_chunk_skipc (chunk, p);
113 ucl_chunk_skipc (chunk, p);
117 else if (chunk->remain >= 2 && *p == '/') {
119 ucl_chunk_skipc (chunk, p);
121 ucl_chunk_skipc (chunk, p);
123 while (p < chunk->end) {
/* NOTE(review): this reads the byte before p - confirm p can never be the first byte of the buffer here. */
124 if (*p == '"' && *(p - 1) != '\\') {
130 ucl_chunk_skipc (chunk, p);
133 if (comments_nested == 0) {
134 ucl_chunk_skipc (chunk, p);
138 ucl_chunk_skipc (chunk, p);
140 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
142 ucl_chunk_skipc (chunk, p);
143 ucl_chunk_skipc (chunk, p);
147 ucl_chunk_skipc (chunk, p);
149 if (comments_nested != 0) {
150 ucl_set_err (parser, UCL_ENESTED,
151 "unfinished multiline comment", &parser->err);
/*
 * Look up the size multiplier for a suffix character. The character is
 * lower-cased with tolower() and compared against the rows of a table; the
 * matching row supplies either mult_bytes (powers of 1024) or mult_normal
 * (powers of 1000). Only the 'm' and 'g' rows are visible in this extract.
 */
161 * Return multiplier for a character
162 * @param c multiplier character
163 * @param is_bytes if true use 1024 multiplier
166 static inline unsigned long
167 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
173 {'m', 1000 * 1000, 1024 * 1024},
175 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
/* NOTE(review): the bound 3 is hard-coded and has to match the number of table rows. */
179 for (i = 0; i < 3; i ++) {
180 if (tolower (c) == multipliers[i].c) {
182 return multipliers[i].mult_bytes;
184 return multipliers[i].mult_normal;
/**
 * Return multiplier for time scaling
 * @param c time suffix character, matched case-insensitively:
 *          'm' minutes, 'h' hours, 'd' days, 'w' weeks, 'y' years
 * @return number of seconds in one unit of c, or 1 when c is not a known
 *         suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	/*
	 * NOTE(review): the 'm', 'h' and 'd' rows and the fallback return were
	 * not visible in the damaged listing; they are reconstructed from the
	 * five-row loop bound and the visible 'w'/'y' rows - confirm against a
	 * pristine copy of the file.
	 */
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
		{'m', 60},
		{'h', 60 * 60},
		{'d', 60 * 60 * 24},
		{'w', 60 * 60 * 24 * 7},
		/* A year is 365 days; the former `7 * 365` factor gave seven years */
		{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	unsigned int i;

	/* The bound follows the table size so rows cannot be silently skipped */
	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
221 * Return true if a character is an end of an atom
/* Report whether c is in the UCL_CHARACTER_VALUE_END class, as decided by ucl_test_character. */
226 ucl_lex_is_atom_end (const unsigned char c)
228 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
/*
 * Test whether the character pair c1, c2 starts a comment. Only the
 * `c1 == '#'` branch is visible in this extract; the preceding branch and
 * the return statements are not.
 */
232 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
239 else if (c1 == '#') {
/*
 * Try to match the text at ptr (remain bytes) against the variables
 * registered in parser->variables. Two comparisons are visible: one that
 * requires remain == var_len and one that accepts remain >= var_len (a
 * prefix match). On a match *out_len grows by the value length of the
 * variable and the position just past the name is returned. When nothing
 * matched, strict is set and parser->var_handler is installed, the handler
 * is called.
 * NOTE(review): which comparison `strict` selects is not visible here;
 * presumably the exact-length one - confirm.
 */
246 * Check variable found
255 static inline const char *
256 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
257 size_t *out_len, bool strict, bool *found)
259 struct ucl_variable *var;
262 bool need_free = false;
264 LL_FOREACH (parser->variables, var) {
266 if (remain == var->var_len) {
267 if (memcmp (ptr, var->var, var->var_len) == 0) {
268 *out_len += var->value_len;
270 return (ptr + var->var_len);
275 if (remain >= var->var_len) {
276 if (memcmp (ptr, var->var, var->var_len) == 0) {
277 *out_len += var->value_len;
279 return (ptr + var->var_len);
285 /* XXX: can only handle ${VAR} */
286 if (!(*found) && parser->var_handler != NULL && strict) {
287 /* Call generic handler */
288 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
294 return (ptr + remain);
/*
 * Inspect the text at ptr for a variable reference and add the length of
 * its expansion to *out_len. A reference enclosed in braces is matched in
 * strict mode on the text between the braces; any other text that does not
 * start with '$' is matched in non-strict mode over the remaining bytes.
 * A second '$' is an escaped dollar sign and is not counted as a variable.
 * *vars_found is tested after a successful non-strict match; the
 * assignment itself is not visible in this extract.
 */
302 * Check for a variable in a given string
311 ucl_check_variable (struct ucl_parser *parser, const char *ptr,
312 size_t remain, size_t *out_len, bool *vars_found)
314 const char *p, *end, *ret = ptr;
318 /* We need to match the variable enclosed in braces */
323 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
324 out_len, true, &found);
326 /* {} must be excluded actually */
340 else if (*ptr != '$') {
341 /* Not count escaped dollar sign */
342 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
343 if (found && !*vars_found) {
/*
 * Expand one variable reference at ptr into the output cursor *dest.
 * Registered variables are searched first (prefix match on the name, value
 * copied with memcpy). When strict is set and parser->var_handler is
 * installed, the handler may supply the value instead. Per the comment at
 * the end, an unmatched reference is left as is.
 * NOTE(review): the declaration of `strict` is not visible in this extract;
 * it is presumably set in the branch that sees an opening brace.
 */
359 * Expand a single variable
367 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
368 size_t remain, unsigned char **dest)
370 unsigned char *d = *dest, *dst;
371 const char *p = ptr + 1, *ret;
372 struct ucl_variable *var;
374 bool need_free = false;
386 else if (*p == '{') {
393 LL_FOREACH (parser->variables, var) {
394 if (remain >= var->var_len) {
395 if (memcmp (p, var->var, var->var_len) == 0) {
396 memcpy (d, var->value, var->value_len);
405 if (strict && parser->var_handler != NULL) {
406 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
408 memcpy (d, dst, dstlen);
415 /* Leave variable as is */
/*
 * Expand the variables found in src (in_len bytes). The visible code works
 * in two steps: ucl_check_variable accumulates the expanded length in
 * out_len, then *dst is allocated with out_len + 1 bytes and
 * ucl_expand_single_variable writes the expanded text through the cursor d.
 */
435 * Expand variables in string
443 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
444 const char *src, size_t in_len)
446 const char *p, *end = src + in_len;
449 bool vars_found = false;
/* First step: measure. */
454 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
/* Second step: allocate and fill. */
468 *dst = UCL_ALLOC (out_len + 1);
477 p = ucl_expand_single_variable (parser, p, end - p, &d);
/*
 * A private copy of src is allocated when unescaping or lowercasing is
 * requested, when expansion is requested and the parser has variables, or
 * when UCL_PARSER_ZEROCOPY is not set in parser->flags. The branch taken
 * otherwise is not visible in this extract (presumably the pointer is
 * stored as is). The copy is in_len + 1 bytes and is then lowercased,
 * unescaped and expanded as requested.
 */
490 * Store or copy pointer to the trash stack
491 * @param parser parser object
492 * @param src src string
493 * @param dst destination buffer (trash stack pointer)
494 * @param dst_const const destination pointer (e.g. value of object)
495 * @param in_len input length
496 * @param need_unescape need to unescape source (and copy it)
497 * @param need_lowercase need to lowercase value (and copy)
498 * @param need_expand need to expand variables (and copy as well)
499 * @return output length (excluding \0 symbol)
501 static inline ssize_t
502 ucl_copy_or_store_ptr (struct ucl_parser *parser,
503 const unsigned char *src, unsigned char **dst,
504 const char **dst_const, size_t in_len,
505 bool need_unescape, bool need_lowercase, bool need_expand)
507 ssize_t ret = -1, tret;
510 if (need_unescape || need_lowercase ||
511 (need_expand && parser->variables != NULL) ||
512 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
514 *dst = UCL_ALLOC (in_len + 1);
516 ucl_set_err (parser, 0, "cannot allocate memory for a string",
520 if (need_lowercase) {
521 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
524 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
528 ret = ucl_unescape_json_string (*dst, ret);
/* Expansion writes a new buffer through dst; tmp holds the unexpanded copy. */
533 ret = ucl_expand_variable (parser, dst, tmp, ret);
535 /* Nothing to expand */
540 /* Free unexpanded value */
541 UCL_FREE (in_len + 1, tmp);
/*
 * Make obj a container and push a stack frame for it onto parser->stack.
 * A new object is created with the priority of the current chunk on paths
 * whose condition is not visible here (presumably when obj is NULL);
 * otherwise the type of obj is overwritten. Objects get a hash created
 * with the UCL_PARSER_KEY_LOWERCASE setting and move the parser to
 * UCL_STATE_KEY; arrays move it to UCL_STATE_VALUE. parser->cur_obj is
 * pointed at the container. If the stack frame cannot be allocated an
 * error is set and obj is unreferenced.
 */
555 * Create and append an object at the specified level
561 static inline ucl_object_t *
562 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
564 struct ucl_stack *st;
568 obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority);
571 obj->type = UCL_OBJECT;
573 obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE);
574 parser->state = UCL_STATE_KEY;
578 obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority);
581 obj->type = UCL_ARRAY;
583 parser->state = UCL_STATE_VALUE;
586 st = UCL_ALLOC (sizeof (struct ucl_stack));
588 ucl_set_err (parser, 0, "cannot allocate memory for an object",
590 ucl_object_unref (obj);
595 LL_PREPEND (parser->stack, st);
596 parser->cur_obj = obj;
/*
 * Try to parse a number located in [start, end) into obj.
 * A first scan classifies the text (decimal or hex digits, an `x` hex
 * marker, a dot, an exponent). The value is then converted with strtod, or
 * with strtoimax in base 16 or 10, and errno is checked for ERANGE. Text
 * that follows the digits may be a size suffix (ucl_lex_num_multiplier), a
 * time suffix (`min`, or a character for ucl_lex_time_multiplier), trailing
 * whitespace or the end of the atom. On success the result is stored in
 * obj->value.dv (type UCL_FLOAT or UCL_TIME) or in obj->value.iv, negated
 * when is_neg is set.
 */
602 ucl_maybe_parse_number (ucl_object_t *obj,
603 const char *start, const char *end, const char **pos,
604 bool allow_double, bool number_bytes, bool allow_time)
606 const char *p = start, *c = start;
608 bool got_dot = false, got_exp = false, need_double = false,
609 is_time = false, valid_start = false, is_hex = false,
/*
 * NOTE(review): p is a plain `const char *`; passing a negative char value
 * to isxdigit/isdigit is undefined behaviour - consider a cast to
 * unsigned char.
 */
620 if (is_hex && isxdigit (*p)) {
623 else if (isdigit (*p)) {
627 else if (!is_hex && (*p == 'x' || *p == 'X')) {
/* A hex literal cannot also be a floating point value. */
629 allow_double = false;
632 else if (allow_double) {
634 /* Empty digits sequence, not a number */
638 else if (*p == '.') {
640 /* Double dots, not a number */
650 else if (*p == 'e' || *p == 'E') {
652 /* Double exp, not a number */
664 if (!isdigit (*p) && *p != '+' && *p != '-') {
665 /* Wrong exponent sign */
675 /* Got the end of the number, need to check */
691 dv = strtod (c, &endptr);
695 lv = strtoimax (c, &endptr, 16);
698 lv = strtoimax (c, &endptr, 10);
701 if (errno == ERANGE) {
706 /* Now check endptr */
707 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
712 if (endptr < end && endptr != start) {
722 if (p[1] == 's' || p[1] == 'S') {
729 if (p[0] == 'm' || p[0] == 'M') {
733 dv *= ucl_lex_num_multiplier (*p, false);
738 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
744 lv *= ucl_lex_num_multiplier (*p, true);
748 else if (ucl_lex_is_atom_end (p[1])) {
750 dv *= ucl_lex_num_multiplier (*p, false);
753 lv *= ucl_lex_num_multiplier (*p, number_bytes);
/* A three-letter `min` suffix needs at least three bytes left. */
758 else if (allow_time && end - p >= 3) {
759 if (tolower (p[0]) == 'm' &&
760 tolower (p[1]) == 'i' &&
761 tolower (p[2]) == 'n') {
776 dv *= ucl_lex_num_multiplier (*p, false);
779 lv *= ucl_lex_num_multiplier (*p, number_bytes);
788 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
807 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
813 dv *= ucl_lex_time_multiplier (*p);
820 while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
823 if (ucl_lex_is_atom_end(*p))
828 else if (endptr == end) {
829 /* Just a number at the end of chunk */
838 if (allow_double && (need_double || is_time)) {
840 obj->type = UCL_FLOAT;
843 obj->type = UCL_TIME;
845 obj->value.dv = is_neg ? (-dv) : dv;
849 obj->value.iv = is_neg ? (-lv) : lv;
/*
 * Parse a number at chunk->pos through ucl_maybe_parse_number with doubles
 * allowed, byte multipliers off, and time suffixes allowed unless
 * UCL_PARSER_NO_TIME is set in parser->flags. The remain and column fields
 * of the chunk are advanced by the consumed length; an ERANGE result is
 * reported as an out-of-range error.
 */
856 * Parse possible number
859 * @return true if a number has been parsed
862 ucl_lex_number (struct ucl_parser *parser,
863 struct ucl_chunk *chunk, ucl_object_t *obj)
865 const unsigned char *pos;
868 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
869 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
/* pos is the first byte after the number; account for the bytes consumed. */
872 chunk->remain -= pos - chunk->pos;
873 chunk->column += pos - chunk->pos;
877 else if (ret == ERANGE) {
878 ucl_set_err (parser, ERANGE, "numeric value out of range", &parser->err);
/*
 * Scan the body of a quoted JSON string starting at chunk->pos.
 * Unescaped control characters and newlines are syntax errors. A backslash
 * has to be followed by a character of the UCL_CHARACTER_ESCAPE class, and
 * one escape form consumes up to four hex digits. *need_unescape is set
 * when an escape sequence was seen. Characters of the
 * UCL_CHARACTER_UCL_UNSAFE class get a separate branch whose body is not
 * visible here. Running out of input before the closing quote is an error.
 */
885 * Parse quoted string with possible escapes
888 * @return true if a string has been parsed
891 ucl_lex_json_string (struct ucl_parser *parser,
892 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
894 const unsigned char *p = chunk->pos;
898 while (p < chunk->end) {
901 /* Unmasked control character */
903 ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
907 ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
912 else if (c == '\\') {
913 ucl_chunk_skipc (chunk, p);
/* The chunk must not end right after the backslash. */
915 if (p >= chunk->end) {
916 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
920 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
922 ucl_chunk_skipc (chunk, p);
/* Up to four hex digits are consumed; anything else is rejected. */
923 for (i = 0; i < 4 && p < chunk->end; i ++) {
924 if (!isxdigit (*p)) {
925 ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
929 ucl_chunk_skipc (chunk, p);
931 if (p >= chunk->end) {
932 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
938 ucl_chunk_skipc (chunk, p);
941 *need_unescape = true;
946 ucl_chunk_skipc (chunk, p);
949 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
955 ucl_chunk_skipc (chunk, p);
958 ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string",
/*
 * Add elt next to an existing value top that shares its key in cont.
 * When UCL_PARSER_NO_IMPLICIT_ARRAYS is not set, top is flagged
 * UCL_OBJECT_MULTIVALUE and elt is chained to it with DL_APPEND.
 * Otherwise elt is appended to top if top is already flagged multi-value,
 * or top is removed from the hash and replaced by a new UCL_ARRAY that
 * holds top and elt under the key of top.
 */
964 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
970 if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
972 top->flags |= UCL_OBJECT_MULTIVALUE;
973 DL_APPEND (top, elt);
976 if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
977 /* Just add to the explicit array */
978 ucl_array_append (top, elt);
981 /* Convert to an array */
982 ucl_hash_delete (cont, top);
983 nobj = ucl_object_typed_new (UCL_ARRAY);
/* The new array takes over the key under which top was stored. */
984 nobj->key = top->key;
985 nobj->keylen = top->keylen;
986 nobj->flags |= UCL_OBJECT_MULTIVALUE;
987 ucl_array_append (nobj, top);
988 ucl_array_append (nobj, elt);
989 ucl_hash_insert (cont, nobj, nobj->key, nobj->keylen);
/*
 * Parse one key inside an object and create the UCL_NULL object that will
 * receive its value. Phases visible in this extract:
 *  - leading comments and whitespace are skipped; a macro marker or a dot
 *    switches the parser to UCL_STATE_MACRO_NAME; a closing curly brace
 *    sets *end_of_object;
 *  - the key body is either a bare identifier or a JSON-style quoted
 *    string scanned by ucl_lex_json_string;
 *  - after the key, whitespace, a single `=` or `:` separator and comments
 *    are consumed; a second separator is a syntax error;
 *  - a look-ahead through t checks what terminates the line (presumably
 *    to fill *next_key; the assignment is not visible here);
 *  - the key is copied with ucl_copy_or_store_ptr (lowercased when
 *    UCL_PARSER_KEY_LOWERCASE is set) and the object is looked up in the
 *    hash of the container on top of the stack. For an existing key the
 *    priorities of the old and new objects decide between appending,
 *    dropping the new object, or replacing the old one.
 * NOTE(review): the whitespace loop over t has no visible
 * `t < chunk->end` bound - confirm it cannot run past the end of the chunk.
 * NOTE(review): got_semicolon is the flag set when a colon is seen.
 */
995 * Parse a key in an object
998 * @return true if a key has been parsed
1001 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
1003 const unsigned char *p, *c = NULL, *end, *t;
1004 const char *key = NULL;
1005 bool got_quote = false, got_eq = false, got_semicolon = false,
1006 need_unescape = false, ucl_escape = false, var_expand = false,
1007 got_content = false, got_sep = false;
1008 ucl_object_t *nobj, *tobj;
1009 ucl_hash_t *container;
1015 /* It is macro actually */
1016 ucl_chunk_skipc (chunk, p);
1017 parser->prev_state = parser->state;
1018 parser->state = UCL_STATE_MACRO_NAME;
1019 *end_of_object = false;
1022 while (p < chunk->end) {
1024 * A key must start with alpha, number, '/' or '_' and end with space character
1027 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1028 if (!ucl_skip_comments (parser)) {
1033 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1034 ucl_chunk_skipc (chunk, p);
1036 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
1037 /* The first symbol */
1039 ucl_chunk_skipc (chunk, p);
1042 else if (*p == '"') {
1043 /* JSON style key */
1047 ucl_chunk_skipc (chunk, p);
1049 else if (*p == '}') {
1050 /* We have actually end of an object */
1051 *end_of_object = true;
1054 else if (*p == '.') {
1055 ucl_chunk_skipc (chunk, p);
1056 parser->prev_state = parser->state;
1057 parser->state = UCL_STATE_MACRO_NAME;
1061 /* Invalid identifier */
1062 ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
1068 /* Parse the body of a key */
1070 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
1072 ucl_chunk_skipc (chunk, p);
1074 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1079 ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
1085 /* We need to parse json like quoted string */
1086 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1089 /* Always escape keys obtained via json */
1090 end = chunk->pos - 1;
1097 if (p >= chunk->end && got_content) {
1098 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1101 else if (!got_content) {
1104 *end_of_object = false;
1105 /* We are now at the end of the key, need to parse the rest */
1106 while (p < chunk->end) {
1107 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1108 ucl_chunk_skipc (chunk, p);
1110 else if (*p == '=') {
1111 if (!got_eq && !got_semicolon) {
1112 ucl_chunk_skipc (chunk, p);
1116 ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
1121 else if (*p == ':') {
1122 if (!got_eq && !got_semicolon) {
1123 ucl_chunk_skipc (chunk, p);
1124 got_semicolon = true;
1127 ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
1132 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1133 /* Check for comment */
1134 if (!ucl_skip_comments (parser)) {
1145 if (p >= chunk->end && got_content) {
1146 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1150 got_sep = got_semicolon || got_eq;
1154 * Maybe we have more keys nested, so search for termination character.
1156 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1157 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1158 * 3) key1 value[;,\n] <- we treat that as linear object
1162 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1165 /* Check first non-space character after a key */
1166 if (*t != '{' && *t != '[') {
1167 while (t < chunk->end) {
1168 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1171 else if (*t == '{' || *t == '[') {
1180 /* Create a new object */
1181 nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1182 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1183 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1185 ucl_object_unref (nobj);
1188 else if (keylen == 0) {
1189 ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1190 ucl_object_unref (nobj);
1194 container = parser->stack->obj->value.ov;
1196 nobj->keylen = keylen;
1197 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1199 container = ucl_hash_insert_object (container, nobj,
1200 parser->flags & UCL_PARSER_KEY_LOWERCASE);
1203 parser->stack->obj->len ++;
1207 * The logic here is the following:
1209 * - if we have two objects with the same priority, then we form an
1210 * implicit or explicit array
1211 * - if a new object has bigger priority, then we overwrite an old one
1212 * - if a new object has lower priority, then we ignore it
1214 unsigned priold = ucl_object_get_priority (tobj),
1215 prinew = ucl_object_get_priority (nobj);
1216 if (priold == prinew) {
1217 ucl_parser_append_elt (parser, container, tobj, nobj);
1219 else if (priold > prinew) {
1220 ucl_object_unref (nobj);
1224 ucl_hash_replace (container, tobj, nobj);
1225 ucl_object_unref (tobj);
1230 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1232 parser->stack->obj->value.ov = container;
1234 parser->cur_obj = nobj;
/*
 * Scan an unquoted string value, advancing the chunk as it goes.
 * Counts of opening and closing curly and square braces are kept in
 * braces[kind][0] and braces[kind][1]; a closing brace that does not exceed
 * the number of openers seen so far is skipped as part of the value instead
 * of ending it. A backslash sets *need_unescape and the character after it
 * is skipped too. The scan stops at an atom-end character or at the start
 * of a comment.
 * NOTE(review): the doc fragment below says `key`, but this function scans
 * a value - the wording looks copied from ucl_parse_key.
 */
1243 * @return true if a key has been parsed
1246 ucl_parse_string_value (struct ucl_parser *parser,
1247 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1249 const unsigned char *p;
1251 UCL_BRACE_ROUND = 0,
1255 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1259 while (p < chunk->end) {
1261 /* Skip pairs of figure braces */
1263 braces[UCL_BRACE_FIGURE][0] ++;
1265 else if (*p == '}') {
1266 braces[UCL_BRACE_FIGURE][1] ++;
1267 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1268 /* This is not a termination symbol, continue */
1269 ucl_chunk_skipc (chunk, p);
1273 /* Skip pairs of square braces */
1274 else if (*p == '[') {
1275 braces[UCL_BRACE_SQUARE][0] ++;
1277 else if (*p == ']') {
1278 braces[UCL_BRACE_SQUARE][1] ++;
1279 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1280 /* This is not a termination symbol, continue */
1281 ucl_chunk_skipc (chunk, p);
1285 else if (*p == '$') {
1288 else if (*p == '\\') {
1289 *need_unescape = true;
1290 ucl_chunk_skipc (chunk, p);
/* Skip the escaped character as well, unless the chunk ended. */
1291 if (p < chunk->end) {
1292 ucl_chunk_skipc (chunk, p);
1297 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1300 ucl_chunk_skipc (chunk, p);
/*
 * Scan a multiline string until its terminator `term` (term_len bytes) is
 * found. A candidate match is accepted only when the byte after it is a
 * newline, a semicolon or a comma; otherwise scanning continues. On
 * acceptance the chunk is moved past the terminator: remain shrinks by
 * term_len, pos is set just after it and column becomes term_len.
 */
1307 * Parse multiline string ending with \n{term}\n
1312 * @return size of multiline string or 0 in case of error
1315 ucl_parse_multiline_string (struct ucl_parser *parser,
1316 struct ucl_chunk *chunk, const unsigned char *term,
1317 int term_len, unsigned char const **beg,
1320 const unsigned char *p, *c, *tend;
1321 bool newline = false;
1328 while (p < chunk->end) {
/* Fewer bytes left than the terminator needs. */
1330 if (chunk->end - p < term_len) {
1333 else if (memcmp (p, term, term_len) == 0) {
1334 tend = p + term_len;
/*
 * NOTE(review): tend may equal chunk->end when the terminator is the very
 * last thing in the chunk; confirm that reading *tend is then still valid.
 */
1335 if (*tend != '\n' && *tend != ';' && *tend != ',') {
1336 /* Incomplete terminator */
1337 ucl_chunk_skipc (chunk, p);
1341 chunk->remain -= term_len;
1342 chunk->pos = p + term_len;
1343 chunk->column = term_len;
1357 ucl_chunk_skipc (chunk, p);
/*
 * Return the object that should receive the value being parsed.
 * When the container on top of the stack is an array, a new UCL_NULL
 * object is created with the priority of the current chunk, appended to
 * that array and made parser->cur_obj. Otherwise parser->cur_obj is
 * returned as is. A NULL parser, an empty stack or a stack frame without
 * an object is checked for first; the value returned on that path is not
 * visible in this extract.
 */
1363 static ucl_object_t*
1364 ucl_get_value_object (struct ucl_parser *parser)
1366 ucl_object_t *t, *obj = NULL;
1368 if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
1372 if (parser->stack->obj->type == UCL_ARRAY) {
1373 /* Object must be allocated */
1374 obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1375 t = parser->stack->obj;
1376 ucl_array_append (t, obj);
1377 parser->cur_obj = obj;
1380 /* Object has been already allocated */
1381 obj = parser->cur_obj;
/*
 * Parse a value at the current chunk position and store it in the object
 * obtained from ucl_get_value_object. Cases visible in this extract: a
 * quoted JSON string, a nested object or array (pushed with
 * ucl_add_parser_stack), the end of an array, a multiline string
 * introduced by `<<` and an upper-case terminator, a number
 * (ucl_lex_number), and a fallback unquoted string that may turn out to be
 * `null` or a boolean. The scalar cases finish by setting
 * UCL_STATE_AFTER_VALUE.
 */
1394 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1396 const unsigned char *p, *c;
1397 ucl_object_t *obj = NULL;
1398 unsigned int stripped_spaces;
1400 bool need_unescape = false, ucl_escape = false, var_expand = false;
1404 /* Skip any spaces and comments */
1405 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1406 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1407 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1408 ucl_chunk_skipc (chunk, p);
1410 if (!ucl_skip_comments (parser)) {
1416 while (p < chunk->end) {
1420 obj = ucl_get_value_object (parser);
1421 ucl_chunk_skipc (chunk, p);
1422 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
/* c still points at the opening quote, so both quotes are excluded. */
1425 str_len = chunk->pos - c - 2;
1426 obj->type = UCL_STRING;
1427 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1428 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1432 parser->state = UCL_STATE_AFTER_VALUE;
1437 obj = ucl_get_value_object (parser);
1438 /* We have a new object */
1439 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1444 ucl_chunk_skipc (chunk, p);
1448 obj = ucl_get_value_object (parser);
1449 /* We have a new array */
1450 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1455 ucl_chunk_skipc (chunk, p);
1459 /* We have the array ending */
1460 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1461 parser->state = UCL_STATE_AFTER_VALUE;
1469 obj = ucl_get_value_object (parser);
1470 /* We have something like multiline value, which must be <<[A-Z]+\n */
1471 if (chunk->end - p > 3) {
1472 if (memcmp (p, "<<", 2) == 0) {
1474 /* We allow only uppercase characters in multiline definitions */
1475 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1479 /* Set chunk positions and start multiline parsing */
1481 chunk->remain -= p - c;
1485 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1486 p - c, &c, &var_expand)) == 0) {
1487 ucl_set_err (parser, UCL_ESYNTAX,
1488 "unterminated multiline value", &parser->err);
1491 obj->type = UCL_STRING;
1492 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1493 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1497 parser->state = UCL_STATE_AFTER_VALUE;
1502 /* Fallback to ordinary strings */
1506 obj = ucl_get_value_object (parser);
1509 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1510 if (!ucl_lex_number (parser, chunk, obj)) {
1511 if (parser->state == UCL_STATE_ERROR) {
1516 parser->state = UCL_STATE_AFTER_VALUE;
1519 /* Fallback to normal string */
1522 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1525 /* Cut trailing spaces */
1526 stripped_spaces = 0;
/*
 * NOTE(review): this walks backwards from chunk->pos with no visible lower
 * bound - confirm it cannot step before c.
 */
1527 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1528 UCL_CHARACTER_WHITESPACE)) {
1531 str_len = chunk->pos - c - stripped_spaces;
1533 ucl_set_err (parser, 0, "string value must not be empty",
1537 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1539 obj->type = UCL_NULL;
1541 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1542 obj->type = UCL_STRING;
1543 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1544 &obj->value.sv, str_len, need_unescape,
1545 false, var_expand)) == -1) {
1550 parser->state = UCL_STATE_AFTER_VALUE;
/*
 * Consume what follows a value: whitespace, comments (treated as a
 * separator) and value-end characters. A closing curly or square brace has
 * to match the type of the container on top of parser->stack; the matching
 * frame is popped and freed, after which further frames are popped in a
 * loop whose stop test compares the levels of adjacent frames. A closing
 * brace with an empty stack or of the wrong kind is a syntax error. Once
 * the stack is empty the rest of the input is ignored. A missing separator
 * is reported as an error.
 */
1562 * Handle after value data
1568 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1570 const unsigned char *p;
1571 bool got_sep = false;
1572 struct ucl_stack *st;
1576 while (p < chunk->end) {
1577 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1578 /* Skip whitespaces */
1579 ucl_chunk_skipc (chunk, p);
1581 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1583 if (!ucl_skip_comments (parser)) {
1586 /* Treat comment as a separator */
1590 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1591 if (*p == '}' || *p == ']') {
1592 if (parser->stack == NULL) {
1593 ucl_set_err (parser, UCL_ESYNTAX,
1594 "end of array or object detected without corresponding start",
1598 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1599 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1601 /* Pop all nested objects from a stack */
1603 parser->stack = st->next;
1604 UCL_FREE (sizeof (struct ucl_stack), st);
1606 while (parser->stack != NULL) {
1608 if (st->next == NULL || st->next->level == st->level) {
1611 parser->stack = st->next;
1612 UCL_FREE (sizeof (struct ucl_stack), st);
1616 ucl_set_err (parser, UCL_ESYNTAX,
1617 "unexpected terminating symbol detected",
1622 if (parser->stack == NULL) {
1623 /* Ignore everything after a top object */
1627 ucl_chunk_skipc (chunk, p);
1632 /* Got a separator */
1634 ucl_chunk_skipc (chunk, p);
1640 ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
/*
 * Locate the body of a macro and report it through *macro_start and
 * *macro_len. Three forms are visible: a value in quotes (scanned with
 * ucl_lex_json_string, with both quotes excluded from the reported range),
 * a multiline body (leading whitespace is skipped first), and a bare value
 * that runs until an atom-end character. Afterwards trailing semicolons
 * and whitespace are skipped.
 * NOTE(review): no use of `macro` is visible in this extract.
 */
1658 ucl_parse_macro_value (struct ucl_parser *parser,
1659 struct ucl_chunk *chunk, struct ucl_macro *macro,
1660 unsigned char const **macro_start, size_t *macro_len)
1662 const unsigned char *p, *c;
1663 bool need_unescape = false, ucl_escape = false, var_expand = false;
1669 /* We have macro value encoded in quotes */
1671 ucl_chunk_skipc (chunk, p);
1672 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
/* c points at the opening quote: start after it and drop both quotes. */
1676 *macro_start = c + 1;
1677 *macro_len = chunk->pos - c - 2;
1681 /* We got a multiline macro body */
1682 ucl_chunk_skipc (chunk, p);
1683 /* Skip spaces at the beginning */
1684 while (p < chunk->end) {
1685 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1686 ucl_chunk_skipc (chunk, p);
1693 while (p < chunk->end) {
1697 ucl_chunk_skipc (chunk, p);
1701 ucl_chunk_skipc (chunk, p);
1704 /* Macro is not enclosed in quotes or braces */
1706 while (p < chunk->end) {
1707 if (ucl_lex_is_atom_end (*p)) {
1710 ucl_chunk_skipc (chunk, p);
1717 /* We are at the end of a macro */
1718 /* Skip ';' and space characters and return to previous state */
1719 while (p < chunk->end) {
1720 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1723 ucl_chunk_skipc (chunk, p);
/*
 * Parse macro arguments written in round brackets as a UCL object.
 * The chunk position (column, line, pos, remain) is saved first. The text
 * between the brackets is delimited by counting opening and closing
 * brackets (obraces / ebraces) while skipping over quoted strings; it is
 * then fed to a temporary parser created with the flags of the caller, and
 * the result is taken with ucl_parser_get_object before the temporary
 * parser is freed. The saved position is written back to the chunk on a
 * path whose condition is not visible in this extract.
 * NOTE(review): the quote test inside the string-skipping state reads the
 * byte before p; confirm p cannot be the first byte of the buffer there.
 * NOTE(review): the result of ucl_parser_new is used without a visible
 * NULL check.
 */
1729 * Parse macro arguments as UCL object
1730 * @param parser parser structure
1731 * @param chunk the current data chunk
1734 static ucl_object_t *
1735 ucl_parse_macro_arguments (struct ucl_parser *parser,
1736 struct ucl_chunk *chunk)
1738 ucl_object_t *res = NULL;
1739 struct ucl_parser *params_parser;
1740 int obraces = 1, ebraces = 0, state = 0;
1741 const unsigned char *p, *c;
1742 size_t args_len = 0;
1743 struct ucl_parser_saved_state saved;
1745 saved.column = chunk->column;
1746 saved.line = chunk->line;
1747 saved.pos = chunk->pos;
1748 saved.remain = chunk->remain;
1751 if (*p != '(' || chunk->remain < 2) {
1755 /* Set begin and start */
1756 ucl_chunk_skipc (chunk, p);
1759 while ((p) < (chunk)->end) {
1762 /* Parse symbols and check for '(', ')' and '"' */
1766 else if (*p == ')') {
1769 else if (*p == '"') {
1773 if (obraces == ebraces) {
1779 /* Check overflow */
1780 if (chunk->remain == 0) {
1783 ucl_chunk_skipc (chunk, p);
1786 /* We have quote character, so skip all but quotes */
1787 if (*p == '"' && *(p - 1) != '\\') {
1790 if (chunk->remain == 0) {
1793 ucl_chunk_skipc (chunk, p);
1797 * We have read the full body of arguments, so we need to parse and set
1800 params_parser = ucl_parser_new (parser->flags);
1801 if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
1802 ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
1806 res = ucl_parser_get_object (params_parser);
1808 ucl_parser_free (params_parser);
1819 chunk->column = saved.column;
1820 chunk->line = saved.line;
1821 chunk->pos = saved.pos;
1822 chunk->remain = saved.remain;
/*
 * Advance p over whitespace and comments. A non-whitespace character that
 * starts a comment (at least two bytes must remain) is handed to
 * ucl_skip_comments; the trailing ucl_chunk_skipc presumably consumes
 * whitespace. The loop exit and the failure handling are not visible in
 * this extract.
 */
1827 #define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \
1828 while ((p) < (chunk)->end) { \
1829 if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \
1830 if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \
1831 if (!ucl_skip_comments (parser)) { \
1838 ucl_chunk_skipc (chunk, p); \
1843 * Handle the main states of ucl parser
/*
 * Drive the parser over the first chunk in parser->chunks until chunk->pos
 * reaches chunk->end. On first use (parser->top_obj is NULL) a top-level
 * array is pushed when the chunk starts with an opening square bracket,
 * otherwise a top-level object. Each loop iteration dispatches on
 * parser->state; the visible states are INIT, key parsing, VALUE,
 * AFTER_VALUE, MACRO_NAME and MACRO. When a helper fails, the current
 * state is remembered in prev_state and the parser moves to
 * UCL_STATE_ERROR.
 * NOTE(review): the doc fragment below documents `data` and `len`, which
 * are not parameters of this function.
 * NOTE(review): three arguments in the MACRO state begin with a macron sign
 * (U+00AF) followed by `o_start`, `o_len` and `o_escaped`. They look like
 * `&macro_start`, `&macro_len` and `&macro_escaped` damaged by an
 * HTML-entity conversion; restore them from a pristine copy.
 */
1844 * @param parser parser structure
1845 * @param data the pointer to the beginning of a chunk
1846 * @param len the length of a chunk
1847 * @return true if chunk has been parsed and false in case of error
1850 ucl_state_machine (struct ucl_parser *parser)
1852 ucl_object_t *obj, *macro_args;
1853 struct ucl_chunk *chunk = parser->chunks;
1854 const unsigned char *p, *c = NULL, *macro_start = NULL;
1855 unsigned char *macro_escaped;
1856 size_t macro_len = 0;
1857 struct ucl_macro *macro = NULL;
1858 bool next_key = false, end_of_object = false, ret;
1860 if (parser->top_obj == NULL) {
1861 if (*chunk->pos == '[') {
1862 obj = ucl_add_parser_stack (NULL, parser, true, 0);
1865 obj = ucl_add_parser_stack (NULL, parser, false, 0);
1870 parser->top_obj = obj;
1871 parser->cur_obj = obj;
1872 parser->state = UCL_STATE_INIT;
1876 while (chunk->pos < chunk->end) {
1877 switch (parser->state) {
1878 case UCL_STATE_INIT:
1880 * At the init state we can either go to the parse array or object
1881 * if we got [ or { correspondingly or can just treat new data as
1882 * a key of newly created object
1884 if (!ucl_skip_comments (parser)) {
1885 parser->prev_state = parser->state;
1886 parser->state = UCL_STATE_ERROR;
1890 /* Skip any spaces */
1891 while (p < chunk->end && ucl_test_character (*p,
1892 UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1893 ucl_chunk_skipc (chunk, p);
1897 parser->state = UCL_STATE_VALUE;
1898 ucl_chunk_skipc (chunk, p);
1901 parser->state = UCL_STATE_KEY;
1903 ucl_chunk_skipc (chunk, p);
1909 /* Skip any spaces */
1910 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1911 ucl_chunk_skipc (chunk, p);
1914 /* We have the end of an object */
1915 parser->state = UCL_STATE_AFTER_VALUE;
1918 if (parser->stack == NULL) {
1919 /* No objects are on stack, but we want to parse a key */
1920 ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
1921 "expects a key", &parser->err);
1922 parser->prev_state = parser->state;
1923 parser->state = UCL_STATE_ERROR;
1926 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1927 parser->prev_state = parser->state;
1928 parser->state = UCL_STATE_ERROR;
1931 if (end_of_object) {
1933 parser->state = UCL_STATE_AFTER_VALUE;
/* ucl_parse_key may have switched to MACRO_NAME; only plain keys go on to a value. */
1936 else if (parser->state != UCL_STATE_MACRO_NAME) {
1937 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1938 /* Parse more keys and nest objects accordingly */
1939 obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1940 parser->stack->level + 1);
1946 parser->state = UCL_STATE_VALUE;
1954 case UCL_STATE_VALUE:
1955 /* We need to check what we do have */
1956 if (!ucl_parse_value (parser, chunk)) {
1957 parser->prev_state = parser->state;
1958 parser->state = UCL_STATE_ERROR;
1961 /* State is set in ucl_parse_value call */
1964 case UCL_STATE_AFTER_VALUE:
1965 if (!ucl_parse_after_value (parser, chunk)) {
1966 parser->prev_state = parser->state;
1967 parser->state = UCL_STATE_ERROR;
/* The type of the container left on top of the stack selects the next state. */
1970 if (parser->stack != NULL) {
1971 if (parser->stack->obj->type == UCL_OBJECT) {
1972 parser->state = UCL_STATE_KEY;
1976 parser->state = UCL_STATE_VALUE;
1980 /* Skip everything at the end */
1985 case UCL_STATE_MACRO_NAME:
1986 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
1988 ucl_chunk_skipc (chunk, p);
1990 else if (p - c > 0) {
1991 /* We got macro name */
1992 macro_len = (size_t)(p - c);
1993 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1994 if (macro == NULL) {
1995 ucl_create_err (&parser->err, "error on line %d at column %d: "
1996 "unknown macro: '%.*s', character: '%c'",
1997 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1998 parser->state = UCL_STATE_ERROR;
2001 /* Now we need to skip all spaces */
2002 SKIP_SPACES_COMMENTS(parser, chunk, p);
2003 parser->state = UCL_STATE_MACRO;
2006 case UCL_STATE_MACRO:
2007 if (*chunk->pos == '(') {
2008 macro_args = ucl_parse_macro_arguments (parser, chunk);
2011 SKIP_SPACES_COMMENTS(parser, chunk, p);
2017 if (!ucl_parse_macro_value (parser, chunk, macro,
2018 ¯o_start, ¯o_len)) {
2019 parser->prev_state = parser->state;
2020 parser->state = UCL_STATE_ERROR;
2023 macro_len = ucl_expand_variable (parser, ¯o_escaped,
2024 macro_start, macro_len);
2025 parser->state = parser->prev_state;
/* Without an expanded copy the handler gets the raw macro text. */
2026 if (macro_escaped == NULL) {
2027 ret = macro->handler (macro_start, macro_len, macro_args,
2031 ret = macro->handler (macro_escaped, macro_len, macro_args,
2033 UCL_FREE (macro_len + 1, macro_escaped);
2037 ucl_object_unref (macro_args);
2044 /* TODO: add all states */
2045 ucl_set_err (parser, UCL_EINTERNAL,
2046 "internal error: parser is in an unknown state", &parser->err);
2047 parser->state = UCL_STATE_ERROR;
/**
 * Create and initialise a new parser object.
 *
 * The visible code allocates a struct ucl_parser with UCL_ALLOC, zero-fills
 * it, registers the three built-in macros ("include", "try_include" and
 * "includes") and then calls ucl_parser_set_filevars with NULL and false.
 *
 * NOTE(review): this excerpt does not show every line of the function. How
 * `flags` is consumed, how an allocation failure is handled and what is
 * returned are not visible here - confirm against the complete source.
 *
 * @param flags parser flags (their use is not visible in this excerpt)
 */
2056 ucl_parser_new (int flags)
2058 struct ucl_parser *new;
2060 new = UCL_ALLOC (sizeof (struct ucl_parser));
/* Start from an all-zero parser structure */
2064 memset (new, 0, sizeof (struct ucl_parser));
/*
 * Register the built-in include macros; the parser itself is passed as the
 * `ud` argument of each registration.
 */
2066 ucl_parser_register_macro (new, "include", ucl_include_handler, new);
2067 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
2068 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
2072 /* Initial assumption about filevars */
2073 ucl_parser_set_filevars (new, NULL, false);
/**
 * Register a macro handler under the given name.
 *
 * A new struct ucl_macro is allocated and zero-filled, the handler is stored
 * in it together with a strdup'ed copy of the name, and the entry is added to
 * the parser->macroes hash keyed by that copied name.
 *
 * NOTE(review): the statements executed when `macro` or `handler` is NULL,
 * the handling of a failed allocation and any store of `ud` are not visible
 * in this excerpt - confirm against the complete source.
 *
 * @param parser parser that owns the macro table
 * @param macro macro name; it is copied with strdup
 * @param handler callback stored in the new entry
 * @param ud opaque pointer supplied by the caller (use not visible here)
 */
2080 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
2081 ucl_macro_handler handler, void* ud)
2083 struct ucl_macro *new;
/* Both a name and a handler are needed to register anything */
2085 if (macro == NULL || handler == NULL) {
2088 new = UCL_ALLOC (sizeof (struct ucl_macro));
2092 memset (new, 0, sizeof (struct ucl_macro));
2093 new->handler = handler;
/* Keep a private copy of the name; no check of the strdup result is visible here */
2094 new->name = strdup (macro);
/* The hash key is the copied name, so it stays valid as long as the entry does */
2096 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
/**
 * Register, update or remove a parser variable.
 *
 * The visible code looks the name up in parser->variables by comparing it
 * with each entry's `var` string. A NULL `value` leads to the removal path
 * (the entry is unlinked and its structure is released); otherwise either a
 * new entry holding copies of the name and the value is appended to the list,
 * or the value of an entry is replaced by a fresh copy.
 *
 * NOTE(review): several lines of this function are missing from the excerpt,
 * including the rest of the signature, the statements executed when a match
 * is found and the conditions choosing between the create and replace paths.
 * In particular it cannot be seen here whether the previous value string is
 * freed before it is replaced - confirm against the complete source.
 *
 * @param parser parser that owns the variable list
 * @param var variable name to look up, add or remove
 * @param value new value, or NULL to request removal (its declaration is not
 *              visible in this excerpt)
 */
2100 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
2103 struct ucl_variable *new = NULL, *cur;
2109 /* Find whether a variable already exists */
2110 LL_FOREACH (parser->variables, cur) {
2111 if (strcmp (cur->var, var) == 0) {
/* A NULL value selects the removal path */
2117 if (value == NULL) {
2120 /* Remove variable */
2121 DL_DELETE (parser->variables, new);
2124 UCL_FREE (sizeof (struct ucl_variable), new);
/* Creation path: build a zeroed entry holding copies of the name and the value */
2133 new = UCL_ALLOC (sizeof (struct ucl_variable));
2137 memset (new, 0, sizeof (struct ucl_variable));
2138 new->var = strdup (var);
2139 new->var_len = strlen (var);
2140 new->value = strdup (value);
2141 new->value_len = strlen (value);
2143 DL_APPEND (parser->variables, new);
/* Replacement path: store a fresh copy of the value and its length */
2147 new->value = strdup (value);
2148 new->value_len = strlen (value);
/**
 * Install a variables handler on the parser.
 *
 * Stores `handler` in parser->var_handler and `ud` in parser->var_data. No
 * validation of either argument is visible in this excerpt.
 *
 * @param parser parser to configure
 * @param handler handler to store
 * @param ud opaque pointer stored next to the handler
 */
2154 ucl_parser_set_variables_handler (struct ucl_parser *parser,
2155 ucl_variable_handler handler, void *ud)
2157 parser->var_handler = handler;
2158 parser->var_data = ud;
/**
 * Add a chunk of input with the given priority and parse it.
 *
 * When the parser is not in UCL_STATE_ERROR, a struct ucl_chunk describing
 * the range [data, data + len) is allocated, given the priority and linked at
 * the head of parser->chunks. parser->recursion is then incremented and
 * checked against UCL_MAX_RECURSION, and the visible success path returns the
 * result of ucl_state_machine (parser). Failures are reported through
 * ucl_create_err on parser->err.
 *
 * NOTE(review): the conditions guarding the "invalid chunk added" error and
 * the creation of parser->top_obj, the remaining chunk field initialisation
 * and the values returned on the failure paths are not visible in this
 * excerpt - confirm against the complete source.
 *
 * @param parser parser receiving the chunk
 * @param data first byte of the input; the pointer is stored, not copied
 * @param len number of bytes at `data`
 * @param priority priority stored in the chunk and passed to
 *                 ucl_object_new_full when the top object is created
 */
2162 ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data,
2163 size_t len, unsigned priority)
2165 struct ucl_chunk *chunk;
2168 ucl_create_err (&parser->err, "invalid chunk added");
/* The top object is created as a UCL_OBJECT carrying the chunk priority */
2172 parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
/* Chunks are only accepted while the parser is not in the error state */
2175 if (parser->state != UCL_STATE_ERROR) {
2176 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
2177 if (chunk == NULL) {
2178 ucl_create_err (&parser->err, "cannot allocate chunk structure");
/* The chunk refers to the caller's buffer: begin/pos at data, end at data + len */
2181 chunk->begin = data;
2182 chunk->remain = len;
2183 chunk->pos = chunk->begin;
2184 chunk->end = chunk->begin + len;
2187 chunk->priority = priority;
/* The newest chunk becomes the head of the chunk list */
2188 LL_PREPEND (parser->chunks, chunk);
/* Count this chunk and refuse to go beyond UCL_MAX_RECURSION */
2189 parser->recursion ++;
2190 if (parser->recursion > UCL_MAX_RECURSION) {
2191 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
2195 return ucl_state_machine (parser);
/* Reached when the parser state does not allow new chunks */
2198 ucl_create_err (&parser->err, "a parser is in an invalid state");
/**
 * Add a chunk of input with priority 0.
 *
 * Forwards its arguments to ucl_parser_add_chunk_priority with a priority of
 * 0 and returns that call's result.
 *
 * NOTE(review): the end of the parameter list (the declaration of `len`) is
 * not visible in this excerpt.
 *
 * @param parser parser receiving the chunk
 * @param data first byte of the input
 */
2204 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
2207 return ucl_parser_add_chunk_priority (parser, data, len, 0);
/**
 * Add a character string as parser input.
 *
 * The visible code can report "invalid string added" on parser->err, can set
 * `len` to strlen (data), and finally hands the bytes to ucl_parser_add_chunk
 * (casting `data` to const unsigned char *), returning that call's result.
 *
 * NOTE(review): the end of the parameter list, the declaration of `len` and
 * the conditions guarding the error report and the strlen call are not
 * visible in this excerpt - confirm against the complete source.
 *
 * @param parser parser receiving the string
 * @param data string to parse
 */
2211 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
2215 ucl_create_err (&parser->err, "invalid string added");
2219 len = strlen (data);
2222 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);