2 * Copyright (c) 2015, Juniper Networks, Inc.
4 * This SOFTWARE is licensed under the LICENSE provided in the
5 * ../Copyright file. By downloading, installing, copying, or otherwise
6 * using the SOFTWARE, you agree to be bound by the terms of that
8 * Phil Shafer, August 2015
12 * CSV encoder generates comma-separated value files for specific
13 * subsets of data. This is not (and cannot be) a generalized
14 * facility, but for specific subsets of data, CSV data can be
15 * reasonably generated. For example, the df XML content:
18 * <total-blocks>4</total-blocks>
19 * <used-blocks>4</used-blocks>
20 * <available-blocks>0</available-blocks>
21 * <used-percent>100</used-percent>
22 * <mounted-on>/proc</mounted-on>
25 * could be represented as:
27 * #+name,total-blocks,used-blocks,available-blocks,used-percent,mounted-on
28 * procfs,4,4,0,100,/proc
30 * Data is then constrained to be sibling leaf values. In addition,
31 * singular leafs can also be matched. The costs include recording
32 * the specific leaf names (to ensure consistency) and some
35 * Some escaping is needed for CSV files, following the rules of RFC4180:
37 * - Fields containing a line-break, double-quote or commas should be
38 * quoted. (If they are not, the file will likely be impossible to
40 * - A (double) quote character in a field must be represented by two
41 * (double) quote characters.
42 * - Leading and trailing whitespace require fields be quoted.
44 * Cheesy, but simple. The RFC also requires MS-DOS end-of-line, which
45 * we only do with the "dos" option. Strange that we still live in a
46 * DOS-friendly world, but then again, we make spaceships based on the
47 * horse butts (http://www.astrodigital.org/space/stshorse.html).
51 #include <sys/types.h>
59 #include "xo_encoder.h"
63 #define UNUSED __attribute__ ((__unused__))
67 * The CSV encoder has three moving parts:
69 * - The path holds the path we are matching against
70 * - This is given as input via "options" and does not change
72 * - The stack holds the current names of the open elements
73 * - The "open" operations push, while the "close" pop
74 * - Turns out, at this point, the stack is unused, but I've
75 * left "drippings" in the code because I see this as useful
76 * for future features (under CSV_STACK_IS_NEEDED).
78 * - The leafs record the current set of leaf
79 * - A key from the parent list counts as a leaf (unless CF_NO_KEYS)
80 * - Once the path is matched, all other leafs at that level are leafs
81 * - Leafs are recorded to get the header comment accurately recorded
82 * - Once the first line is emitted, the set of leafs _cannot_ change
84 * We use offsets into the buffers, since we know they can be
85 * realloc'd out from under us, as the size increases. The 'path'
86 * is fixed, we allocate it once, so it doesn't need offsets.
/* One member of the path that leafs are selected against (see c_path). */
88 typedef struct path_frame_s {
89 char *pf_name; /* Path member name; points into c_path_buf */
90 uint32_t pf_flags; /* Flags for this path element (PFF_*) */
/*
 * One open element on the element stack.  The only visible user is the
 * c_stack member of the private data, which is conditionally compiled.
 */
93 typedef struct stack_frame_s {
94 ssize_t sf_off; /* Element name; offset in c_stack_buf */
95 uint32_t sf_flags; /* Flags for this frame (SFF_*) */
98 /* Flags for sf_flags */
/*
 * One recorded leaf.  Name and value are stored as offsets into their
 * string buffers rather than as pointers.
 */
100 typedef struct leaf_s {
101 ssize_t f_name; /* Name of leaf; offset in c_name_buf */
102 ssize_t f_value; /* Value of leaf; offset in c_value_buf */
103 uint32_t f_flags; /* Flags for this value (LF_*) */
104 #ifdef CSV_STACK_IS_NEEDED
105 ssize_t f_depth; /* Depth of stack when leaf was recorded */
106 #endif /* CSV_STACK_IS_NEEDED */
109 /* Flags for f_flags */
110 #define LF_KEY (1<<0) /* Leaf is a key */
111 #define LF_HAS_VALUE (1<<1) /* Value has been set (cleared after each emitted record) */
/*
 * Per-handle private state of the CSV encoder: option flags, the path
 * being matched, the optional element stack, the recorded leafs and the
 * output buffer.
 * NOTE(review): the stack members below are guarded with "#if" while every
 * other CSV_STACK_IS_NEEDED guard in this file uses "#ifdef"; the two
 * disagree when the macro is defined without a value -- confirm intent.
 */
113 typedef struct csv_private_s {
114 uint32_t c_flags; /* Flags for this encoder */
116 /* The path for which we select leafs */
117 char *c_path_buf; /* Buffer containing path members */
118 path_frame_t *c_path; /* Array of path members */
119 ssize_t c_path_max; /* Depth of c_path[] */
120 ssize_t c_path_cur; /* Current depth in c_path[] */
122 /* A stack of open elements (xo_op_list, xo_op_container) */
123 #if CSV_STACK_IS_NEEDED
124 xo_buffer_t c_stack_buf; /* Buffer used for stack content */
125 stack_frame_t *c_stack; /* Stack of open tags */
126 ssize_t c_stack_max; /* Maximum stack depth */
127 #endif /* CSV_STACK_IS_NEEDED */
128 ssize_t c_stack_depth; /* Current stack depth */
130 /* List of leafs we are emitting (to ensure consistency) */
131 xo_buffer_t c_name_buf; /* String buffer for leaf names */
132 xo_buffer_t c_value_buf; /* String buffer for leaf values */
133 leaf_t *c_leaf; /* List of leafs */
134 ssize_t c_leaf_depth; /* Current depth of c_leaf[] (next free) */
135 ssize_t c_leaf_max; /* Max depth of c_leaf[] */
137 xo_buffer_t c_data; /* Buffer for creating data */
/* Default array sizes, followed by the bit flags stored in c_flags. */
140 #define C_STACK_MAX 32 /* default c_stack_max */
141 #define C_LEAF_MAX 32 /* default c_leaf_max */
143 /* Flags for this structure */
144 #define CF_HEADER_DONE (1<<0) /* Have already written the header */
145 #define CF_NO_HEADER (1<<1) /* Do not generate header */
146 #define CF_NO_KEYS (1<<2) /* Do not generate excess keys */
147 #define CF_VALUE_ONLY (1<<3) /* Only generate the value */
149 #define CF_DOS_NEWLINE (1<<4) /* Generate CR-NL, just like MS-DOS */
150 #define CF_LEAFS_DONE (1<<5) /* Leafs have already been recorded */
151 #define CF_NO_QUOTES (1<<6) /* Do not generate quotes */
152 #define CF_RECORD_DATA (1<<7) /* Record all sibling leafs */
154 #define CF_DEBUG (1<<8) /* Make debug output */
155 #define CF_HAS_PATH (1<<9) /* A "path" option was provided */
158 * A simple debugging print function, similar to psu_dbg. Controlled by
159 * the undocumented "debug" option.
/*
 * Debug trace helper: formats a printf-style message to stderr with
 * vfprintf().  The guard below tests for missing private data or a
 * cleared CF_DEBUG flag.
 * NOTE(review): the statement controlled by that guard is not visible in
 * the reviewed text; presumably an early return -- confirm.
 */
162 csv_dbg (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED,
163 const char *fmt, ...)
165 if (csv == NULL || !(csv->c_flags & CF_DEBUG))
171 vfprintf(stderr, fmt, vap);
176 * Create the private data for this handle, initialize it, and record
177 * the pointer in the handle.
/*
 * Allocate the private data with xo_realloc(), zero it, initialize the
 * data, name and value buffers (plus the stack buffer when
 * CSV_STACK_IS_NEEDED is defined) and attach the result to the handle
 * with xo_set_private().
 */
180 csv_create (xo_handle_t *xop)
182 csv_private_t *csv = xo_realloc(NULL, sizeof(*csv));
186 bzero(csv, sizeof(*csv));
187 xo_buf_init(&csv->c_data);
188 xo_buf_init(&csv->c_name_buf);
189 xo_buf_init(&csv->c_value_buf);
190 #ifdef CSV_STACK_IS_NEEDED
191 xo_buf_init(&csv->c_stack_buf);
192 #endif /* CSV_STACK_IS_NEEDED */
194 xo_set_private(xop, csv);
200 * Clean up and release any data in use by this handle
/*
 * Release the buffers owned by the private data, then free the leaf
 * array and the path string buffer.
 * NOTE(review): no free of csv->c_path (allocated in csv_record_path) is
 * visible here; if none exists on the lines not shown, that array leaks
 * -- confirm.
 */
203 csv_destroy (xo_handle_t *xop UNUSED, csv_private_t *csv)
206 xo_buf_cleanup(&csv->c_data);
207 xo_buf_cleanup(&csv->c_name_buf);
208 xo_buf_cleanup(&csv->c_value_buf);
209 #ifdef CSV_STACK_IS_NEEDED
210 xo_buf_cleanup(&csv->c_stack_buf);
211 #endif /* CSV_STACK_IS_NEEDED */
214 xo_free(csv->c_leaf);
216 xo_free(csv->c_path_buf);
220 * Return the element name at the top of the path stack. This is the
221 * item that we are currently trying to match on.
/*
 * Look up the name of the path member at index (c_path_cur + delta).
 * Callers pass 0 for the member to match next and -1 for the member
 * matched most recently.  The first guard covers the case where no path
 * option was given or no path array exists.
 * NOTE(review): any range check on "cur" sits on lines not visible in the
 * reviewed text; callers do test the result against NULL -- confirm.
 */
224 csv_path_top (csv_private_t *csv, ssize_t delta)
226 if (!(csv->c_flags & CF_HAS_PATH) || csv->c_path == NULL)
229 ssize_t cur = csv->c_path_cur + delta;
234 return csv->c_path[cur].pf_name;
238 * Underimplemented stack functionality
/*
 * Note that an element was opened.  Only the depth counter is bumped, and
 * only when CSV_STACK_IS_NEEDED is defined; the name is not stored.
 */
241 csv_stack_push (csv_private_t *csv UNUSED, const char *name UNUSED)
243 #ifdef CSV_STACK_IS_NEEDED
244 csv->c_stack_depth += 1;
245 #endif /* CSV_STACK_IS_NEEDED */
249 * Underimplemented stack functionality
/*
 * Note that an element was closed.  Only the depth counter is decremented,
 * and only when CSV_STACK_IS_NEEDED is defined; the name is not checked.
 */
252 csv_stack_pop (csv_private_t *csv UNUSED, const char *name UNUSED)
254 #ifdef CSV_STACK_IS_NEEDED
255 csv->c_stack_depth -= 1;
256 #endif /* CSV_STACK_IS_NEEDED */
259 /* Flags for csv_quote_flags */
260 #define QF_NEEDS_QUOTES (1<<0) /* Needs to be quoted */
261 #define QF_NEEDS_ESCAPE (1<<1) /* Needs to be escaped */
264 * Determine how much quote processing is needed. The details of the
265 * quoting rules are given at the top of this file. We return a set
266 * of flags, indicating what's needed.
269 csv_quote_flags (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED,
272 static const char quoted[] = "\n\r\",";
273 static const char escaped[] = "\"";
275 if (csv->c_flags & CF_NO_QUOTES) /* User doesn't want quotes */
278 size_t len = strlen(value);
281 if (strcspn(value, quoted) != len)
282 rc |= QF_NEEDS_QUOTES;
283 else if (isspace((int) value[0])) /* Leading whitespace */
284 rc |= QF_NEEDS_QUOTES;
285 else if (isspace((int) value[len - 1])) /* Trailing whitespace */
286 rc |= QF_NEEDS_QUOTES;
288 if (strcspn(value, escaped) != len)
289 rc |= QF_NEEDS_ESCAPE;
291 csv_dbg(xop, csv, "csv: quote flags [%s] -> %x (%zu/%zu)\n",
292 value, rc, len, strcspn(value, quoted));
298 * Escape the string, following the rules in RFC4180
/*
 * Append "value" to the buffer, adding one extra double-quote character
 * after each stretch that strchr() reports as ending at a double quote.
 * The remainder after the last match is appended up to value + len.
 * NOTE(review): strchr() depends on NUL termination and ignores "len", so
 * value must be a NUL-terminated string of exactly that length; the only
 * visible caller passes strlen(value).
 */
301 csv_escape (xo_buffer_t *xbp, const char *value, size_t len)
303 const char *cp, *ep, *np;
305 for (cp = value, ep = value + len; cp && cp < ep; cp = np) {
306 np = strchr(cp, '"');
309 xo_buf_append(xbp, cp, np - cp);
310 xo_buf_append(xbp, "\"", 1);
312 xo_buf_append(xbp, cp, ep - cp);
317 * Append a newline to the buffer, following the settings of the "dos"
/*
 * Terminate the current record: append CR-LF when CF_DOS_NEWLINE is set,
 * otherwise a bare LF.
 */
321 csv_append_newline (xo_buffer_t *xbp, csv_private_t *csv)
323 if (csv->c_flags & CF_DOS_NEWLINE)
324 xo_buf_append(xbp, "\r\n", 2);
326 xo_buf_append(xbp, "\n", 1);
330 * Create a 'record' of 'fields' from our recorded leaf values. If
331 * this is the first line and "no-header" isn't given, make a record
332 * containing the leaf names.
/*
 * Build one CSV record in c_data from the recorded leafs.
 *
 * On the first call, unless CF_NO_HEADER is set, a header record of the
 * leaf names is written first and CF_HEADER_DONE is set.  Each value is
 * then appended, quoted and/or escaped as csv_quote_flags() directs.
 * Afterwards LF_HAS_VALUE is cleared on every leaf, the value buffer is
 * reset, and CF_LEAFS_DONE is set to lock the set of leafs.
 *
 * NOTE(review): header names are appended verbatim, without the quoting
 * applied to values; a leaf name containing a comma or quote would
 * corrupt the header -- confirm whether names are constrained upstream.
 */
335 csv_emit_record (xo_handle_t *xop, csv_private_t *csv)
337 csv_dbg(xop, csv, "csv: emit: ...\n");
340 uint32_t quote_flags;
343 /* If we have no data, then don't bother */
344 if (csv->c_leaf_depth == 0)
347 if (!(csv->c_flags & (CF_HEADER_DONE | CF_NO_HEADER))) {
348 csv->c_flags |= CF_HEADER_DONE;
350 for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
351 lp = &csv->c_leaf[fnum];
352 const char *name = xo_buf_data(&csv->c_name_buf, lp->f_name);
355 xo_buf_append(&csv->c_data, ",", 1);
357 xo_buf_append(&csv->c_data, name, strlen(name));
360 csv_append_newline(&csv->c_data, csv);
363 for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
364 lp = &csv->c_leaf[fnum];
367 if (lp->f_flags & LF_HAS_VALUE) {
368 value = xo_buf_data(&csv->c_value_buf, lp->f_value);
373 quote_flags = csv_quote_flags(xop, csv, value);
376 xo_buf_append(&csv->c_data, ",", 1);
378 if (quote_flags & QF_NEEDS_QUOTES)
379 xo_buf_append(&csv->c_data, "\"", 1);
381 if (quote_flags & QF_NEEDS_ESCAPE)
382 csv_escape(&csv->c_data, value, strlen(value));
384 xo_buf_append(&csv->c_data, value, strlen(value));
386 if (quote_flags & QF_NEEDS_QUOTES)
387 xo_buf_append(&csv->c_data, "\"", 1);
390 csv_append_newline(&csv->c_data, csv);
392 /* We flush if either flush flag is set */
393 if (xo_get_flags(xop) & (XOF_FLUSH | XOF_FLUSH_LINE))
396 /* Clean out values from leafs */
397 for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
398 lp = &csv->c_leaf[fnum];
400 lp->f_flags &= ~LF_HAS_VALUE;
404 xo_buf_reset(&csv->c_value_buf);
407 * Once we emit the first line, our set of leafs is locked and
410 csv->c_flags |= CF_LEAFS_DONE;
414 * Open a "level" of hierarchy, either a container or an instance. Look
415 * for a match in the path=x/y/z hierarchy, and ignore if not a match.
416 * If we're at the end of the path, start recording leaf values.
/*
 * Handle the opening of a container or instance called "name"
 * ("instance" is non-zero for instances).
 *
 * Any recording in progress is stopped and its record emitted.  Then:
 *  - with no current path member, recording starts only for an instance
 *    and only when no "path" option was given;
 *  - when the name matches the current path member, c_path_cur advances
 *    and recording starts once it reaches c_path_max.
 * Finally the name is handed to csv_stack_push().
 */
419 csv_open_level (xo_handle_t *xop UNUSED, csv_private_t *csv,
420 const char *name, int instance)
422 /* A new "open" event means we stop recording */
423 if (csv->c_flags & CF_RECORD_DATA) {
424 csv->c_flags &= ~CF_RECORD_DATA;
425 csv_emit_record(xop, csv);
429 const char *path_top = csv_path_top(csv, 0);
431 /* If the top of the stack does not match the name, then ignore */
432 if (path_top == NULL) {
433 if (instance && !(csv->c_flags & CF_HAS_PATH)) {
434 csv_dbg(xop, csv, "csv: recording (no-path) ...\n");
435 csv->c_flags |= CF_RECORD_DATA;
438 } else if (xo_streq(path_top, name)) {
439 csv->c_path_cur += 1; /* Advance to next path member */
441 csv_dbg(xop, csv, "csv: match: [%s] (%zd/%zd)\n", name,
442 csv->c_path_cur, csv->c_path_max);
444 /* If we're all the way thru the path members, start recording */
445 if (csv->c_path_cur == csv->c_path_max) {
446 csv_dbg(xop, csv, "csv: recording ...\n");
447 csv->c_flags |= CF_RECORD_DATA;
451 /* Push the name on the stack */
452 csv_stack_push(csv, name);
458 * Close a "level", either a container or an instance.
/*
 * Handle the closing of a container or instance called "name".
 *
 * Any recording in progress is stopped and its record emitted.  When the
 * name matches the most recently matched path member (index
 * c_path_cur - 1), c_path_cur steps back by one.  Finally the name is
 * handed to csv_stack_pop().
 *
 * The "?:" with an omitted middle operand in the debug call is a GNU C
 * extension.
 */
461 csv_close_level (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name)
463 /* If we're recording, a close triggers an emit */
464 if (csv->c_flags & CF_RECORD_DATA) {
465 csv->c_flags &= ~CF_RECORD_DATA;
466 csv_emit_record(xop, csv);
469 const char *path_top = csv_path_top(csv, -1);
470 csv_dbg(xop, csv, "csv: close: [%s] [%s] (%zd)\n", name,
471 path_top ?: "", csv->c_path_cur);
473 /* If the top of the stack does not match the name, then ignore */
474 if (path_top != NULL && xo_streq(path_top, name)) {
475 csv->c_path_cur -= 1;
479 /* Pop the name off the stack */
480 csv_stack_pop(csv, name);
486 * Return the index of a given leaf in the c_leaf[] array, where we
487 * record leaf values. If the leaf is new and we haven't stopped recording
488 * leafs, then make a new slot for it and record the name.
491 csv_leaf_num (xo_handle_t *xop UNUSED, csv_private_t *csv,
492 const char *name, xo_xff_flags_t flags)
496 xo_buffer_t *xbp = &csv->c_name_buf;
498 for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
499 lp = &csv->c_leaf[fnum];
501 const char *fname = xo_buf_data(xbp, lp->f_name);
502 if (xo_streq(fname, name))
506 /* If we're done with adding new leafs, then bail */
507 if (csv->c_flags & CF_LEAFS_DONE)
510 /* This leaf does not exist yet, so we need to create it */
511 /* Start by checking if there's enough room */
512 if (csv->c_leaf_depth + 1 >= csv->c_leaf_max) {
513 /* Out of room; realloc it */
514 ssize_t new_max = csv->c_leaf_max * 2;
516 new_max = C_LEAF_MAX;
518 lp = xo_realloc(csv->c_leaf, new_max * sizeof(*lp));
520 return -1; /* No luck; bail */
522 /* Zero out the new portion */
523 bzero(&lp[csv->c_leaf_max], csv->c_leaf_max * sizeof(*lp));
525 /* Update csv data */
527 csv->c_leaf_max = new_max;
530 lp = &csv->c_leaf[csv->c_leaf_depth++];
531 #ifdef CSV_STACK_IS_NEEDED
532 lp->f_depth = csv->c_stack_depth;
533 #endif /* CSV_STACK_IS_NEEDED */
535 lp->f_name = xo_buf_offset(xbp);
537 char *cp = xo_buf_cur(xbp);
538 xo_buf_append(xbp, name, strlen(name) + 1);
541 lp->f_flags |= LF_KEY;
543 csv_dbg(xop, csv, "csv: leaf: name: %zd [%s] [%s] %x\n",
544 fnum, name, cp, lp->f_flags);
550 * Record a new value for a leaf
/*
 * Record "value" for leaf "lp": remember the current offset of the value
 * buffer in f_value, set LF_HAS_VALUE, and append the string with its
 * terminating NUL.
 * NOTE(review): "cp" is taken before the append; if the append moves the
 * buffer, the pointer passed to the debug print is stale.  Looking the
 * value up by lp->f_value after the append would avoid that -- confirm.
 */
553 csv_leaf_set (xo_handle_t *xop UNUSED, csv_private_t *csv, leaf_t *lp,
556 xo_buffer_t *xbp = &csv->c_value_buf;
558 lp->f_value = xo_buf_offset(xbp);
559 lp->f_flags |= LF_HAS_VALUE;
561 char *cp = xo_buf_cur(xbp);
562 xo_buf_append(xbp, value, strlen(value) + 1);
564 csv_dbg(xop, csv, "csv: leaf: value: [%s] [%s] %x\n",
565 value, cp, lp->f_flags);
569 * Record the requested set of leaf names. The input should be a set
570 * of leaf names, separated by periods.
/*
 * Register the leaf names given by the "leafs" option.  The input is
 * copied to a stack buffer, walked one '.'-separated name at a time, and
 * each non-empty name is registered through csv_leaf_num() with no
 * flags.  CF_LEAFS_DONE is then set so no further leafs are added.
 * NOTE(review): the copy uses alloca() sized by the option string, and
 * the result of csv_leaf_num() is ignored, so a failed registration goes
 * unreported -- confirm both are acceptable.
 */
573 csv_record_leafs (xo_handle_t *xop, csv_private_t *csv, const char *leafs_raw)
576 ssize_t len = strlen(leafs_raw);
577 char *leafs_buf = alloca(len + 1);
579 memcpy(leafs_buf, leafs_raw, len + 1); /* Make local copy */
581 for (cp = leafs_buf, ep = leafs_buf + len; cp && cp < ep; cp = np) {
582 np = strchr(cp, '.');
586 if (*cp == '\0') /* Skip empty names */
589 csv_dbg(xop, csv, "adding leaf: [%s]\n", cp);
590 csv_leaf_num(xop, csv, cp, 0);
594 * Since we've been told explicitly what leafs matter, ignore the rest
596 csv->c_flags |= CF_LEAFS_DONE;
602 * Record the requested path elements. The input should be a set of
603 * container or instances names, separated by slashes.
/*
 * Record the path given by the "path" option.  The string is copied to
 * the heap and scanned twice for '/' separators: once while sizing the
 * frame array, once while pointing each frame at its member name inside
 * the copy.  The entry after the last member gets a NULL name.  Any
 * earlier path array and buffer are freed before the new buffer and
 * member count are stored.
 * NOTE(review): no NULL check on path_buf is visible before the memcpy(),
 * and the allocation-failure branch does not visibly free path_buf;
 * either may sit on lines not shown -- confirm.
 */
606 csv_record_path (xo_handle_t *xop, csv_private_t *csv, const char *path_raw)
610 ssize_t len = strlen(path_raw);
611 char *path_buf = xo_realloc(NULL, len + 1);
613 memcpy(path_buf, path_raw, len + 1);
615 for (cp = path_buf, ep = path_buf + len, count = 2;
616 cp && cp < ep; cp = np) {
617 np = strchr(cp, '/');
624 path_frame_t *path = xo_realloc(NULL, sizeof(path[0]) * count);
626 xo_failure(xop, "allocation failure for path '%s'", path_buf);
630 bzero(path, sizeof(path[0]) * count);
632 for (count = 0, cp = path_buf; cp && cp < ep; cp = np) {
633 path[count++].pf_name = cp;
635 np = strchr(cp, '/');
638 csv_dbg(xop, csv, "path: [%s]\n", cp);
641 path[count].pf_name = NULL;
643 if (csv->c_path) /* In case two paths are given */
644 xo_free(csv->c_path);
645 if (csv->c_path_buf) /* In case two paths are given */
646 xo_free(csv->c_path_buf);
648 csv->c_path_buf = path_buf;
650 csv->c_path_max = count;
657 * Extract the option values. The format is:
658 * -libxo encoder=csv:kw=val+kw=val+kw=val,pretty,etc
/*
 * Parse the encoder option string.  Options are separated by '+', and an
 * option may carry a value after '='.
 *
 * Recognized names: "path"; "leafs", "leaf" or "leaves"; "no-keys";
 * "no-header"; "value-only"; "dos"; "no-quotes"; "debug".  The first two
 * are passed to csv_record_path() / csv_record_leafs(); the others set
 * the matching CF_* flag.  Any other name is reported with the
 * "unknown encoder option value" message.
 *
 * NOTE(review): the memcpy() copies len bytes only, so the terminating
 * NUL must be stored on a line not visible in the reviewed text;
 * strchr() and the "ep = options + len + 1" bound depend on it --
 * confirm.  The copy uses alloca() sized by the option string.
 */
661 csv_options (xo_handle_t *xop, csv_private_t *csv, const char *raw_opts)
663 ssize_t len = strlen(raw_opts);
664 char *options = alloca(len + 1);
665 memcpy(options, raw_opts, len);
668 char *cp, *ep, *np, *vp;
669 for (cp = options, ep = options + len + 1; cp && cp < ep; cp = np) {
670 np = strchr(cp, '+');
674 vp = strchr(cp, '=');
678 if (xo_streq(cp, "path")) {
679 /* Record the path */
680 if (vp != NULL && csv_record_path(xop, csv, vp))
683 csv->c_flags |= CF_HAS_PATH; /* Yup, we have an explicit path now */
685 } else if (xo_streq(cp, "leafs")
686 || xo_streq(cp, "leaf")
687 || xo_streq(cp, "leaves")) {
688 /* Record the leafs */
689 if (vp != NULL && csv_record_leafs(xop, csv, vp))
692 } else if (xo_streq(cp, "no-keys")) {
693 csv->c_flags |= CF_NO_KEYS;
694 } else if (xo_streq(cp, "no-header")) {
695 csv->c_flags |= CF_NO_HEADER;
696 } else if (xo_streq(cp, "value-only")) {
697 csv->c_flags |= CF_VALUE_ONLY;
698 } else if (xo_streq(cp, "dos")) {
699 csv->c_flags |= CF_DOS_NEWLINE;
700 } else if (xo_streq(cp, "no-quotes")) {
701 csv->c_flags |= CF_NO_QUOTES;
702 } else if (xo_streq(cp, "debug")) {
703 csv->c_flags |= CF_DEBUG;
706 "unknown encoder option value: '%s'", cp);
715 * Handler for incoming data values. We just record each leaf name and
716 * value. The values are emitted when the instance is closed.
/*
 * Handle one leaf name/value pair.  The CF_RECORD_DATA test gates the
 * work; the leaf index is then looked up (or created) with
 * csv_leaf_num() and the value stored with csv_leaf_set().
 * NOTE(review): the test on fnum that guards the "return 0" below is not
 * visible in the reviewed text; csv_leaf_num() can yield -1, so the
 * index must be checked before it is used -- confirm.
 */
719 csv_data (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED,
720 const char *name, const char *value,
721 xo_xof_flags_t flags)
723 csv_dbg(xop, csv, "data: [%s]=[%s] %llx\n", name, value, (unsigned long long) flags);
725 if (!(csv->c_flags & CF_RECORD_DATA))
728 /* Find the leaf number */
729 int fnum = csv_leaf_num(xop, csv, name, flags);
731 return 0; /* Don't bother recording */
733 leaf_t *lp = &csv->c_leaf[fnum];
734 csv_leaf_set(xop, csv, lp, value);
740 * The callback from libxo, passing us operations/events as they
/*
 * Encoder callback: dispatch one libxo operation to the matching csv_*
 * routine.  Private data may legitimately be NULL only for
 * XO_OP_CREATE; csv_dbg() tolerates a NULL csv, so the trace call ahead
 * of that check is safe.
 * NOTE(review): the flush case writes the buffered output to file
 * descriptor 1 with a single write() and stores the byte count in rc; no
 * handling of short writes is visible in the reviewed text -- confirm.
 */
744 csv_handler (XO_ENCODER_HANDLER_ARGS)
747 csv_private_t *csv = private;
748 xo_buffer_t *xbp = csv ? &csv->c_data : NULL;
750 csv_dbg(xop, csv, "op %s: [%s] [%s]\n", xo_encoder_op_name(op),
751 name ?: "", value ?: "");
754 /* If we don't have private data, we're sunk */
755 if (csv == NULL && op != XO_OP_CREATE)
759 case XO_OP_CREATE: /* Called when the handle is init'd */
760 rc = csv_create(xop);
764 rc = csv_options(xop, csv, value);
767 case XO_OP_OPEN_LIST:
768 case XO_OP_CLOSE_LIST:
769 break; /* Ignore these ops */
771 case XO_OP_OPEN_CONTAINER:
772 case XO_OP_OPEN_LEAF_LIST:
773 rc = csv_open_level(xop, csv, name, 0);
776 case XO_OP_OPEN_INSTANCE:
777 rc = csv_open_level(xop, csv, name, 1);
780 case XO_OP_CLOSE_CONTAINER:
781 case XO_OP_CLOSE_LEAF_LIST:
782 case XO_OP_CLOSE_INSTANCE:
783 rc = csv_close_level(xop, csv, name);
786 case XO_OP_STRING: /* Quoted UTF-8 string */
787 case XO_OP_CONTENT: /* Other content */
788 rc = csv_data(xop, csv, name, value, flags);
791 case XO_OP_FINISH: /* Clean up function */
794 case XO_OP_FLUSH: /* Write out the buffered data */
795 rc = write(1, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp);
802 case XO_OP_DESTROY: /* Clean up function */
803 csv_destroy(xop, csv);
806 case XO_OP_ATTRIBUTE: /* Attribute name/value */
809 case XO_OP_VERSION: /* Version string */
817 * Callback when our encoder is loaded.
/*
 * Library entry point: register csv_handler as the encoder callback and
 * report the encoder API version this file was built against.
 */
820 xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
822 arg->xei_handler = csv_handler;
823 arg->xei_version = XO_ENCODER_VERSION;