1 /* $Id: html.c,v 1.200 2017/01/21 02:29:57 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
32 #include "mandoc_aux.h"
41 #define HTML_NOSTACK (1 << 0)
42 #define HTML_AUTOCLOSE (1 << 1)
43 #define HTML_NLBEFORE (1 << 2)
44 #define HTML_NLBEGIN (1 << 3)
45 #define HTML_NLEND (1 << 4)
46 #define HTML_NLAFTER (1 << 5)
47 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER)
48 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND)
49 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE)
50 #define HTML_INDENT (1 << 6)
51 #define HTML_NOINDENT (1 << 7)
54 static const struct htmldata htmltags[TAG_MAX] = {
56 {"head", HTML_NLALL | HTML_INDENT},
58 {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
59 {"title", HTML_NLAROUND},
60 {"div", HTML_NLAROUND},
61 {"h1", HTML_NLAROUND},
62 {"h2", HTML_NLAROUND},
64 {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
65 {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
67 {"table", HTML_NLALL | HTML_INDENT},
68 {"tbody", HTML_NLALL | HTML_INDENT},
69 {"col", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
70 {"tr", HTML_NLALL | HTML_INDENT},
71 {"td", HTML_NLAROUND},
72 {"li", HTML_NLAROUND | HTML_INDENT},
73 {"ul", HTML_NLALL | HTML_INDENT},
74 {"ol", HTML_NLALL | HTML_INDENT},
75 {"dl", HTML_NLALL | HTML_INDENT},
76 {"dt", HTML_NLAROUND},
77 {"dd", HTML_NLAROUND | HTML_INDENT},
78 {"pre", HTML_NLALL | HTML_NOINDENT},
83 {"style", HTML_NLALL | HTML_INDENT},
84 {"math", HTML_NLALL | HTML_INDENT},
102 static const char *const roffscales[SCALE_MAX] = {
115 static void a2width(const char *, struct roffsu *);
116 static void print_byte(struct html *, char);
117 static void print_endline(struct html *);
118 static void print_endword(struct html *);
119 static void print_indent(struct html *);
120 static void print_word(struct html *, const char *);
122 static void print_ctag(struct html *, struct tag *);
123 static int print_escape(struct html *, char);
124 static int print_encode(struct html *, const char *, const char *, int);
125 static void print_href(struct html *, const char *, const char *, int);
126 static void print_metaf(struct html *, enum mandoc_esc);
130 html_alloc(const struct manoutput *outopts)
134 h = mandoc_calloc(1, sizeof(struct html));
137 h->style = outopts->style;
138 h->base_man = outopts->man;
139 h->base_includes = outopts->includes;
140 if (outopts->fragment)
141 h->oflags |= HTML_FRAGMENT;
152 h = (struct html *)p;
154 while ((tag = h->tags.head) != NULL) {
155 h->tags.head = tag->next;
163 print_gen_head(struct html *h)
167 print_otag(h, TAG_META, "?", "charset", "utf-8");
170 * Print a default style-sheet.
173 t = print_otag(h, TAG_STYLE, "");
174 print_text(h, "table.head, table.foot { width: 100%; }");
176 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
178 print_text(h, "td.head-vol { text-align: center; }");
180 print_text(h, "div.Pp { margin: 1ex 0ex; }");
184 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
185 h->style, "type", "text/css", "media", "all");
189 print_metaf(struct html *h, enum mandoc_esc deco)
194 case ESCAPE_FONTPREV:
197 case ESCAPE_FONTITALIC:
198 font = HTMLFONT_ITALIC;
200 case ESCAPE_FONTBOLD:
201 font = HTMLFONT_BOLD;
207 case ESCAPE_FONTROMAN:
208 font = HTMLFONT_NONE;
215 print_tagq(h, h->metaf);
223 case HTMLFONT_ITALIC:
224 h->metaf = print_otag(h, TAG_I, "");
227 h->metaf = print_otag(h, TAG_B, "");
230 h->metaf = print_otag(h, TAG_B, "");
231 print_otag(h, TAG_I, "");
239 html_strlen(const char *cp)
245 * Account for escape sequences within string length
246 * calculations. This follows the logic in term_strlen() as we
247 * must calculate the width of produced strings.
248 * Assume that every character has a width of 1. This is
249 * hacky, but it gets the job done for approximation of widths.
255 rsz = strcspn(cp, "\\");
267 switch (mandoc_escape(&cp, NULL, NULL)) {
271 case ESCAPE_NUMBERED:
273 case ESCAPE_OVERSTRIKE:
279 case ESCAPE_SKIPCHAR:
290 print_escape(struct html *h, char c)
295 print_word(h, "<");
298 print_word(h, ">");
301 print_word(h, "&");
304 print_word(h, """);
307 print_word(h, " ");
321 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
328 static const char rejs[9] = { '\\', '<', '>', '&', '"',
329 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
332 pend = strchr(p, '\0');
337 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
338 h->flags &= ~HTML_SKIPCHAR;
343 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
352 if (print_escape(h, *p++))
355 esc = mandoc_escape(&p, &seq, &len);
356 if (ESCAPE_ERROR == esc)
361 case ESCAPE_FONTPREV:
362 case ESCAPE_FONTBOLD:
363 case ESCAPE_FONTITALIC:
365 case ESCAPE_FONTROMAN:
369 case ESCAPE_SKIPCHAR:
370 h->flags |= HTML_SKIPCHAR;
376 if (h->flags & HTML_SKIPCHAR) {
377 h->flags &= ~HTML_SKIPCHAR;
383 /* Skip past "u" header. */
384 c = mchars_num2uc(seq + 1, len - 1);
386 case ESCAPE_NUMBERED:
387 c = mchars_num2char(seq, len);
392 c = mchars_spec2cp(seq, len);
400 case ESCAPE_OVERSTRIKE:
408 if ((c < 0x20 && c != 0x09) ||
409 (c > 0x7E && c < 0xA0))
412 (void)snprintf(numbuf, sizeof(numbuf), "&#%d;", c);
413 print_word(h, numbuf);
414 } else if (print_escape(h, c) == 0)
422 print_href(struct html *h, const char *name, const char *sec, int man)
426 pp = man ? h->base_man : h->base_includes;
427 while ((p = strchr(pp, '%')) != NULL) {
428 print_encode(h, pp, p, 1);
429 if (man && p[1] == 'S') {
433 print_encode(h, sec, NULL, 1);
434 } else if ((man && p[1] == 'N') ||
435 (man == 0 && p[1] == 'I'))
436 print_encode(h, name, NULL, 1);
438 print_encode(h, p, p + 2, 1);
442 print_encode(h, pp, NULL, 1);
446 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
449 struct roffsu mysu, *su;
455 int i, have_style, tflags;
457 tflags = htmltags[tag].flags;
459 /* Push this tag onto the stack of open scopes. */
461 if ((tflags & HTML_NOSTACK) == 0) {
462 t = mandoc_malloc(sizeof(struct tag));
464 t->next = h->tags.head;
469 if (tflags & HTML_NLBEFORE)
473 else if ((h->flags & HTML_NOSPACE) == 0) {
474 if (h->flags & HTML_KEEP)
475 print_word(h, " ");
477 if (h->flags & HTML_PREKEEP)
478 h->flags |= HTML_KEEP;
483 if ( ! (h->flags & HTML_NONOSPACE))
484 h->flags &= ~HTML_NOSPACE;
486 h->flags |= HTML_NOSPACE;
488 /* Print out the tag name and attributes. */
491 print_word(h, htmltags[tag].name);
496 while (*fmt != '\0') {
498 print_word(h, " style=\"");
503 s = va_arg(ap, char *);
516 s = va_arg(ap, char *);
527 print_href(h, s, va_arg(ap, char *), 1);
531 print_href(h, s, NULL, 0);
539 print_encode(h, s, NULL, 1);
545 /* Print out styles. */
549 while (*fmt != '\0') {
551 /* First letter: input argument type. */
556 SCALE_HS_INIT(su, i);
559 s = va_arg(ap, char *);
562 su = va_arg(ap, struct roffsu *);
566 SCALE_VS_INIT(su, i);
569 s = va_arg(ap, char *);
576 /* Second letter: style name. */
580 attr = "margin-bottom";
586 attr = "text-indent";
589 attr = "margin-left";
604 print_word(h, va_arg(ap, char *));
613 if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
615 else if (su->unit == SCALE_BU)
620 (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
621 print_word(h, numbuf);
622 print_word(h, roffscales[su->unit]);
632 /* Accommodate "well-formed" singleton escaping. */
634 if (HTML_AUTOCLOSE & htmltags[tag].flags)
639 if (tflags & HTML_NLBEGIN)
642 h->flags |= HTML_NOSPACE;
644 if (tflags & HTML_INDENT)
646 if (tflags & HTML_NOINDENT)
653 print_ctag(struct html *h, struct tag *tag)
658 * Remember to close out and nullify the current
659 * meta-font and table, if applicable.
666 tflags = htmltags[tag->tag].flags;
668 if (tflags & HTML_INDENT)
670 if (tflags & HTML_NOINDENT)
672 if (tflags & HTML_NLEND)
677 print_word(h, htmltags[tag->tag].name);
679 if (tflags & HTML_NLAFTER)
682 h->tags.head = tag->next;
687 print_gen_decls(struct html *h)
689 print_word(h, "<!DOCTYPE html>");
694 print_text(struct html *h, const char *word)
696 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
697 if ( ! (HTML_KEEP & h->flags)) {
698 if (HTML_PREKEEP & h->flags)
699 h->flags |= HTML_KEEP;
702 print_word(h, " ");
705 assert(NULL == h->metaf);
707 case HTMLFONT_ITALIC:
708 h->metaf = print_otag(h, TAG_I, "");
711 h->metaf = print_otag(h, TAG_B, "");
714 h->metaf = print_otag(h, TAG_B, "");
715 print_otag(h, TAG_I, "");
723 if ( ! print_encode(h, word, NULL, 0)) {
724 if ( ! (h->flags & HTML_NONOSPACE))
725 h->flags &= ~HTML_NOSPACE;
726 h->flags &= ~HTML_NONEWLINE;
728 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
731 print_tagq(h, h->metaf);
735 h->flags &= ~HTML_IGNDELIM;
739 print_tagq(struct html *h, const struct tag *until)
743 while ((tag = h->tags.head) != NULL) {
745 if (until && tag == until)
751 print_stagq(struct html *h, const struct tag *suntil)
755 while ((tag = h->tags.head) != NULL) {
756 if (suntil && tag == suntil)
763 print_paragraph(struct html *h)
767 t = print_otag(h, TAG_DIV, "c", "Pp");
772 /***********************************************************************
773 * Low level output functions.
774 * They implement line breaking using a short static buffer.
775 ***********************************************************************/
778 * Buffer one HTML output byte.
779 * If the buffer is full, flush and deactivate it and start a new line.
780 * If the buffer is inactive, print directly.
783 print_byte(struct html *h, char c)
785 if ((h->flags & HTML_BUFFER) == 0) {
791 if (h->col + h->bufcol < sizeof(h->buf)) {
792 h->buf[h->bufcol++] = c;
801 fwrite(h->buf, h->bufcol, 1, stdout);
803 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
805 h->flags &= ~HTML_BUFFER;
809 * If something was printed on the current output line, end it.
810 * Not to be called right after print_indent().
813 print_endline(struct html *h)
820 fwrite(h->buf, h->bufcol, 1, stdout);
825 h->flags |= HTML_NOSPACE;
826 h->flags &= ~HTML_BUFFER;
830 * Flush the HTML output buffer.
831 * If it is inactive, activate it.
834 print_endword(struct html *h)
841 if ((h->flags & HTML_BUFFER) == 0) {
843 h->flags |= HTML_BUFFER;
844 } else if (h->bufcol) {
846 fwrite(h->buf, h->bufcol, 1, stdout);
847 h->col += h->bufcol + 1;
853 * If at the beginning of a new output line,
854 * perform indentation and mark the line as containing output.
855 * Make sure to really produce some output right afterwards,
856 * but do not use print_otag() for producing it.
859 print_indent(struct html *h)
866 if (h->noindent == 0) {
867 h->col = h->indent * 2;
868 for (i = 0; i < h->col; i++)
871 h->flags &= ~HTML_NOSPACE;
875 * Print or buffer some characters
876 * depending on the current HTML output buffer state.
879 print_word(struct html *h, const char *cp)
882 print_byte(h, *cp++);
886 * Calculate the scaling unit passed in a `-width' argument. This uses
887 * either a native scaling unit (e.g., 1i, 2m) or the string length of
891 a2width(const char *p, struct roffsu *su)
893 if (a2roffsu(p, su, SCALE_MAX) < 2) {
895 su->scale = html_strlen(p);
896 } else if (su->scale < 0.0)