1 /* $Id: html.c,v 1.213 2017/06/08 12:54:58 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
31 #include "mandoc_aux.h"
42 #define HTML_NOSTACK (1 << 0) /* print_otag() pushes the tag onto the stack of open scopes only when this is clear */
43 #define HTML_AUTOCLOSE (1 << 1) /* singleton tag; tested in print_otag() for "well-formed" singleton escaping */
44 #define HTML_NLBEFORE (1 << 2) /* tested in print_otag() before the tag name is printed; presumably a line break before the opening tag */
45 #define HTML_NLBEGIN (1 << 3) /* tested in print_otag() after the tag has been printed; presumably a line break after the opening tag */
46 #define HTML_NLEND (1 << 4) /* tested in print_ctag() before the tag name is printed; presumably a line break before the closing tag */
47 #define HTML_NLAFTER (1 << 5) /* tested in print_ctag() after the tag name is printed; presumably a line break after the closing tag */
48 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) /* shorthand: NLBEFORE and NLAFTER together */
49 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) /* shorthand: NLBEGIN and NLEND together */
50 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) /* shorthand: all four NL* flags */
51 #define HTML_INDENT (1 << 6) /* tested in both print_otag() and print_ctag(); presumably adjusts the indentation level - TODO confirm */
52 #define HTML_NOINDENT (1 << 7) /* tested in both print_otag() and print_ctag(); presumably suspends indentation - TODO confirm */
55 static const struct htmldata htmltags[TAG_MAX] = {
57 {"head", HTML_NLALL | HTML_INDENT},
59 {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
60 {"title", HTML_NLAROUND},
61 {"div", HTML_NLAROUND},
62 {"h1", HTML_NLAROUND},
63 {"h2", HTML_NLAROUND},
65 {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
66 {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
68 {"table", HTML_NLALL | HTML_INDENT},
69 {"colgroup", HTML_NLALL | HTML_INDENT},
70 {"col", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
71 {"tr", HTML_NLALL | HTML_INDENT},
72 {"td", HTML_NLAROUND},
73 {"li", HTML_NLAROUND | HTML_INDENT},
74 {"ul", HTML_NLALL | HTML_INDENT},
75 {"ol", HTML_NLALL | HTML_INDENT},
76 {"dl", HTML_NLALL | HTML_INDENT},
77 {"dt", HTML_NLAROUND},
78 {"dd", HTML_NLAROUND | HTML_INDENT},
79 {"pre", HTML_NLALL | HTML_NOINDENT},
86 {"style", HTML_NLALL | HTML_INDENT},
87 {"math", HTML_NLALL | HTML_INDENT},
105 static const char *const roffscales[SCALE_MAX] = {
118 static void a2width(const char *, struct roffsu *); /* calculate the scaling unit passed in a `-width' argument */
119 static void print_byte(struct html *, char); /* buffer one HTML output byte, or print it directly if the buffer is inactive */
120 static void print_endword(struct html *); /* flush the HTML output buffer; activate it if it is inactive */
121 static void print_indent(struct html *); /* indent when at the beginning of a new output line */
122 static void print_word(struct html *, const char *); /* print or buffer some characters, depending on the buffer state */
124 static void print_ctag(struct html *, struct tag *); /* prints the name of an open tag; presumably emits its closing form - body only partly visible */
125 static int print_escape(struct html *, char); /* passes a replacement string for one special character to print_word() */
126 static int print_encode(struct html *, const char *, const char *, int); /* print a string up to an optional end pointer, handling the rejs[] characters and roff escapes */
127 static void print_href(struct html *, const char *, const char *, int); /* print the base_man or base_includes template, expanding %S, %N and %I */
128 static void print_metaf(struct html *, enum mandoc_esc); /* select the font requested by a font escape and open the matching tags */
132 html_alloc(const struct manoutput *outopts)
136 h = mandoc_calloc(1, sizeof(struct html));
139 h->style = outopts->style;
140 h->base_man = outopts->man;
141 h->base_includes = outopts->includes;
142 if (outopts->fragment)
143 h->oflags |= HTML_FRAGMENT;
154 h = (struct html *)p;
156 while ((tag = h->tag) != NULL) {
165 print_gen_head(struct html *h)
169 print_otag(h, TAG_META, "?", "charset", "utf-8");
172 * Print a default style-sheet.
175 t = print_otag(h, TAG_STYLE, "");
176 print_text(h, "table.head, table.foot { width: 100%; }");
178 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
180 print_text(h, "td.head-vol { text-align: center; }");
182 print_text(h, "div.Pp { margin: 1ex 0ex; }");
186 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
187 h->style, "type", "text/css", "media", "all");
191 print_metaf(struct html *h, enum mandoc_esc deco)
196 case ESCAPE_FONTPREV:
199 case ESCAPE_FONTITALIC:
200 font = HTMLFONT_ITALIC;
202 case ESCAPE_FONTBOLD:
203 font = HTMLFONT_BOLD;
209 case ESCAPE_FONTROMAN:
210 font = HTMLFONT_NONE;
217 print_tagq(h, h->metaf);
225 case HTMLFONT_ITALIC:
226 h->metaf = print_otag(h, TAG_I, "");
229 h->metaf = print_otag(h, TAG_B, "");
232 h->metaf = print_otag(h, TAG_B, "");
233 print_otag(h, TAG_I, "");
241 html_make_id(const struct roff_node *n)
243 const struct roff_node *nch;
246 for (nch = n->child; nch != NULL; nch = nch->next)
247 if (nch->type != ROFFT_TEXT)
253 /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */
255 for (cp = buf; *cp != '\0'; cp++)
263 html_strlen(const char *cp)
269 * Account for escaped sequences within string length
270 * calculations. This follows the logic in term_strlen() as we
271 * must calculate the width of produced strings.
272 * Assume that every character has a width of 1. This is
273 * hacky, but it is good enough for approximating widths.
279 rsz = strcspn(cp, "\\");
291 switch (mandoc_escape(&cp, NULL, NULL)) {
295 case ESCAPE_NUMBERED:
297 case ESCAPE_OVERSTRIKE:
303 case ESCAPE_SKIPCHAR:
314 print_escape(struct html *h, char c)
319 print_word(h, "<");
322 print_word(h, ">");
325 print_word(h, "&");
328 print_word(h, """);
331 print_word(h, " ");
345 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
352 static const char rejs[9] = { '\\', '<', '>', '&', '"',
353 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
356 pend = strchr(p, '\0');
361 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
362 h->flags &= ~HTML_SKIPCHAR;
367 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
376 if (print_escape(h, *p++))
379 esc = mandoc_escape(&p, &seq, &len);
380 if (ESCAPE_ERROR == esc)
385 case ESCAPE_FONTPREV:
386 case ESCAPE_FONTBOLD:
387 case ESCAPE_FONTITALIC:
389 case ESCAPE_FONTROMAN:
393 case ESCAPE_SKIPCHAR:
394 h->flags |= HTML_SKIPCHAR;
400 if (h->flags & HTML_SKIPCHAR) {
401 h->flags &= ~HTML_SKIPCHAR;
407 /* Skip past "u" header. */
408 c = mchars_num2uc(seq + 1, len - 1);
410 case ESCAPE_NUMBERED:
411 c = mchars_num2char(seq, len);
416 c = mchars_spec2cp(seq, len);
424 case ESCAPE_OVERSTRIKE:
432 if ((c < 0x20 && c != 0x09) ||
433 (c > 0x7E && c < 0xA0))
436 (void)snprintf(numbuf, sizeof(numbuf), "&#%d;", c);
437 print_word(h, numbuf);
438 } else if (print_escape(h, c) == 0)
446 print_href(struct html *h, const char *name, const char *sec, int man)
450 pp = man ? h->base_man : h->base_includes;
451 while ((p = strchr(pp, '%')) != NULL) {
452 print_encode(h, pp, p, 1);
453 if (man && p[1] == 'S') {
457 print_encode(h, sec, NULL, 1);
458 } else if ((man && p[1] == 'N') ||
459 (man == 0 && p[1] == 'I'))
460 print_encode(h, name, NULL, 1);
462 print_encode(h, p, p + 2, 1);
466 print_encode(h, pp, NULL, 1);
470 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
473 struct roffsu mysu, *su;
479 int i, have_style, tflags;
481 tflags = htmltags[tag].flags;
483 /* Push this tag onto the stack of open scopes. */
485 if ((tflags & HTML_NOSTACK) == 0) {
486 t = mandoc_malloc(sizeof(struct tag));
493 if (tflags & HTML_NLBEFORE)
497 else if ((h->flags & HTML_NOSPACE) == 0) {
498 if (h->flags & HTML_KEEP)
499 print_word(h, " ");
501 if (h->flags & HTML_PREKEEP)
502 h->flags |= HTML_KEEP;
507 if ( ! (h->flags & HTML_NONOSPACE))
508 h->flags &= ~HTML_NOSPACE;
510 h->flags |= HTML_NOSPACE;
512 /* Print out the tag name and attributes. */
515 print_word(h, htmltags[tag].name);
520 while (*fmt != '\0') {
527 /* Parse a non-style attribute and its arguments. */
529 arg1 = va_arg(ap, char *);
542 arg1 = va_arg(ap, char *);
549 arg2 = va_arg(ap, char *);
553 /* Print the non-style attributes. */
561 print_href(h, arg1, NULL, 0);
565 print_href(h, arg1, arg2, 1);
570 print_encode(h, arg1, NULL, 1);
574 print_encode(h, arg1, NULL, 1);
575 print_word(h, "\" title=\"");
576 print_encode(h, arg1, NULL, 1);
580 print_encode(h, arg1, NULL, 1);
586 /* Print out styles. */
588 while (*fmt != '\0') {
592 /* First letter: input argument type. */
598 SCALE_HS_INIT(su, i);
601 arg1 = va_arg(ap, char *);
604 su = va_arg(ap, struct roffsu *);
609 SCALE_VS_INIT(su, i);
612 if ((arg2 = va_arg(ap, char *)) == NULL)
617 /* Increase to make even bold text fit. */
632 /* Second letter: style name. */
636 attr = "margin-bottom";
642 attr = "text-indent";
645 attr = "margin-left";
658 arg1 = va_arg(ap, char *);
663 if (su == NULL && arg1 == NULL)
667 print_word(h, " style=\"");
675 if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
677 else if (su->unit == SCALE_BU)
679 (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
680 print_word(h, numbuf);
681 print_word(h, roffscales[su->unit]);
692 /* Accommodate "well-formed" singleton escaping. */
694 if (HTML_AUTOCLOSE & htmltags[tag].flags)
699 if (tflags & HTML_NLBEGIN)
702 h->flags |= HTML_NOSPACE;
704 if (tflags & HTML_INDENT)
706 if (tflags & HTML_NOINDENT)
713 print_ctag(struct html *h, struct tag *tag)
718 * Remember to close out and nullify the current
719 * meta-font and table, if applicable.
726 tflags = htmltags[tag->tag].flags;
728 if (tflags & HTML_INDENT)
730 if (tflags & HTML_NOINDENT)
732 if (tflags & HTML_NLEND)
737 print_word(h, htmltags[tag->tag].name);
739 if (tflags & HTML_NLAFTER)
747 print_gen_decls(struct html *h)
749 print_word(h, "<!DOCTYPE html>");
754 print_text(struct html *h, const char *word)
756 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
757 if ( ! (HTML_KEEP & h->flags)) {
758 if (HTML_PREKEEP & h->flags)
759 h->flags |= HTML_KEEP;
762 print_word(h, " ");
765 assert(NULL == h->metaf);
767 case HTMLFONT_ITALIC:
768 h->metaf = print_otag(h, TAG_I, "");
771 h->metaf = print_otag(h, TAG_B, "");
774 h->metaf = print_otag(h, TAG_B, "");
775 print_otag(h, TAG_I, "");
783 if ( ! print_encode(h, word, NULL, 0)) {
784 if ( ! (h->flags & HTML_NONOSPACE))
785 h->flags &= ~HTML_NOSPACE;
786 h->flags &= ~HTML_NONEWLINE;
788 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
791 print_tagq(h, h->metaf);
795 h->flags &= ~HTML_IGNDELIM;
799 print_tagq(struct html *h, const struct tag *until)
803 while ((tag = h->tag) != NULL) {
805 if (until && tag == until)
811 print_stagq(struct html *h, const struct tag *suntil)
815 while ((tag = h->tag) != NULL) {
816 if (suntil && tag == suntil)
823 print_paragraph(struct html *h)
827 t = print_otag(h, TAG_DIV, "c", "Pp");
832 /***********************************************************************
833 * Low level output functions.
834 * They implement line breaking using a short static buffer.
835 ***********************************************************************/
838 * Buffer one HTML output byte.
839 * If the buffer is full, flush and deactivate it and start a new line.
840 * If the buffer is inactive, print directly.
843 print_byte(struct html *h, char c)
845 if ((h->flags & HTML_BUFFER) == 0) {
851 if (h->col + h->bufcol < sizeof(h->buf)) {
852 h->buf[h->bufcol++] = c;
861 fwrite(h->buf, h->bufcol, 1, stdout);
863 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
865 h->flags &= ~HTML_BUFFER;
869 * If something was printed on the current output line, end it.
870 * Not to be called right after print_indent().
873 print_endline(struct html *h)
880 fwrite(h->buf, h->bufcol, 1, stdout);
885 h->flags |= HTML_NOSPACE;
886 h->flags &= ~HTML_BUFFER;
890 * Flush the HTML output buffer.
891 * If it is inactive, activate it.
894 print_endword(struct html *h)
901 if ((h->flags & HTML_BUFFER) == 0) {
903 h->flags |= HTML_BUFFER;
904 } else if (h->bufcol) {
906 fwrite(h->buf, h->bufcol, 1, stdout);
907 h->col += h->bufcol + 1;
913 * If at the beginning of a new output line,
914 * perform indentation and mark the line as containing output.
915 * Make sure to really produce some output right afterwards,
916 * but do not use print_otag() for producing it.
919 print_indent(struct html *h)
926 if (h->noindent == 0) {
927 h->col = h->indent * 2;
928 for (i = 0; i < h->col; i++)
931 h->flags &= ~HTML_NOSPACE;
935 * Print or buffer some characters
936 * depending on the current HTML output buffer state.
939 print_word(struct html *h, const char *cp)
942 print_byte(h, *cp++);
946 * Calculate the scaling unit passed in a `-width' argument. This uses
947 * either a native scaling unit (e.g., 1i, 2m) or the string length of
951 a2width(const char *p, struct roffsu *su)
955 end = a2roffsu(p, su, SCALE_MAX);
956 if (end == NULL || *end != '\0') {
958 su->scale = html_strlen(p);
959 } else if (su->scale < 0.0)