1 /* $Id: html.c,v 1.219 2017/07/15 17:57:51 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
31 #include "mandoc_aux.h"
/*
 * Per-element formatting flags, stored in the flags member of the
 * htmltags[] entries and tested by print_otag() and print_ctag().
 * NOTE(review): the HTML_NL* names suggest that they select where
 * line breaks are emitted around and inside an element, and the
 * HTML_*INDENT names that they adjust indentation; the statements
 * guarded by these tests are not visible here - confirm against
 * print_otag() and print_ctag().
 */
42 #define HTML_NOSTACK (1 << 0) /* tag is not pushed onto the stack of open scopes */
43 #define HTML_AUTOCLOSE (1 << 1) /* singleton element; set for meta, link, br, col */
44 #define HTML_NLBEFORE (1 << 2) /* tested by print_otag() before the tag is printed */
45 #define HTML_NLBEGIN (1 << 3) /* tested by print_otag() after attributes and styles */
46 #define HTML_NLEND (1 << 4) /* tested by print_ctag() before the tag name is printed */
47 #define HTML_NLAFTER (1 << 5) /* tested by print_ctag() after the tag name is printed */
48 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER) /* both outer positions */
49 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND) /* both inner positions */
50 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE) /* all four positions */
51 #define HTML_INDENT (1 << 6) /* tested by both print_otag() and print_ctag() */
52 #define HTML_NOINDENT (1 << 7) /* likewise; among the visible entries, set only for "pre" */
55 static const struct htmldata htmltags[TAG_MAX] = {
57 {"head", HTML_NLALL | HTML_INDENT},
59 {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
60 {"title", HTML_NLAROUND},
61 {"div", HTML_NLAROUND},
62 {"h1", HTML_NLAROUND},
63 {"h2", HTML_NLAROUND},
65 {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
66 {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
68 {"table", HTML_NLALL | HTML_INDENT},
69 {"colgroup", HTML_NLALL | HTML_INDENT},
70 {"col", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
71 {"tr", HTML_NLALL | HTML_INDENT},
72 {"td", HTML_NLAROUND},
73 {"li", HTML_NLAROUND | HTML_INDENT},
74 {"ul", HTML_NLALL | HTML_INDENT},
75 {"ol", HTML_NLALL | HTML_INDENT},
76 {"dl", HTML_NLALL | HTML_INDENT},
77 {"dt", HTML_NLAROUND},
78 {"dd", HTML_NLAROUND | HTML_INDENT},
79 {"pre", HTML_NLALL | HTML_NOINDENT},
86 {"style", HTML_NLALL | HTML_INDENT},
87 {"math", HTML_NLALL | HTML_INDENT},
106 static const char *const roffscales[SCALE_MAX] = {
/*
 * Forward declarations of the file-local helpers.
 *
 * a2width() calculates the scaling unit passed in a `-width' argument.
 * print_byte() buffers one HTML output byte, print_endword() flushes
 * the output buffer, print_indent() performs indentation at the
 * beginning of a new output line, and print_word() prints or buffers
 * a string, depending on the state of the output buffer.
 */
119 static void a2width(const char *, struct roffsu *);
120 static void print_byte(struct html *, char);
121 static void print_endword(struct html *);
122 static void print_indent(struct html *);
123 static void print_word(struct html *, const char *);
/*
 * Tag, escaping, and link helpers.  print_href() expands the '%'
 * placeholders of the base_man or base_includes template while
 * printing a link target; print_metaf() switches the current font
 * in response to a font escape sequence.
 */
125 static void print_ctag(struct html *, struct tag *);
126 static int print_escape(struct html *, char);
127 static int print_encode(struct html *, const char *, const char *, int);
128 static void print_href(struct html *, const char *, const char *, int);
129 static void print_metaf(struct html *, enum mandoc_esc);
133 html_alloc(const struct manoutput *outopts)
137 h = mandoc_calloc(1, sizeof(struct html));
140 h->style = outopts->style;
141 h->base_man = outopts->man;
142 h->base_includes = outopts->includes;
143 if (outopts->fragment)
144 h->oflags |= HTML_FRAGMENT;
155 h = (struct html *)p;
157 while ((tag = h->tag) != NULL) {
166 print_gen_head(struct html *h)
170 print_otag(h, TAG_META, "?", "charset", "utf-8");
173 * Print a default style-sheet.
176 t = print_otag(h, TAG_STYLE, "");
177 print_text(h, "table.head, table.foot { width: 100%; }");
179 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
181 print_text(h, "td.head-vol { text-align: center; }");
183 print_text(h, "div.Pp { margin: 1ex 0ex; }");
187 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
188 h->style, "type", "text/css", "media", "all");
192 print_metaf(struct html *h, enum mandoc_esc deco)
197 case ESCAPE_FONTPREV:
200 case ESCAPE_FONTITALIC:
201 font = HTMLFONT_ITALIC;
203 case ESCAPE_FONTBOLD:
204 font = HTMLFONT_BOLD;
210 case ESCAPE_FONTROMAN:
211 font = HTMLFONT_NONE;
218 print_tagq(h, h->metaf);
226 case HTMLFONT_ITALIC:
227 h->metaf = print_otag(h, TAG_I, "");
230 h->metaf = print_otag(h, TAG_B, "");
233 h->metaf = print_otag(h, TAG_B, "");
234 print_otag(h, TAG_I, "");
242 html_make_id(const struct roff_node *n)
244 const struct roff_node *nch;
247 for (nch = n->child; nch != NULL; nch = nch->next)
248 if (nch->type != ROFFT_TEXT)
254 /* http://www.w3.org/TR/html5/dom.html#the-id-attribute */
256 for (cp = buf; *cp != '\0'; cp++)
264 html_strlen(const char *cp)
270 * Account for escaped sequences within string length
271 * calculations. This follows the logic in term_strlen() as we
272 * must calculate the width of produced strings.
274 * Assume that every character has a width of "1". This is
274 * hacky, but it gets the job done for approximation of widths.
280 rsz = strcspn(cp, "\\");
292 switch (mandoc_escape(&cp, NULL, NULL)) {
296 case ESCAPE_NUMBERED:
298 case ESCAPE_OVERSTRIKE:
304 case ESCAPE_SKIPCHAR:
315 print_escape(struct html *h, char c)
320 print_word(h, "<");
323 print_word(h, ">");
326 print_word(h, "&");
329 print_word(h, """);
332 print_word(h, " ");
346 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
352 int c, len, breakline, nospace;
354 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
355 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
358 pend = strchr(p, '\0');
364 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
365 h->flags &= ~HTML_SKIPCHAR;
370 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
374 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
375 t = print_otag(h, TAG_DIV, "");
376 print_text(h, "\\~");
379 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
393 if (print_escape(h, *p++))
396 esc = mandoc_escape(&p, &seq, &len);
397 if (ESCAPE_ERROR == esc)
402 case ESCAPE_FONTPREV:
403 case ESCAPE_FONTBOLD:
404 case ESCAPE_FONTITALIC:
406 case ESCAPE_FONTROMAN:
410 case ESCAPE_SKIPCHAR:
411 h->flags |= HTML_SKIPCHAR;
417 if (h->flags & HTML_SKIPCHAR) {
418 h->flags &= ~HTML_SKIPCHAR;
424 /* Skip past "u" header. */
425 c = mchars_num2uc(seq + 1, len - 1);
427 case ESCAPE_NUMBERED:
428 c = mchars_num2char(seq, len);
433 c = mchars_spec2cp(seq, len);
444 case ESCAPE_OVERSTRIKE:
452 if ((c < 0x20 && c != 0x09) ||
453 (c > 0x7E && c < 0xA0))
456 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
457 print_word(h, numbuf);
458 } else if (print_escape(h, c) == 0)
466 print_href(struct html *h, const char *name, const char *sec, int man)
470 pp = man ? h->base_man : h->base_includes;
471 while ((p = strchr(pp, '%')) != NULL) {
472 print_encode(h, pp, p, 1);
473 if (man && p[1] == 'S') {
477 print_encode(h, sec, NULL, 1);
478 } else if ((man && p[1] == 'N') ||
479 (man == 0 && p[1] == 'I'))
480 print_encode(h, name, NULL, 1);
482 print_encode(h, p, p + 2, 1);
486 print_encode(h, pp, NULL, 1);
490 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
493 struct roffsu mysu, *su;
499 int i, have_style, tflags;
501 tflags = htmltags[tag].flags;
503 /* Push this tag onto the stack of open scopes. */
505 if ((tflags & HTML_NOSTACK) == 0) {
506 t = mandoc_malloc(sizeof(struct tag));
513 if (tflags & HTML_NLBEFORE)
517 else if ((h->flags & HTML_NOSPACE) == 0) {
518 if (h->flags & HTML_KEEP)
519 print_word(h, " ");
521 if (h->flags & HTML_PREKEEP)
522 h->flags |= HTML_KEEP;
527 if ( ! (h->flags & HTML_NONOSPACE))
528 h->flags &= ~HTML_NOSPACE;
530 h->flags |= HTML_NOSPACE;
532 /* Print out the tag name and attributes. */
535 print_word(h, htmltags[tag].name);
540 while (*fmt != '\0') {
547 /* Parse a non-style attribute and its arguments. */
549 arg1 = va_arg(ap, char *);
562 arg1 = va_arg(ap, char *);
569 arg2 = va_arg(ap, char *);
573 /* Print the non-style attributes. */
581 print_href(h, arg1, NULL, 0);
585 print_href(h, arg1, arg2, 1);
590 print_encode(h, arg1, NULL, 1);
594 print_encode(h, arg1, NULL, 1);
595 print_word(h, "\" title=\"");
596 print_encode(h, arg1, NULL, 1);
600 print_encode(h, arg1, NULL, 1);
606 /* Print out styles. */
608 while (*fmt != '\0') {
612 /* First letter: input argument type. */
618 SCALE_HS_INIT(su, i);
621 arg1 = va_arg(ap, char *);
624 su = va_arg(ap, struct roffsu *);
627 if ((arg2 = va_arg(ap, char *)) != NULL) {
632 if (su != NULL && su->unit == SCALE_EN &&
633 su->scale > 5.9 && su->scale < 6.1)
639 /* Make even bold text fit. */
656 /* Second letter: style name. */
663 attr = "text-indent";
666 attr = "margin-left";
676 arg1 = va_arg(ap, char *);
681 if (su == NULL && arg1 == NULL)
685 print_word(h, " style=\"");
693 if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
695 else if (su->unit == SCALE_BU)
697 (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
698 print_word(h, numbuf);
699 print_word(h, roffscales[su->unit]);
710 /* Accommodate for "well-formed" singleton escaping. */
712 if (HTML_AUTOCLOSE & htmltags[tag].flags)
717 if (tflags & HTML_NLBEGIN)
720 h->flags |= HTML_NOSPACE;
722 if (tflags & HTML_INDENT)
724 if (tflags & HTML_NOINDENT)
731 print_ctag(struct html *h, struct tag *tag)
736 * Remember to close out and nullify the current
737 * meta-font and table, if applicable.
744 tflags = htmltags[tag->tag].flags;
746 if (tflags & HTML_INDENT)
748 if (tflags & HTML_NOINDENT)
750 if (tflags & HTML_NLEND)
755 print_word(h, htmltags[tag->tag].name);
757 if (tflags & HTML_NLAFTER)
765 print_gen_decls(struct html *h)
767 print_word(h, "<!DOCTYPE html>");
772 print_text(struct html *h, const char *word)
774 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
775 if ( ! (HTML_KEEP & h->flags)) {
776 if (HTML_PREKEEP & h->flags)
777 h->flags |= HTML_KEEP;
780 print_word(h, " ");
783 assert(NULL == h->metaf);
785 case HTMLFONT_ITALIC:
786 h->metaf = print_otag(h, TAG_I, "");
789 h->metaf = print_otag(h, TAG_B, "");
792 h->metaf = print_otag(h, TAG_B, "");
793 print_otag(h, TAG_I, "");
801 if ( ! print_encode(h, word, NULL, 0)) {
802 if ( ! (h->flags & HTML_NONOSPACE))
803 h->flags &= ~HTML_NOSPACE;
804 h->flags &= ~HTML_NONEWLINE;
806 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
809 print_tagq(h, h->metaf);
813 h->flags &= ~HTML_IGNDELIM;
817 print_tagq(struct html *h, const struct tag *until)
821 while ((tag = h->tag) != NULL) {
823 if (until && tag == until)
829 print_stagq(struct html *h, const struct tag *suntil)
833 while ((tag = h->tag) != NULL) {
834 if (suntil && tag == suntil)
841 print_paragraph(struct html *h)
845 t = print_otag(h, TAG_DIV, "c", "Pp");
850 /***********************************************************************
851 * Low level output functions.
852 * They implement line breaking using a short fixed-size buffer.
853 ***********************************************************************/
856 * Buffer one HTML output byte.
857 * If the buffer is full, flush and deactivate it and start a new line.
858 * If the buffer is inactive, print directly.
861 print_byte(struct html *h, char c)
863 if ((h->flags & HTML_BUFFER) == 0) {
869 if (h->col + h->bufcol < sizeof(h->buf)) {
870 h->buf[h->bufcol++] = c;
879 fwrite(h->buf, h->bufcol, 1, stdout);
881 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
883 h->flags &= ~HTML_BUFFER;
887 * If something was printed on the current output line, end it.
888 * Not to be called right after print_indent().
891 print_endline(struct html *h)
898 fwrite(h->buf, h->bufcol, 1, stdout);
903 h->flags |= HTML_NOSPACE;
904 h->flags &= ~HTML_BUFFER;
908 * Flush the HTML output buffer.
909 * If it is inactive, activate it.
912 print_endword(struct html *h)
919 if ((h->flags & HTML_BUFFER) == 0) {
921 h->flags |= HTML_BUFFER;
922 } else if (h->bufcol) {
924 fwrite(h->buf, h->bufcol, 1, stdout);
925 h->col += h->bufcol + 1;
931 * If at the beginning of a new output line,
932 * perform indentation and mark the line as containing output.
933 * Make sure to really produce some output right afterwards,
934 * but do not use print_otag() for producing it.
937 print_indent(struct html *h)
944 if (h->noindent == 0) {
945 h->col = h->indent * 2;
946 for (i = 0; i < h->col; i++)
949 h->flags &= ~HTML_NOSPACE;
953 * Print or buffer some characters
954 * depending on the current HTML output buffer state.
957 print_word(struct html *h, const char *cp)
960 print_byte(h, *cp++);
964 * Calculate the scaling unit passed in a `-width' argument. This uses
965 * either a native scaling unit (e.g., 1i, 2m) or the string length of
969 a2width(const char *p, struct roffsu *su)
973 end = a2roffsu(p, su, SCALE_MAX);
974 if (end == NULL || *end != '\0') {
976 su->scale = html_strlen(p);
977 } else if (su->scale < 0.0)