1 /* $Id: html.c,v 1.238 2018/06/25 16:54:59 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
32 #include "mandoc_aux.h"
33 #include "mandoc_ohash.h"
/*
 * Per-element formatting flags.  They are stored in the flags member
 * of the htmltags[] entries and read back as tflags in print_otag()
 * and print_ctag().
 * Elements flagged HTML_NOSTACK are not pushed onto the stack of open
 * scopes, and HTML_AUTOCLOSE marks singleton elements.
 * NOTE(review): the HTML_NL* bits presumably select where line breaks
 * go relative to the opening and closing tag, and HTML_INDENT and
 * HTML_NOINDENT presumably adjust indentation; the statements acting
 * on them are not part of this excerpt.
 */
44 #define HTML_NOSTACK (1 << 0)
45 #define HTML_AUTOCLOSE (1 << 1)
46 #define HTML_NLBEFORE (1 << 2)
47 #define HTML_NLBEGIN (1 << 3)
48 #define HTML_NLEND (1 << 4)
49 #define HTML_NLAFTER (1 << 5)
/* Convenience combinations of the four line break bits. */
50 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER)
51 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND)
52 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE)
53 #define HTML_INDENT (1 << 6)
54 #define HTML_NOINDENT (1 << 7)
/*
 * Element name and formatting flags for each HTML element, indexed
 * by enum htmltag: see the htmltags[tag].name and htmltags[tag].flags
 * lookups in print_otag() and print_ctag().
 * NOTE(review): several entries and the closing brace are missing
 * from this excerpt.  Presumably the entry order has to match the
 * declaration of enum htmltag; confirm against the header.
 */
57 static const struct htmldata htmltags[TAG_MAX] = {
59 {"head", HTML_NLALL | HTML_INDENT},
61 {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
62 {"title", HTML_NLAROUND},
63 {"div", HTML_NLAROUND},
65 {"h1", HTML_NLAROUND},
66 {"h2", HTML_NLAROUND},
68 {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
69 {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
71 {"table", HTML_NLALL | HTML_INDENT},
72 {"tr", HTML_NLALL | HTML_INDENT},
73 {"td", HTML_NLAROUND},
74 {"li", HTML_NLAROUND | HTML_INDENT},
75 {"ul", HTML_NLALL | HTML_INDENT},
76 {"ol", HTML_NLALL | HTML_INDENT},
77 {"dl", HTML_NLALL | HTML_INDENT},
78 {"dt", HTML_NLAROUND},
79 {"dd", HTML_NLAROUND | HTML_INDENT},
80 {"pre", HTML_NLALL | HTML_NOINDENT},
87 {"style", HTML_NLALL | HTML_INDENT},
88 {"math", HTML_NLALL | HTML_INDENT},
107 /* Avoid duplicate HTML id= attributes. */
/*
 * File-global hash table.  It is set up in html_alloc(), queried and
 * filled in html_make_id(), and deleted in the teardown code.
 */
108 static struct ohash id_unique;
/* File-local helpers; all of them are defined further down. */
110 static void print_byte(struct html *, char);
111 static void print_endword(struct html *);
112 static void print_indent(struct html *);
113 static void print_word(struct html *, const char *);
115 static void print_ctag(struct html *, struct tag *);
116 static int print_escape(struct html *, char);
117 static int print_encode(struct html *, const char *, const char *, int);
118 static void print_href(struct html *, const char *, const char *, int);
119 static void print_metaf(struct html *, enum mandoc_esc);
/*
 * Allocate a struct html with mandoc_calloc() and configure it from
 * the output options: the style sheet, the link templates for manual
 * pages (base_man) and include files (base_includes), and the
 * HTML_FRAGMENT output flag.
 * Also initializes the file-global id_unique hash table.
 * NOTE(review): the return type and the return statement are not
 * part of this excerpt; presumably the new struct html is handed
 * back to the caller.
 */
123 html_alloc(const struct manoutput *outopts)
127 h = mandoc_calloc(1, sizeof(struct html));
130 h->style = outopts->style;
131 h->base_man = outopts->man;
132 h->base_includes = outopts->includes;
133 if (outopts->fragment)
134 h->oflags |= HTML_FRAGMENT;
136 mandoc_ohash_init(&id_unique, 4, 0);
/*
 * Teardown code; the signature line of this function is not part of
 * this excerpt.
 * The visible lines walk the stack of open tags starting at h->tag,
 * then visit every entry of the id_unique hash table with
 * ohash_first() and ohash_next(), and finally delete the table.
 * NOTE(review): what is done with each tag and with each table entry
 * (presumably they are freed) is not visible here.
 */
149 h = (struct html *)p;
150 while ((tag = h->tag) != NULL) {
156 cp = ohash_first(&id_unique, &slot);
159 cp = ohash_next(&id_unique, &slot);
161 ohash_delete(&id_unique);
/*
 * Emit the generic contents of the document head: a meta element
 * declaring the utf-8 charset and, if a style sheet was configured
 * in h->style, a link element referencing it.
 * The print_text() calls write a minimal embedded style sheet into
 * a style element opened with print_otag().
 * NOTE(review): the control flow between the link element and the
 * embedded style sheet (presumably an early return, so that only one
 * of the two is emitted) and the closing of the style element are
 * not visible in this excerpt.
 */
165 print_gen_head(struct html *h)
169 print_otag(h, TAG_META, "?", "charset", "utf-8");
170 if (h->style != NULL) {
171 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
172 h->style, "type", "text/css", "media", "all");
177 * Print a minimal embedded style sheet.
180 t = print_otag(h, TAG_STYLE, "");
181 print_text(h, "table.head, table.foot { width: 100%; }");
183 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
185 print_text(h, "td.head-vol { text-align: center; }");
187 print_text(h, "div.Pp { margin: 1ex 0ex; }");
189 print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
191 print_text(h, "span.Pa, span.Ad { font-style: italic; }");
193 print_text(h, "span.Ms { font-weight: bold; }");
195 print_text(h, "dl.Bl-diag ");
197 print_text(h, " dt { font-weight: bold; }");
199 print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
200 "code.In, code.Fd, code.Fn,");
202 print_text(h, "code.Cd { font-weight: bold; "
203 "font-family: inherit; }");
/*
 * Switch the output font in response to a font escape.
 * The escape type deco selects the new font (previous, italic, bold
 * or roman are the cases visible here).  The font element recorded
 * in h->metaf is passed to print_tagq(), and then an i element, a b
 * element, or a b element containing an i element is opened; the
 * outermost new element is remembered in h->metaf.
 * NOTE(review): several case labels and the bookkeeping of the
 * current and previous font are elided from this excerpt.
 */
208 print_metaf(struct html *h, enum mandoc_esc deco)
213 case ESCAPE_FONTPREV:
216 case ESCAPE_FONTITALIC:
217 font = HTMLFONT_ITALIC;
219 case ESCAPE_FONTBOLD:
220 font = HTMLFONT_BOLD;
226 case ESCAPE_FONTROMAN:
227 font = HTMLFONT_NONE;
234 print_tagq(h, h->metaf);
242 case HTMLFONT_ITALIC:
243 h->metaf = print_otag(h, TAG_I, "");
246 h->metaf = print_otag(h, TAG_B, "");
249 h->metaf = print_otag(h, TAG_B, "");
250 print_otag(h, TAG_I, "");
/*
 * Build a string for use as an HTML id= attribute from the children
 * of node n.
 * Every child is checked for being a text node, and every byte of
 * the candidate string is checked against the set of alphanumerics
 * plus the punctuation listed in the strchr() call.
 * For uniqueness, the candidate is looked up in id_unique; on a
 * collision, names of the form buf_N with an increasing number N are
 * tried, with a special case once N exceeds 127.  The name that was
 * finally chosen is inserted into id_unique.
 * NOTE(review): what happens for non-text children, for characters
 * outside the permitted set and when N exceeds 127, how the unique
 * argument gates the lookup, and the return value with its ownership
 * are all elided from this excerpt.
 */
258 html_make_id(const struct roff_node *n, int unique)
260 const struct roff_node *nch;
261 char *buf, *bufs, *cp;
265 for (nch = n->child; nch != NULL; nch = nch->next)
266 if (nch->type != ROFFT_TEXT)
275 * In ID attributes, only use ASCII characters that are
276 * permitted in URL-fragment strings according to the
278 * https://url.spec.whatwg.org/#url-fragment-string
281 for (cp = buf; *cp != '\0'; cp++)
282 if (isalnum((unsigned char)*cp) == 0 &&
283 strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
289 /* Avoid duplicate HTML id= attributes. */
293 slot = ohash_qlookup(&id_unique, buf);
294 cp = ohash_find(&id_unique, slot);
298 if (++suffix > 127) {
302 mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
303 slot = ohash_qlookup(&id_unique, bufs);
304 cp = ohash_find(&id_unique, slot);
309 ohash_insert(&id_unique, slot, buf);
/*
 * Print the replacement text for a single character c that needs
 * special treatment in HTML output.
 * NOTE(review): the case labels and the return statements are elided
 * from this excerpt.  Callers test the result (see print_encode()),
 * so presumably a non-zero result means that c was handled here.
 * NOTE(review): the string arguments below look as if the tool that
 * produced this listing decoded HTML character references; the
 * literal consisting of three double quotes is not valid C.
 * Presumably the pristine file passes the corresponding character
 * references instead.  Verify against upstream before relying on
 * these lines.
 */
314 print_escape(struct html *h, char c)
319 print_word(h, "<");
322 print_word(h, ">");
325 print_word(h, "&");
328 print_word(h, """);
331 print_word(h, " ");
/*
 * Write the string starting at p and ending before pend to the
 * output, giving special treatment to the bytes listed in rejs:
 * the blank, the backslash, the characters that are special in
 * HTML, and the internal ASCII_NBRSP, ASCII_HYPH and ASCII_BREAK
 * markers.  Callers in this file pass NULL for pend; the strchr()
 * call then presumably makes pend point to the terminating NUL.
 * Runs of ordinary bytes are skipped over with strcspn(), special
 * bytes go through print_escape(), and escape sequences are parsed
 * with mandoc_escape().  The visible cases cover font escapes, the
 * skip-character request, and Unicode, numbered and special
 * character escapes, which yield a code point in c.
 * Some code points are written as numeric character references
 * formatted by snprintf(); others go through print_escape().
 * NOTE(review): large parts of the body are elided from this
 * excerpt, including the use of norecurse, the handling of
 * breakline and nospace, and the return value.
 */
345 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
351 int c, len, breakline, nospace;
353 static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
354 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
357 pend = strchr(p, '\0');
363 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
364 h->flags &= ~HTML_SKIPCHAR;
369 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
373 (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
374 t = print_otag(h, TAG_DIV, "");
375 print_text(h, "\\~");
378 while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
392 if (print_escape(h, *p++))
395 esc = mandoc_escape(&p, &seq, &len);
396 if (ESCAPE_ERROR == esc)
401 case ESCAPE_FONTPREV:
402 case ESCAPE_FONTBOLD:
403 case ESCAPE_FONTITALIC:
405 case ESCAPE_FONTROMAN:
409 case ESCAPE_SKIPCHAR:
410 h->flags |= HTML_SKIPCHAR;
416 if (h->flags & HTML_SKIPCHAR) {
417 h->flags &= ~HTML_SKIPCHAR;
423 /* Skip past "u" header. */
424 c = mchars_num2uc(seq + 1, len - 1);
426 case ESCAPE_NUMBERED:
427 c = mchars_num2char(seq, len);
432 c = mchars_spec2cp(seq, len);
443 case ESCAPE_OVERSTRIKE:
451 if ((c < 0x20 && c != 0x09) ||
452 (c > 0x7E && c < 0xA0))
455 (void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
456 print_word(h, numbuf);
457 } else if (print_escape(h, c) == 0)
/*
 * Print a link target built from a template string: h->base_man if
 * man is non-zero, h->base_includes otherwise.
 * The template is scanned for percent signs, and the text in front
 * of each one is copied with print_encode().
 * In manual page links, %S is expanded using sec and %N using name;
 * in include file links, %I is expanded using name.  In the
 * remaining case, the two bytes starting at the percent sign are
 * copied as they are.  Whatever follows the last percent sequence is
 * copied at the end.
 * NOTE(review): the statements between the %S test and the
 * print_encode() call for sec, and those advancing pp, are elided
 * from this excerpt.
 */
465 print_href(struct html *h, const char *name, const char *sec, int man)
469 pp = man ? h->base_man : h->base_includes;
470 while ((p = strchr(pp, '%')) != NULL) {
471 print_encode(h, pp, p, 1);
472 if (man && p[1] == 'S') {
476 print_encode(h, sec, NULL, 1);
477 } else if ((man && p[1] == 'N') ||
478 (man == 0 && p[1] == 'I'))
479 print_encode(h, name, NULL, 1);
481 print_encode(h, p, p + 2, 1);
485 print_encode(h, pp, NULL, 1);
/*
 * Print the opening tag of the element tag, with attributes that are
 * described by the format string fmt and taken from the variadic
 * arguments following it.
 * Unless the element is flagged HTML_NOSTACK, a struct tag is
 * allocated for it as part of pushing it onto the stack of open
 * scopes.  The element flags in tflags are tested for line breaks
 * and indentation around the tag, and the HTML_NOSPACE, HTML_KEEP
 * and HTML_PREKEEP bits in h->flags are tested when deciding about
 * separating white space in front of the tag.
 * The loop over fmt fetches one or two string arguments per
 * attribute and prints them with print_href() or print_encode().
 * NOTE(review): the switch over the fmt characters, the statements
 * guarded by the flag tests, and the return statement are elided
 * from this excerpt.  Callers store the result as a tag pointer,
 * see print_metaf() and print_text().
 */
489 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
497 tflags = htmltags[tag].flags;
499 /* Push this tag onto the stack of open scopes. */
501 if ((tflags & HTML_NOSTACK) == 0) {
502 t = mandoc_malloc(sizeof(struct tag));
509 if (tflags & HTML_NLBEFORE)
513 else if ((h->flags & HTML_NOSPACE) == 0) {
514 if (h->flags & HTML_KEEP)
515 print_word(h, " ");
517 if (h->flags & HTML_PREKEEP)
518 h->flags |= HTML_KEEP;
523 if ( ! (h->flags & HTML_NONOSPACE))
524 h->flags &= ~HTML_NOSPACE;
526 h->flags |= HTML_NOSPACE;
528 /* Print out the tag name and attributes. */
531 print_word(h, htmltags[tag].name);
535 while (*fmt != '\0') {
537 /* Parse attributes and arguments. */
539 arg1 = va_arg(ap, char *);
553 arg2 = va_arg(ap, char *);
557 arg1 = va_arg(ap, char *);
563 arg2 = va_arg(ap, char *);
567 /* Print the attributes. */
575 print_href(h, arg1, NULL, 0);
579 print_href(h, arg1, arg2, 1);
584 print_encode(h, arg1, NULL, 1);
588 print_encode(h, arg1, NULL, 1);
589 print_word(h, "\" title=\"");
590 print_encode(h, arg1, NULL, 1);
595 print_encode(h, arg1, NULL, 1);
609 /* Accommodate for "well-formed" singleton escaping. */
611 if (HTML_AUTOCLOSE & htmltags[tag].flags)
616 if (tflags & HTML_NLBEGIN)
619 h->flags |= HTML_NOSPACE;
621 if (tflags & HTML_INDENT)
623 if (tflags & HTML_NOINDENT)
/*
 * Print the closing tag for the open scope tag.
 * The name of the element is looked up in htmltags[], and the flags
 * of the element are tested for indentation changes and for line
 * breaks before and after the closing tag.
 * NOTE(review): the statements guarded by the flag tests and those
 * implementing the meta-font and table reset mentioned in the
 * comment below are elided from this excerpt; so is whatever is
 * done with tag itself afterwards.
 */
630 print_ctag(struct html *h, struct tag *tag)
635 * Remember to close out and nullify the current
636 * meta-font and table, if applicable.
643 tflags = htmltags[tag->tag].flags;
645 if (tflags & HTML_INDENT)
647 if (tflags & HTML_NOINDENT)
649 if (tflags & HTML_NLEND)
654 print_word(h, htmltags[tag->tag].name);
656 if (tflags & HTML_NLAFTER)
/*
 * Print the document type declaration with print_word().
 * NOTE(review): any statements after the print_word() call are not
 * part of this excerpt.
 */
664 print_gen_decls(struct html *h)
666 print_word(h, "<!DOCTYPE html>");
/*
 * Print an HTML comment consisting of a fixed notice that the file
 * was generated automatically, followed by the text of the run of
 * ROFFT_COMMENT nodes starting at n.
 * A node is skipped if its text contains the HTML comment
 * terminator.  An empty line is only printed while wantblank is
 * set, and wantblank is set again after each printed line according
 * to whether that line was non-empty.
 * NOTE(review): the rest of the fixed notice, the initial value of
 * wantblank and the statement advancing n are elided from this
 * excerpt.
 */
671 print_gen_comment(struct html *h, struct roff_node *n)
675 print_word(h, "<!-- This is an automatically generated file."
679 while (n != NULL && n->type == ROFFT_COMMENT) {
680 if (strstr(n->string, "-->") == NULL &&
681 (wantblank || *n->string != '\0')) {
684 print_word(h, n->string);
685 wantblank = *n->string != '\0';
691 print_word(h, " -->");
/*
 * Print the text word.
 * If the current line already has output (h->col is non-zero) and
 * HTML_NOSPACE is not set, the word is separated from what precedes
 * it; the HTML_KEEP and HTML_PREKEEP bits decide how.
 * An i element, a b element, or a b element containing an i element
 * is opened around the word, selected by a font value, and handed
 * to print_tagq() after the word.
 * The word itself is written with print_encode(), and its result
 * decides whether HTML_NOSPACE and HTML_NONEWLINE are cleared or
 * set for what follows.  HTML_IGNDELIM is cleared at the end.
 * NOTE(review): the switch header selecting the font and several
 * branch lines are elided from this excerpt.
 */
697 print_text(struct html *h, const char *word)
699 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
700 if ( ! (HTML_KEEP & h->flags)) {
701 if (HTML_PREKEEP & h->flags)
702 h->flags |= HTML_KEEP;
705 print_word(h, " ");
708 assert(NULL == h->metaf);
710 case HTMLFONT_ITALIC:
711 h->metaf = print_otag(h, TAG_I, "");
714 h->metaf = print_otag(h, TAG_B, "");
717 h->metaf = print_otag(h, TAG_B, "");
718 print_otag(h, TAG_I, "");
726 if ( ! print_encode(h, word, NULL, 0)) {
727 if ( ! (h->flags & HTML_NONOSPACE))
728 h->flags &= ~HTML_NOSPACE;
729 h->flags &= ~HTML_NONEWLINE;
731 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
734 print_tagq(h, h->metaf);
738 h->flags &= ~HTML_IGNDELIM;
/*
 * Walk the stack of open tags, starting at the innermost scope
 * h->tag, and compare each one against until.
 * NOTE(review): most of the loop body is elided from this excerpt.
 * Going by the embedded listing numbers, one statement precedes the
 * comparison; presumably it closes the tag, so that the scope until
 * itself is closed as well before the walk stops.
 */
742 print_tagq(struct html *h, const struct tag *until)
746 while ((tag = h->tag) != NULL) {
748 if (until && tag == until)
/*
 * Walk the stack of open tags, starting at the innermost scope
 * h->tag, and compare each one against suntil.
 * Going by the embedded listing numbers, the comparison is the
 * first statement of the loop body.
 * NOTE(review): the rest of the loop body is elided from this
 * excerpt; presumably the walk stops in front of suntil and leaves
 * that scope open.
 */
754 print_stagq(struct html *h, const struct tag *suntil)
758 while ((tag = h->tag) != NULL) {
759 if (suntil && tag == suntil)
/*
 * Open a div element with print_otag(), passing the format string c
 * and the argument Pp.
 * NOTE(review): presumably this sets the class attribute to Pp,
 * matching the div.Pp rule of the embedded style sheet; the fmt
 * handling in print_otag() is elided, so confirm.  What is done
 * with t afterwards is not part of this excerpt.
 */
766 print_paragraph(struct html *h)
770 t = print_otag(h, TAG_DIV, "c", "Pp");
775 /***********************************************************************
776 * Low level output functions.
777 * They implement line breaking using a short static buffer.
778 ***********************************************************************/
781 * Buffer one HTML output byte.
782 * If the buffer is full, flush and deactivate it and start a new line.
783 * If the buffer is inactive, print directly.
/*
 * Hand one output byte to the line buffer in h->buf.
 * NOTE(review): the statements of the unbuffered path and the
 * returns between the three cases are elided from this excerpt.
 */
786 print_byte(struct html *h, char c)
/* Case 1: the buffer is inactive. */
788 if ((h->flags & HTML_BUFFER) == 0) {
/* Case 2: output column plus buffered bytes still fit; store c. */
794 if (h->col + h->bufcol < sizeof(h->buf)) {
795 h->buf[h->bufcol++] = c;
/*
 * Case 3: write out the buffered bytes, recompute the output
 * column from the indentation level and the number of bytes
 * written, and switch buffering off.
 */
804 fwrite(h->buf, h->bufcol, 1, stdout);
806 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
808 h->flags &= ~HTML_BUFFER;
812 * If something was printed on the current output line, end it.
813 * Not to be called right after print_indent().
/*
 * The visible statements write the buffered bytes to stdout, set
 * HTML_NOSPACE and switch buffering off.
 * NOTE(review): the conditions guarding these statements and the
 * output of the line terminator are elided from this excerpt.
 */
816 print_endline(struct html *h)
823 fwrite(h->buf, h->bufcol, 1, stdout);
828 h->flags |= HTML_NOSPACE;
829 h->flags &= ~HTML_BUFFER;
833 * Flush the HTML output buffer.
834 * If it is inactive, activate it.
/*
 * If buffering is off, switch it on.  Otherwise, if the buffer holds
 * any bytes, write them to stdout and advance the output column by
 * the number of bytes written plus one.
 * NOTE(review): the extra column presumably accounts for a
 * separating blank; the statements printing it and resetting
 * h->bufcol are elided from this excerpt.
 */
837 print_endword(struct html *h)
844 if ((h->flags & HTML_BUFFER) == 0) {
846 h->flags |= HTML_BUFFER;
847 } else if (h->bufcol) {
849 fwrite(h->buf, h->bufcol, 1, stdout);
850 h->col += h->bufcol + 1;
856 * If at the beginning of a new output line,
857 * perform indentation and mark the line as containing output.
858 * Make sure to really produce some output right afterwards,
859 * but do not use print_otag() for producing it.
/*
 * Unless indentation is disabled by h->noindent, set the output
 * column to two columns per indentation level and run a loop with
 * one iteration per column.  Finally, clear HTML_NOSPACE.
 * NOTE(review): the guard at the top of the function and the
 * statement that emits each filler byte are elided from this
 * excerpt.
 */
862 print_indent(struct html *h)
869 if (h->noindent == 0) {
870 h->col = h->indent * 2;
871 for (i = 0; i < h->col; i++)
874 h->flags &= ~HTML_NOSPACE;
878 * Print or buffer some characters
879 * depending on the current HTML output buffer state.
/*
 * Pass the bytes of the string cp to print_byte(), one at a time.
 * NOTE(review): the loop header is elided from this excerpt;
 * presumably it runs until the terminating NUL byte.
 */
882 print_word(struct html *h, const char *cp)
885 print_byte(h, *cp++);