1 /* $Id: html.c,v 1.207 2017/02/05 20:22:04 schwarze Exp $ */
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
32 #include "mandoc_aux.h"
/*
 * Per-element flag bits used in the htmltags[] table.
 * print_otag() and print_ctag() read them via htmltags[tag].flags.
 */
/* When clear, print_otag() pushes the tag onto the stack of open scopes. */
41 #define HTML_NOSTACK (1 << 0)
/* Tested by print_otag() for "well-formed" singleton handling (meta, link, br, col). */
42 #define HTML_AUTOCLOSE (1 << 1)
/* Tested by print_otag() before it prints the tag name; presumably a line break before the opening tag. */
43 #define HTML_NLBEFORE (1 << 2)
/* Tested by print_otag() after the attributes; presumably a line break after the opening tag. */
44 #define HTML_NLBEGIN (1 << 3)
/* Tested by print_ctag() before it prints the tag name; presumably a line break before the closing tag. */
45 #define HTML_NLEND (1 << 4)
/* Tested by print_ctag() after it prints the tag name; presumably a line break after the closing tag. */
46 #define HTML_NLAFTER (1 << 5)
/* Shorthand: both "outside" bits (before the opening tag, after the closing tag). */
47 #define HTML_NLAROUND (HTML_NLBEFORE | HTML_NLAFTER)
/* Shorthand: both "inside" bits (after the opening tag, before the closing tag). */
48 #define HTML_NLINSIDE (HTML_NLBEGIN | HTML_NLEND)
/* Shorthand: all four NL bits. */
49 #define HTML_NLALL (HTML_NLAROUND | HTML_NLINSIDE)
/* Tested by both print_otag() and print_ctag(); presumably adjusts the indentation depth for the element's content. */
50 #define HTML_INDENT (1 << 6)
/* Tested by both print_otag() and print_ctag(); set only for "pre" in the visible table entries. */
51 #define HTML_NOINDENT (1 << 7)
54 static const struct htmldata htmltags[TAG_MAX] = {
56 {"head", HTML_NLALL | HTML_INDENT},
58 {"meta", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
59 {"title", HTML_NLAROUND},
60 {"div", HTML_NLAROUND},
61 {"h1", HTML_NLAROUND},
62 {"h2", HTML_NLAROUND},
64 {"link", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
65 {"br", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
67 {"table", HTML_NLALL | HTML_INDENT},
68 {"colgroup", HTML_NLALL | HTML_INDENT},
69 {"col", HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
70 {"tr", HTML_NLALL | HTML_INDENT},
71 {"td", HTML_NLAROUND},
72 {"li", HTML_NLAROUND | HTML_INDENT},
73 {"ul", HTML_NLALL | HTML_INDENT},
74 {"ol", HTML_NLALL | HTML_INDENT},
75 {"dl", HTML_NLALL | HTML_INDENT},
76 {"dt", HTML_NLAROUND},
77 {"dd", HTML_NLAROUND | HTML_INDENT},
78 {"pre", HTML_NLALL | HTML_NOINDENT},
85 {"style", HTML_NLALL | HTML_INDENT},
86 {"math", HTML_NLALL | HTML_INDENT},
104 static const char *const roffscales[SCALE_MAX] = {
117 static void a2width(const char *, struct roffsu *);
118 static void print_byte(struct html *, char);
119 static void print_endword(struct html *);
120 static void print_indent(struct html *);
121 static void print_word(struct html *, const char *);
123 static void print_ctag(struct html *, struct tag *);
124 static int print_escape(struct html *, char);
125 static int print_encode(struct html *, const char *, const char *, int);
126 static void print_href(struct html *, const char *, const char *, int);
127 static void print_metaf(struct html *, enum mandoc_esc);
131 html_alloc(const struct manoutput *outopts)
135 h = mandoc_calloc(1, sizeof(struct html));
138 h->style = outopts->style;
139 h->base_man = outopts->man;
140 h->base_includes = outopts->includes;
141 if (outopts->fragment)
142 h->oflags |= HTML_FRAGMENT;
153 h = (struct html *)p;
155 while ((tag = h->tag) != NULL) {
164 print_gen_head(struct html *h)
168 print_otag(h, TAG_META, "?", "charset", "utf-8");
171 * Print a default style-sheet.
174 t = print_otag(h, TAG_STYLE, "");
175 print_text(h, "table.head, table.foot { width: 100%; }");
177 print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
179 print_text(h, "td.head-vol { text-align: center; }");
181 print_text(h, "div.Pp { margin: 1ex 0ex; }");
185 print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
186 h->style, "type", "text/css", "media", "all");
190 print_metaf(struct html *h, enum mandoc_esc deco)
195 case ESCAPE_FONTPREV:
198 case ESCAPE_FONTITALIC:
199 font = HTMLFONT_ITALIC;
201 case ESCAPE_FONTBOLD:
202 font = HTMLFONT_BOLD;
208 case ESCAPE_FONTROMAN:
209 font = HTMLFONT_NONE;
216 print_tagq(h, h->metaf);
224 case HTMLFONT_ITALIC:
225 h->metaf = print_otag(h, TAG_I, "");
228 h->metaf = print_otag(h, TAG_B, "");
231 h->metaf = print_otag(h, TAG_B, "");
232 print_otag(h, TAG_I, "");
240 html_strlen(const char *cp)
246 * Account for escape sequences within string length
247 * calculations. This follows the logic in term_strlen() as we
248 * must calculate the width of produced strings.
249 * Assume that every character has a width of 1. This is
250 * hacky, but it is good enough for approximating widths.
256 rsz = strcspn(cp, "\\");
268 switch (mandoc_escape(&cp, NULL, NULL)) {
272 case ESCAPE_NUMBERED:
274 case ESCAPE_OVERSTRIKE:
280 case ESCAPE_SKIPCHAR:
/*
 * Print the HTML character reference for a byte that must not be
 * written literally, so that markup-significant input characters
 * cannot be mistaken for markup in the output.
 * The replacement strings must be the entity names, never the raw
 * characters: a raw '<', '>' or '&' would defeat the escaping, and a
 * raw '"' would terminate the string literal.
 * The fifth entry is taken to be the non-breaking space, because
 * ASCII_NBRSP directly follows '"' in print_encode()'s reject set.
 * Callers compare the return value against zero.
 */
291 print_escape(struct html *h, char c)
296 print_word(h, "&lt;");
299 print_word(h, "&gt;");
302 print_word(h, "&amp;");
305 print_word(h, "&quot;");
308 print_word(h, "&nbsp;");
322 print_encode(struct html *h, const char *p, const char *pend, int norecurse)
329 static const char rejs[9] = { '\\', '<', '>', '&', '"',
330 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
333 pend = strchr(p, '\0');
338 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
339 h->flags &= ~HTML_SKIPCHAR;
344 for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
353 if (print_escape(h, *p++))
356 esc = mandoc_escape(&p, &seq, &len);
357 if (ESCAPE_ERROR == esc)
362 case ESCAPE_FONTPREV:
363 case ESCAPE_FONTBOLD:
364 case ESCAPE_FONTITALIC:
366 case ESCAPE_FONTROMAN:
370 case ESCAPE_SKIPCHAR:
371 h->flags |= HTML_SKIPCHAR;
377 if (h->flags & HTML_SKIPCHAR) {
378 h->flags &= ~HTML_SKIPCHAR;
384 /* Skip past "u" header. */
385 c = mchars_num2uc(seq + 1, len - 1);
387 case ESCAPE_NUMBERED:
388 c = mchars_num2char(seq, len);
393 c = mchars_spec2cp(seq, len);
401 case ESCAPE_OVERSTRIKE:
409 if ((c < 0x20 && c != 0x09) ||
410 (c > 0x7E && c < 0xA0))
413 (void)snprintf(numbuf, sizeof(numbuf), "&#%d;", c);
414 print_word(h, numbuf);
415 } else if (print_escape(h, c) == 0)
423 print_href(struct html *h, const char *name, const char *sec, int man)
427 pp = man ? h->base_man : h->base_includes;
428 while ((p = strchr(pp, '%')) != NULL) {
429 print_encode(h, pp, p, 1);
430 if (man && p[1] == 'S') {
434 print_encode(h, sec, NULL, 1);
435 } else if ((man && p[1] == 'N') ||
436 (man == 0 && p[1] == 'I'))
437 print_encode(h, name, NULL, 1);
439 print_encode(h, p, p + 2, 1);
443 print_encode(h, pp, NULL, 1);
447 print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
450 struct roffsu mysu, *su;
456 int i, have_style, tflags;
458 tflags = htmltags[tag].flags;
460 /* Push this tag onto the stack of open scopes. */
462 if ((tflags & HTML_NOSTACK) == 0) {
463 t = mandoc_malloc(sizeof(struct tag));
470 if (tflags & HTML_NLBEFORE)
474 else if ((h->flags & HTML_NOSPACE) == 0) {
475 if (h->flags & HTML_KEEP)
476 print_word(h, " ");
478 if (h->flags & HTML_PREKEEP)
479 h->flags |= HTML_KEEP;
484 if ( ! (h->flags & HTML_NONOSPACE))
485 h->flags &= ~HTML_NOSPACE;
487 h->flags |= HTML_NOSPACE;
489 /* Print out the tag name and attributes. */
492 print_word(h, htmltags[tag].name);
497 while (*fmt != '\0') {
504 /* Parse a non-style attribute and its arguments. */
506 arg1 = va_arg(ap, char *);
519 arg1 = va_arg(ap, char *);
526 arg2 = va_arg(ap, char *);
530 /* Print the non-style attributes. */
538 print_href(h, arg1, arg2, 1);
542 print_href(h, arg1, NULL, 0);
550 print_encode(h, arg1, NULL, 1);
556 /* Print out styles. */
558 while (*fmt != '\0') {
562 /* First letter: input argument type. */
568 SCALE_HS_INIT(su, i);
571 arg1 = va_arg(ap, char *);
574 su = va_arg(ap, struct roffsu *);
579 SCALE_VS_INIT(su, i);
583 if ((arg2 = va_arg(ap, char *)) == NULL)
594 /* Second letter: style name. */
598 attr = "margin-bottom";
604 attr = "text-indent";
607 attr = "margin-left";
620 arg1 = va_arg(ap, char *);
625 if (su == NULL && arg1 == NULL)
629 print_word(h, " style=\"");
637 if (su->unit == SCALE_MM && (v /= 100.0) == 0.0)
639 else if (su->unit == SCALE_BU)
641 (void)snprintf(numbuf, sizeof(numbuf), "%.2f", v);
642 print_word(h, numbuf);
643 print_word(h, roffscales[su->unit]);
654 /* Account for "well-formed" singleton escaping. */
656 if (HTML_AUTOCLOSE & htmltags[tag].flags)
661 if (tflags & HTML_NLBEGIN)
664 h->flags |= HTML_NOSPACE;
666 if (tflags & HTML_INDENT)
668 if (tflags & HTML_NOINDENT)
675 print_ctag(struct html *h, struct tag *tag)
680 * Remember to close out and nullify the current
681 * meta-font and table, if applicable.
688 tflags = htmltags[tag->tag].flags;
690 if (tflags & HTML_INDENT)
692 if (tflags & HTML_NOINDENT)
694 if (tflags & HTML_NLEND)
699 print_word(h, htmltags[tag->tag].name);
701 if (tflags & HTML_NLAFTER)
709 print_gen_decls(struct html *h)
711 print_word(h, "<!DOCTYPE html>");
716 print_text(struct html *h, const char *word)
718 if (h->col && (h->flags & HTML_NOSPACE) == 0) {
719 if ( ! (HTML_KEEP & h->flags)) {
720 if (HTML_PREKEEP & h->flags)
721 h->flags |= HTML_KEEP;
724 print_word(h, " ");
727 assert(NULL == h->metaf);
729 case HTMLFONT_ITALIC:
730 h->metaf = print_otag(h, TAG_I, "");
733 h->metaf = print_otag(h, TAG_B, "");
736 h->metaf = print_otag(h, TAG_B, "");
737 print_otag(h, TAG_I, "");
745 if ( ! print_encode(h, word, NULL, 0)) {
746 if ( ! (h->flags & HTML_NONOSPACE))
747 h->flags &= ~HTML_NOSPACE;
748 h->flags &= ~HTML_NONEWLINE;
750 h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
753 print_tagq(h, h->metaf);
757 h->flags &= ~HTML_IGNDELIM;
761 print_tagq(struct html *h, const struct tag *until)
765 while ((tag = h->tag) != NULL) {
767 if (until && tag == until)
773 print_stagq(struct html *h, const struct tag *suntil)
777 while ((tag = h->tag) != NULL) {
778 if (suntil && tag == suntil)
785 print_paragraph(struct html *h)
789 t = print_otag(h, TAG_DIV, "c", "Pp");
794 /***********************************************************************
795 * Low level output functions.
796 * They implement line breaking using a short static buffer.
797 ***********************************************************************/
800 * Buffer one HTML output byte.
801 * If the buffer is full, flush and deactivate it and start a new line.
802 * If the buffer is inactive, print directly.
805 print_byte(struct html *h, char c)
807 if ((h->flags & HTML_BUFFER) == 0) {
813 if (h->col + h->bufcol < sizeof(h->buf)) {
814 h->buf[h->bufcol++] = c;
823 fwrite(h->buf, h->bufcol, 1, stdout);
825 h->col = (h->indent + 1) * 2 + h->bufcol + 1;
827 h->flags &= ~HTML_BUFFER;
831 * If something was printed on the current output line, end it.
832 * Not to be called right after print_indent().
835 print_endline(struct html *h)
842 fwrite(h->buf, h->bufcol, 1, stdout);
847 h->flags |= HTML_NOSPACE;
848 h->flags &= ~HTML_BUFFER;
852 * Flush the HTML output buffer.
853 * If it is inactive, activate it.
856 print_endword(struct html *h)
863 if ((h->flags & HTML_BUFFER) == 0) {
865 h->flags |= HTML_BUFFER;
866 } else if (h->bufcol) {
868 fwrite(h->buf, h->bufcol, 1, stdout);
869 h->col += h->bufcol + 1;
875 * If at the beginning of a new output line,
876 * perform indentation and mark the line as containing output.
877 * Make sure to really produce some output right afterwards,
878 * but do not use print_otag() for producing it.
881 print_indent(struct html *h)
888 if (h->noindent == 0) {
889 h->col = h->indent * 2;
890 for (i = 0; i < h->col; i++)
893 h->flags &= ~HTML_NOSPACE;
897 * Print or buffer some characters
898 * depending on the current HTML output buffer state.
901 print_word(struct html *h, const char *cp)
904 print_byte(h, *cp++);
908 * Calculate the scaling unit passed in a `-width' argument. This uses either
909 * a native scaling unit (e.g., 1i, 2m) or the string length of the argument.
913 a2width(const char *p, struct roffsu *su)
915 if (a2roffsu(p, su, SCALE_MAX) < 2) {
917 su->scale = html_strlen(p);
918 } else if (su->scale < 0.0)