]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mdocml/html.c
Merge ^/head r275387 through r275477.
[FreeBSD/FreeBSD.git] / contrib / mdocml / html.c
1 /*      $Id: html.c,v 1.181 2014/10/29 00:17:43 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011, 2012, 2013, 2014 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30
31 #include "mandoc.h"
32 #include "mandoc_aux.h"
33 #include "libmandoc.h"
34 #include "out.h"
35 #include "html.h"
36 #include "main.h"
37
/*
 * Per-element output flags, stored in htmldata.flags:
 *  HTML_CLRLINE:   a newline is written after the element's tags.
 *  HTML_NOSTACK:   the element is not pushed onto the open-tag stack.
 *  HTML_AUTOCLOSE: the opening tag is written self-closed ("<x/>").
 */
#define	HTML_CLRLINE	 (1 << 0)
#define	HTML_NOSTACK	 (1 << 1)
#define	HTML_AUTOCLOSE	 (1 << 2) /* Tag has auto-closure. */

/* Static description of one HTML element. */
struct	htmldata {
	const char	 *name;		/* element name as written out */
	int		  flags;	/* bitwise OR of the HTML_* bits above */
};
45
46 static  const struct htmldata htmltags[TAG_MAX] = {
47         {"html",        HTML_CLRLINE}, /* TAG_HTML */
48         {"head",        HTML_CLRLINE}, /* TAG_HEAD */
49         {"body",        HTML_CLRLINE}, /* TAG_BODY */
50         {"meta",        HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_META */
51         {"title",       HTML_CLRLINE}, /* TAG_TITLE */
52         {"div",         HTML_CLRLINE}, /* TAG_DIV */
53         {"h1",          0}, /* TAG_H1 */
54         {"h2",          0}, /* TAG_H2 */
55         {"span",        0}, /* TAG_SPAN */
56         {"link",        HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_LINK */
57         {"br",          HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_BR */
58         {"a",           0}, /* TAG_A */
59         {"table",       HTML_CLRLINE}, /* TAG_TABLE */
60         {"tbody",       HTML_CLRLINE}, /* TAG_TBODY */
61         {"col",         HTML_CLRLINE | HTML_NOSTACK | HTML_AUTOCLOSE}, /* TAG_COL */
62         {"tr",          HTML_CLRLINE}, /* TAG_TR */
63         {"td",          HTML_CLRLINE}, /* TAG_TD */
64         {"li",          HTML_CLRLINE}, /* TAG_LI */
65         {"ul",          HTML_CLRLINE}, /* TAG_UL */
66         {"ol",          HTML_CLRLINE}, /* TAG_OL */
67         {"dl",          HTML_CLRLINE}, /* TAG_DL */
68         {"dt",          HTML_CLRLINE}, /* TAG_DT */
69         {"dd",          HTML_CLRLINE}, /* TAG_DD */
70         {"blockquote",  HTML_CLRLINE}, /* TAG_BLOCKQUOTE */
71         {"pre",         HTML_CLRLINE }, /* TAG_PRE */
72         {"b",           0 }, /* TAG_B */
73         {"i",           0 }, /* TAG_I */
74         {"code",        0 }, /* TAG_CODE */
75         {"small",       0 }, /* TAG_SMALL */
76         {"style",       HTML_CLRLINE}, /* TAG_STYLE */
77         {"math",        HTML_CLRLINE}, /* TAG_MATH */
78         {"mrow",        0}, /* TAG_MROW */
79         {"mi",          0}, /* TAG_MI */
80         {"mo",          0}, /* TAG_MO */
81         {"msup",        0}, /* TAG_MSUP */
82         {"msub",        0}, /* TAG_MSUB */
83         {"msubsup",     0}, /* TAG_MSUBSUP */
84         {"mfrac",       0}, /* TAG_MFRAC */
85         {"msqrt",       0}, /* TAG_MSQRT */
86         {"mfenced",     0}, /* TAG_MFENCED */
87         {"mtable",      0}, /* TAG_MTABLE */
88         {"mtr",         0}, /* TAG_MTR */
89         {"mtd",         0}, /* TAG_MTD */
90         {"munderover",  0}, /* TAG_MUNDEROVER */
91         {"munder",      0}, /* TAG_MUNDER*/
92         {"mover",       0}, /* TAG_MOVER*/
93 };
94
95 static  const char      *const htmlattrs[ATTR_MAX] = {
96         "name", /* ATTR_NAME */
97         "rel", /* ATTR_REL */
98         "href", /* ATTR_HREF */
99         "type", /* ATTR_TYPE */
100         "media", /* ATTR_MEDIA */
101         "class", /* ATTR_CLASS */
102         "style", /* ATTR_STYLE */
103         "id", /* ATTR_ID */
104         "colspan", /* ATTR_COLSPAN */
105         "charset", /* ATTR_CHARSET */
106         "open", /* ATTR_OPEN */
107         "close", /* ATTR_CLOSE */
108         "mathvariant", /* ATTR_MATHVARIANT */
109 };
110
111 static  const char      *const roffscales[SCALE_MAX] = {
112         "cm", /* SCALE_CM */
113         "in", /* SCALE_IN */
114         "pc", /* SCALE_PC */
115         "pt", /* SCALE_PT */
116         "em", /* SCALE_EM */
117         "em", /* SCALE_MM */
118         "ex", /* SCALE_EN */
119         "ex", /* SCALE_BU */
120         "em", /* SCALE_VS */
121         "ex", /* SCALE_FS */
122 };
123
/* Helpers private to this file. */
static	void	 bufncat(struct html *h, const char *p, size_t sz);
static	void	 print_ctag(struct html *h, enum htmltag tag);
static	int	 print_escape(char c);
static	int	 print_encode(struct html *h, const char *p, int norecurse);
static	void	 print_metaf(struct html *h, enum mandoc_esc deco);
static	void	 print_attr(struct html *h, const char *key,
			const char *val);
130
131
132 void *
133 html_alloc(const struct mchars *mchars, char *outopts)
134 {
135         struct html     *h;
136         const char      *toks[5];
137         char            *v;
138
139         toks[0] = "style";
140         toks[1] = "man";
141         toks[2] = "includes";
142         toks[3] = "fragment";
143         toks[4] = NULL;
144
145         h = mandoc_calloc(1, sizeof(struct html));
146
147         h->tags.head = NULL;
148         h->symtab = mchars;
149
150         while (outopts && *outopts)
151                 switch (getsubopt(&outopts, UNCONST(toks), &v)) {
152                 case 0:
153                         h->style = v;
154                         break;
155                 case 1:
156                         h->base_man = v;
157                         break;
158                 case 2:
159                         h->base_includes = v;
160                         break;
161                 case 3:
162                         h->oflags |= HTML_FRAGMENT;
163                         break;
164                 default:
165                         break;
166                 }
167
168         return(h);
169 }
170
171 void
172 html_free(void *p)
173 {
174         struct tag      *tag;
175         struct html     *h;
176
177         h = (struct html *)p;
178
179         while ((tag = h->tags.head) != NULL) {
180                 h->tags.head = tag->next;
181                 free(tag);
182         }
183
184         free(h);
185 }
186
187 void
188 print_gen_head(struct html *h)
189 {
190         struct htmlpair  tag[4];
191         struct tag      *t;
192
193         tag[0].key = ATTR_CHARSET;
194         tag[0].val = "utf-8";
195         print_otag(h, TAG_META, 1, tag);
196
197         /*
198          * Print a default style-sheet.
199          */
200         t = print_otag(h, TAG_STYLE, 0, NULL);
201         print_text(h, "table.head, table.foot { width: 100%; }\n"
202               "td.head-rtitle, td.foot-os { text-align: right; }\n"
203               "td.head-vol { text-align: center; }\n"
204               "table.foot td { width: 50%; }\n"
205               "table.head td { width: 33%; }\n"
206               "div.spacer { margin: 1em 0; }\n");
207         print_tagq(h, t);
208
209         if (h->style) {
210                 tag[0].key = ATTR_REL;
211                 tag[0].val = "stylesheet";
212                 tag[1].key = ATTR_HREF;
213                 tag[1].val = h->style;
214                 tag[2].key = ATTR_TYPE;
215                 tag[2].val = "text/css";
216                 tag[3].key = ATTR_MEDIA;
217                 tag[3].val = "all";
218                 print_otag(h, TAG_LINK, 4, tag);
219         }
220 }
221
222 static void
223 print_metaf(struct html *h, enum mandoc_esc deco)
224 {
225         enum htmlfont    font;
226
227         switch (deco) {
228         case ESCAPE_FONTPREV:
229                 font = h->metal;
230                 break;
231         case ESCAPE_FONTITALIC:
232                 font = HTMLFONT_ITALIC;
233                 break;
234         case ESCAPE_FONTBOLD:
235                 font = HTMLFONT_BOLD;
236                 break;
237         case ESCAPE_FONTBI:
238                 font = HTMLFONT_BI;
239                 break;
240         case ESCAPE_FONT:
241                 /* FALLTHROUGH */
242         case ESCAPE_FONTROMAN:
243                 font = HTMLFONT_NONE;
244                 break;
245         default:
246                 abort();
247                 /* NOTREACHED */
248         }
249
250         if (h->metaf) {
251                 print_tagq(h, h->metaf);
252                 h->metaf = NULL;
253         }
254
255         h->metal = h->metac;
256         h->metac = font;
257
258         switch (font) {
259         case HTMLFONT_ITALIC:
260                 h->metaf = print_otag(h, TAG_I, 0, NULL);
261                 break;
262         case HTMLFONT_BOLD:
263                 h->metaf = print_otag(h, TAG_B, 0, NULL);
264                 break;
265         case HTMLFONT_BI:
266                 h->metaf = print_otag(h, TAG_B, 0, NULL);
267                 print_otag(h, TAG_I, 0, NULL);
268                 break;
269         default:
270                 break;
271         }
272 }
273
274 int
275 html_strlen(const char *cp)
276 {
277         size_t           rsz;
278         int              skip, sz;
279
280         /*
281          * Account for escaped sequences within string length
282          * calculations.  This follows the logic in term_strlen() as we
283          * must calculate the width of produced strings.
284          * Assume that characters are always width of "1".  This is
285          * hacky, but it gets the job done for approximation of widths.
286          */
287
288         sz = 0;
289         skip = 0;
290         while (1) {
291                 rsz = strcspn(cp, "\\");
292                 if (rsz) {
293                         cp += rsz;
294                         if (skip) {
295                                 skip = 0;
296                                 rsz--;
297                         }
298                         sz += rsz;
299                 }
300                 if ('\0' == *cp)
301                         break;
302                 cp++;
303                 switch (mandoc_escape(&cp, NULL, NULL)) {
304                 case ESCAPE_ERROR:
305                         return(sz);
306                 case ESCAPE_UNICODE:
307                         /* FALLTHROUGH */
308                 case ESCAPE_NUMBERED:
309                         /* FALLTHROUGH */
310                 case ESCAPE_SPECIAL:
311                         if (skip)
312                                 skip = 0;
313                         else
314                                 sz++;
315                         break;
316                 case ESCAPE_SKIPCHAR:
317                         skip = 1;
318                         break;
319                 default:
320                         break;
321                 }
322         }
323         return(sz);
324 }
325
326 static int
327 print_escape(char c)
328 {
329
330         switch (c) {
331         case '<':
332                 printf("&lt;");
333                 break;
334         case '>':
335                 printf("&gt;");
336                 break;
337         case '&':
338                 printf("&amp;");
339                 break;
340         case '"':
341                 printf("&quot;");
342                 break;
343         case ASCII_NBRSP:
344                 putchar('-');
345                 break;
346         case ASCII_HYPH:
347                 putchar('-');
348                 /* FALLTHROUGH */
349         case ASCII_BREAK:
350                 break;
351         default:
352                 return(0);
353         }
354         return(1);
355 }
356
357 static int
358 print_encode(struct html *h, const char *p, int norecurse)
359 {
360         size_t           sz;
361         int              c, len, nospace;
362         const char      *seq;
363         enum mandoc_esc  esc;
364         static const char rejs[9] = { '\\', '<', '>', '&', '"',
365                 ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };
366
367         nospace = 0;
368
369         while ('\0' != *p) {
370                 if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
371                         h->flags &= ~HTML_SKIPCHAR;
372                         p++;
373                         continue;
374                 }
375
376                 sz = strcspn(p, rejs);
377
378                 fwrite(p, 1, sz, stdout);
379                 p += (int)sz;
380
381                 if ('\0' == *p)
382                         break;
383
384                 if (print_escape(*p++))
385                         continue;
386
387                 esc = mandoc_escape(&p, &seq, &len);
388                 if (ESCAPE_ERROR == esc)
389                         break;
390
391                 switch (esc) {
392                 case ESCAPE_FONT:
393                         /* FALLTHROUGH */
394                 case ESCAPE_FONTPREV:
395                         /* FALLTHROUGH */
396                 case ESCAPE_FONTBOLD:
397                         /* FALLTHROUGH */
398                 case ESCAPE_FONTITALIC:
399                         /* FALLTHROUGH */
400                 case ESCAPE_FONTBI:
401                         /* FALLTHROUGH */
402                 case ESCAPE_FONTROMAN:
403                         if (0 == norecurse)
404                                 print_metaf(h, esc);
405                         continue;
406                 case ESCAPE_SKIPCHAR:
407                         h->flags |= HTML_SKIPCHAR;
408                         continue;
409                 default:
410                         break;
411                 }
412
413                 if (h->flags & HTML_SKIPCHAR) {
414                         h->flags &= ~HTML_SKIPCHAR;
415                         continue;
416                 }
417
418                 switch (esc) {
419                 case ESCAPE_UNICODE:
420                         /* Skip past "u" header. */
421                         c = mchars_num2uc(seq + 1, len - 1);
422                         break;
423                 case ESCAPE_NUMBERED:
424                         c = mchars_num2char(seq, len);
425                         if (c < 0)
426                                 continue;
427                         break;
428                 case ESCAPE_SPECIAL:
429                         c = mchars_spec2cp(h->symtab, seq, len);
430                         if (c <= 0)
431                                 continue;
432                         break;
433                 case ESCAPE_NOSPACE:
434                         if ('\0' == *p)
435                                 nospace = 1;
436                         continue;
437                 default:
438                         continue;
439                 }
440                 if ((c < 0x20 && c != 0x09) ||
441                     (c > 0x7E && c < 0xA0))
442                         c = 0xFFFD;
443                 if (c > 0x7E)
444                         printf("&#%d;", c);
445                 else if ( ! print_escape(c))
446                         putchar(c);
447         }
448
449         return(nospace);
450 }
451
/*
 * Print one attribute as ' key="val"'.  The value is passed through
 * print_encode() with recursion disabled, so that font escapes in it
 * cannot open elements inside the tag; the key is printed as is.
 */
static void
print_attr(struct html *h, const char *key, const char *val)
{

	putchar(' ');
	fputs(key, stdout);
	fputs("=\"", stdout);
	(void)print_encode(h, val, 1);
	putchar('"');
}
459
460 struct tag *
461 print_otag(struct html *h, enum htmltag tag,
462                 int sz, const struct htmlpair *p)
463 {
464         int              i;
465         struct tag      *t;
466
467         /* Push this tags onto the stack of open scopes. */
468
469         if ( ! (HTML_NOSTACK & htmltags[tag].flags)) {
470                 t = mandoc_malloc(sizeof(struct tag));
471                 t->tag = tag;
472                 t->next = h->tags.head;
473                 h->tags.head = t;
474         } else
475                 t = NULL;
476
477         if ( ! (HTML_NOSPACE & h->flags))
478                 if ( ! (HTML_CLRLINE & htmltags[tag].flags)) {
479                         /* Manage keeps! */
480                         if ( ! (HTML_KEEP & h->flags)) {
481                                 if (HTML_PREKEEP & h->flags)
482                                         h->flags |= HTML_KEEP;
483                                 putchar(' ');
484                         } else
485                                 printf("&#160;");
486                 }
487
488         if ( ! (h->flags & HTML_NONOSPACE))
489                 h->flags &= ~HTML_NOSPACE;
490         else
491                 h->flags |= HTML_NOSPACE;
492
493         /* Print out the tag name and attributes. */
494
495         printf("<%s", htmltags[tag].name);
496         for (i = 0; i < sz; i++)
497                 print_attr(h, htmlattrs[p[i].key], p[i].val);
498
499         /* Accommodate for "well-formed" singleton escaping. */
500
501         if (HTML_AUTOCLOSE & htmltags[tag].flags)
502                 putchar('/');
503
504         putchar('>');
505
506         h->flags |= HTML_NOSPACE;
507
508         if ((HTML_AUTOCLOSE | HTML_CLRLINE) & htmltags[tag].flags)
509                 putchar('\n');
510
511         return(t);
512 }
513
514 static void
515 print_ctag(struct html *h, enum htmltag tag)
516 {
517
518         printf("</%s>", htmltags[tag].name);
519         if (HTML_CLRLINE & htmltags[tag].flags) {
520                 h->flags |= HTML_NOSPACE;
521                 putchar('\n');
522         }
523 }
524
/*
 * Print the document type declaration, followed by a newline.
 * The output state is not used.
 */
void
print_gen_decls(struct html *h)
{

	fputs("<!DOCTYPE html>\n", stdout);
}
531
532 void
533 print_text(struct html *h, const char *word)
534 {
535
536         if ( ! (HTML_NOSPACE & h->flags)) {
537                 /* Manage keeps! */
538                 if ( ! (HTML_KEEP & h->flags)) {
539                         if (HTML_PREKEEP & h->flags)
540                                 h->flags |= HTML_KEEP;
541                         putchar(' ');
542                 } else
543                         printf("&#160;");
544         }
545
546         assert(NULL == h->metaf);
547         switch (h->metac) {
548         case HTMLFONT_ITALIC:
549                 h->metaf = print_otag(h, TAG_I, 0, NULL);
550                 break;
551         case HTMLFONT_BOLD:
552                 h->metaf = print_otag(h, TAG_B, 0, NULL);
553                 break;
554         case HTMLFONT_BI:
555                 h->metaf = print_otag(h, TAG_B, 0, NULL);
556                 print_otag(h, TAG_I, 0, NULL);
557                 break;
558         default:
559                 break;
560         }
561
562         assert(word);
563         if ( ! print_encode(h, word, 0)) {
564                 if ( ! (h->flags & HTML_NONOSPACE))
565                         h->flags &= ~HTML_NOSPACE;
566         } else
567                 h->flags |= HTML_NOSPACE;
568
569         if (h->metaf) {
570                 print_tagq(h, h->metaf);
571                 h->metaf = NULL;
572         }
573
574         h->flags &= ~HTML_IGNDELIM;
575 }
576
577 void
578 print_tagq(struct html *h, const struct tag *until)
579 {
580         struct tag      *tag;
581
582         while ((tag = h->tags.head) != NULL) {
583                 /*
584                  * Remember to close out and nullify the current
585                  * meta-font and table, if applicable.
586                  */
587                 if (tag == h->metaf)
588                         h->metaf = NULL;
589                 if (tag == h->tblt)
590                         h->tblt = NULL;
591                 print_ctag(h, tag->tag);
592                 h->tags.head = tag->next;
593                 free(tag);
594                 if (until && tag == until)
595                         return;
596         }
597 }
598
599 void
600 print_stagq(struct html *h, const struct tag *suntil)
601 {
602         struct tag      *tag;
603
604         while ((tag = h->tags.head) != NULL) {
605                 if (suntil && tag == suntil)
606                         return;
607                 /*
608                  * Remember to close out and nullify the current
609                  * meta-font and table, if applicable.
610                  */
611                 if (tag == h->metaf)
612                         h->metaf = NULL;
613                 if (tag == h->tblt)
614                         h->tblt = NULL;
615                 print_ctag(h, tag->tag);
616                 h->tags.head = tag->next;
617                 free(tag);
618         }
619 }
620
621 void
622 print_paragraph(struct html *h)
623 {
624         struct tag      *t;
625         struct htmlpair  tag;
626
627         PAIR_CLASS_INIT(&tag, "spacer");
628         t = print_otag(h, TAG_DIV, 1, &tag);
629         print_tagq(h, t);
630 }
631
632
633 void
634 bufinit(struct html *h)
635 {
636
637         h->buf[0] = '\0';
638         h->buflen = 0;
639 }
640
/*
 * Append one style declaration of the form "key:val;" to the
 * scratch buffer.
 */
void
bufcat_style(struct html *h, const char *key, const char *val)
{
	const char	*parts[4];
	int		 i;

	parts[0] = key;
	parts[1] = ":";
	parts[2] = val;
	parts[3] = ";";

	for (i = 0; i < 4; i++)
		bufcat(h, parts[i]);
}
650
651 void
652 bufcat(struct html *h, const char *p)
653 {
654
655         /*
656          * XXX This is broken and not easy to fix.
657          * When using the -Oincludes option, buffmt_includes()
658          * may pass in strings overrunning BUFSIZ, causing a crash.
659          */
660
661         h->buflen = strlcat(h->buf, p, BUFSIZ);
662         assert(h->buflen < BUFSIZ);
663 }
664
665 void
666 bufcat_fmt(struct html *h, const char *fmt, ...)
667 {
668         va_list          ap;
669
670         va_start(ap, fmt);
671         (void)vsnprintf(h->buf + (int)h->buflen,
672             BUFSIZ - h->buflen - 1, fmt, ap);
673         va_end(ap);
674         h->buflen = strlen(h->buf);
675 }
676
677 static void
678 bufncat(struct html *h, const char *p, size_t sz)
679 {
680
681         assert(h->buflen + sz + 1 < BUFSIZ);
682         strncat(h->buf, p, sz);
683         h->buflen += sz;
684 }
685
686 void
687 buffmt_includes(struct html *h, const char *name)
688 {
689         const char      *p, *pp;
690
691         pp = h->base_includes;
692
693         bufinit(h);
694         while (NULL != (p = strchr(pp, '%'))) {
695                 bufncat(h, pp, (size_t)(p - pp));
696                 switch (*(p + 1)) {
697                 case'I':
698                         bufcat(h, name);
699                         break;
700                 default:
701                         bufncat(h, p, 2);
702                         break;
703                 }
704                 pp = p + 2;
705         }
706         if (pp)
707                 bufcat(h, pp);
708 }
709
710 void
711 buffmt_man(struct html *h, const char *name, const char *sec)
712 {
713         const char      *p, *pp;
714
715         pp = h->base_man;
716
717         bufinit(h);
718         while (NULL != (p = strchr(pp, '%'))) {
719                 bufncat(h, pp, (size_t)(p - pp));
720                 switch (*(p + 1)) {
721                 case 'S':
722                         bufcat(h, sec ? sec : "1");
723                         break;
724                 case 'N':
725                         bufcat_fmt(h, "%s", name);
726                         break;
727                 default:
728                         bufncat(h, p, 2);
729                         break;
730                 }
731                 pp = p + 2;
732         }
733         if (pp)
734                 bufcat(h, pp);
735 }
736
737 void
738 bufcat_su(struct html *h, const char *p, const struct roffsu *su)
739 {
740         double           v;
741
742         v = su->scale;
743         if (SCALE_MM == su->unit && 0.0 == (v /= 100.0))
744                 v = 1.0;
745         else if (SCALE_BU == su->unit)
746                 v /= 24.0;
747
748         bufcat_fmt(h, "%s: %.2f%s;", p, v, roffscales[su->unit]);
749 }
750
/*
 * Append src to the scratch buffer in a form usable as an
 * identifier, writing each byte as two lower-case hexadecimal
 * digits.
 */
void
bufcat_id(struct html *h, const char *src)
{

	/* Cf. <http://www.w3.org/TR/html4/types.html#h-6.2>. */

	/*
	 * Go through unsigned char: where plain char is signed, bytes
	 * of 0x80 and above would otherwise be sign-extended and
	 * printed as eight digits instead of two.
	 */
	while ('\0' != *src)
		bufcat_fmt(h, "%.2x", (unsigned int)(unsigned char)*src++);
}
757         while ('\0' != *src)
758                 bufcat_fmt(h, "%.2x", *src++);
759 }