]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mandoc/cgi.c
Followup to r347996
[FreeBSD/FreeBSD.git] / contrib / mandoc / cgi.c
1 /*      $Id: cgi.c,v 1.166 2019/03/06 12:32:41 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "mandoc_parse.h"
42 #include "main.h"
43 #include "manconf.h"
44 #include "mansearch.h"
45 #include "cgi.h"
46
/*
 * The search parameters of one request, as passed to the search function.
 * A string member is NULL while the corresponding parameter is unset.
 */
struct  query {
        /* desired manual directory */
        char            *manpath;
        /* architecture */
        char            *arch;
        /* manual section */
        char            *sec;
        /* unparsed query expression */
        char            *query;
        /* match whole names, not substrings */
        int              equal;
};
57
58 struct  req {
59         struct query      q;
60         char            **p; /* array of available manpaths */
61         size_t            psz; /* number of available manpaths */
62         int               isquery; /* QUERY_STRING used, not PATH_INFO */
63 };
64
/*
 * Selects which element of the search form, if any,
 * is written with the autofocus attribute.
 */
enum    focus {
        FOCUS_NONE = 0,         /* no element gets the focus */
        FOCUS_QUERY             /* the query input box gets the focus */
};
69
70 static  void             html_print(const char *);
71 static  void             html_putchar(char);
72 static  int              http_decode(char *);
73 static  void             http_encode(const char *p);
74 static  void             parse_manpath_conf(struct req *);
75 static  void             parse_path_info(struct req *req, const char *path);
76 static  void             parse_query_string(struct req *, const char *);
77 static  void             pg_error_badrequest(const char *);
78 static  void             pg_error_internal(void);
79 static  void             pg_index(const struct req *);
80 static  void             pg_noresult(const struct req *, const char *);
81 static  void             pg_redirect(const struct req *, const char *);
82 static  void             pg_search(const struct req *);
83 static  void             pg_searchres(const struct req *,
84                                 struct manpage *, size_t);
85 static  void             pg_show(struct req *, const char *);
86 static  void             resp_begin_html(int, const char *, const char *);
87 static  void             resp_begin_http(int, const char *);
88 static  void             resp_catman(const struct req *, const char *);
89 static  void             resp_copy(const char *);
90 static  void             resp_end_html(void);
91 static  void             resp_format(const struct req *, const char *);
92 static  void             resp_searchform(const struct req *, enum focus);
93 static  void             resp_show(const struct req *, const char *);
94 static  void             set_query_attr(char **, char **);
95 static  int              validate_arch(const char *);
96 static  int              validate_filename(const char *);
97 static  int              validate_manpath(const struct req *, const char *);
98 static  int              validate_urifrag(const char *);
99
100 static  const char       *scriptname = SCRIPT_NAME;
101
/*
 * Display priority of the sections 1 to 9, indexed by the section
 * digit minus one.
 */
static  const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Values and labels of the section selector.
 * Both arrays are indexed in parallel and must have the same length.
 */
static  const char *const sec_numbers[] = {
        "0",
        "1",
        "2",
        "3",
        "3p",
        "4",
        "5",
        "6",
        "7",
        "8",
        "9"
};
static  const char *const sec_names[] = {
        "All Sections",
        "1 - General Commands",
        "2 - System Calls",
        "3 - Library Functions",
        "3p - Perl Library",
        "4 - Device Drivers",
        "5 - File Formats",
        "6 - Games",
        "7 - Miscellaneous Information",
        "8 - System Manager's Manual",
        "9 - Kernel Developer's Manual"
};
static  const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);

/* Architecture names accepted and offered in the architecture selector. */
static  const char *const arch_names[] = {
        "amd64",        "alpha",        "armv7",        "arm64",
        "hppa",         "i386",         "landisk",
        "loongson",     "luna88k",      "macppc",       "mips64",
        "octeon",       "sgi",          "socppc",       "sparc64",
        "amiga",        "arc",          "armish",       "arm32",
        "atari",        "aviion",       "beagle",       "cats",
        "hppa64",       "hp300",
        "ia64",         "mac68k",       "mvme68k",      "mvme88k",
        "mvmeppc",      "palm",         "pc532",        "pegasos",
        "pmax",         "powerpc",      "solbourne",    "sparc",
        "sun3",         "vax",          "wgrisc",       "x68k",
        "zaurus"
};
static  const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
136
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by their entities.
 * Everything else, including non-ASCII bytes, is written unchanged.
 */
static void
html_putchar(char c)
{
        const char      *entity;

        if (c == '"')
                entity = "&quot;";
        else if (c == '&')
                entity = "&amp;";
        else if (c == '>')
                entity = "&gt;";
        else if (c == '<')
                entity = "&lt;";
        else
                entity = NULL;

        if (entity == NULL)
                putchar((unsigned char)c);
        else
                fputs(entity, stdout);
}
163
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time through html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
        size_t   pos;

        if (p != NULL)
                for (pos = 0; p[pos] != '\0'; pos++)
                        html_putchar(p[pos]);
}
177
/*
 * Hand the allocated string *val over to the query member *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
        char    *incoming;

        free(*attr);

        incoming = *val;
        *val = NULL;

        if (*incoming != '\0') {
                *attr = incoming;
                return;
        }

        free(incoming);
        *attr = NULL;
}
194
195 /*
196  * Parse the QUERY_STRING for key-value pairs
197  * and store the values into the query structure.
198  */
199 static void
200 parse_query_string(struct req *req, const char *qs)
201 {
202         char            *key, *val;
203         size_t           keysz, valsz;
204
205         req->isquery    = 1;
206         req->q.manpath  = NULL;
207         req->q.arch     = NULL;
208         req->q.sec      = NULL;
209         req->q.query    = NULL;
210         req->q.equal    = 1;
211
212         key = val = NULL;
213         while (*qs != '\0') {
214
215                 /* Parse one key. */
216
217                 keysz = strcspn(qs, "=;&");
218                 key = mandoc_strndup(qs, keysz);
219                 qs += keysz;
220                 if (*qs != '=')
221                         goto next;
222
223                 /* Parse one value. */
224
225                 valsz = strcspn(++qs, ";&");
226                 val = mandoc_strndup(qs, valsz);
227                 qs += valsz;
228
229                 /* Decode and catch encoding errors. */
230
231                 if ( ! (http_decode(key) && http_decode(val)))
232                         goto next;
233
234                 /* Handle key-value pairs. */
235
236                 if ( ! strcmp(key, "query"))
237                         set_query_attr(&req->q.query, &val);
238
239                 else if ( ! strcmp(key, "apropos"))
240                         req->q.equal = !strcmp(val, "0");
241
242                 else if ( ! strcmp(key, "manpath")) {
243 #ifdef COMPAT_OLDURI
244                         if ( ! strncmp(val, "OpenBSD ", 8)) {
245                                 val[7] = '-';
246                                 if ('C' == val[8])
247                                         val[8] = 'c';
248                         }
249 #endif
250                         set_query_attr(&req->q.manpath, &val);
251                 }
252
253                 else if ( ! (strcmp(key, "sec")
254 #ifdef COMPAT_OLDURI
255                     && strcmp(key, "sektion")
256 #endif
257                     )) {
258                         if ( ! strcmp(val, "0"))
259                                 *val = '\0';
260                         set_query_attr(&req->q.sec, &val);
261                 }
262
263                 else if ( ! strcmp(key, "arch")) {
264                         if ( ! strcmp(val, "default"))
265                                 *val = '\0';
266                         set_query_attr(&req->q.arch, &val);
267                 }
268
269                 /*
270                  * The key must be freed in any case.
271                  * The val may have been handed over to the query
272                  * structure, in which case it is now NULL.
273                  */
274 next:
275                 free(key);
276                 key = NULL;
277                 free(val);
278                 val = NULL;
279
280                 if (*qs != '\0')
281                         qs++;
282         }
283 }
284
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%6e+foo" into "n foo" in the regular way.  This is done in-place
 * over the allocated string; the result is never longer than the input.
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by exactly
 * two hexadecimal digits or if an escape decodes to a NUL byte; in
 * that case, the content of the string is unspecified but it remains
 * NUL-terminated.
 */
static int
http_decode(char *p)
{
        char             hex[3];
        char            *q;
        long             c;

        hex[2] = '\0';

        for (q = p; *p != '\0'; p++, q++) {
                if (*p != '%') {
                        *q = *p == '+' ? ' ' : *p;
                        continue;
                }

                /*
                 * Insist on two hex digits.  The second test is only
                 * evaluated if the first character is a hex digit and
                 * hence not the terminating NUL, so this never reads
                 * beyond the end of the string.
                 */

                if ( ! isxdigit((unsigned char)p[1]) ||
                    ! isxdigit((unsigned char)p[2]))
                        return 0;

                hex[0] = p[1];
                hex[1] = p[2];
                c = strtol(hex, NULL, 16);

                /* An embedded NUL would truncate the string. */

                if (c == 0)
                        return 0;

                *q = (char)c;
                p += 2;
        }

        *q = '\0';
        return 1;
}
320
/*
 * Write a string to standard output, percent-encoding every byte
 * except alphanumeric characters and the four characters "-._~".
 */
static void
http_encode(const char *p)
{
        unsigned char    uc;

        while ((uc = (unsigned char)*p++) != '\0') {
                if (isalnum(uc) ||
                    uc == '-' || uc == '.' || uc == '_' || uc == '~')
                        putchar(uc);
                else
                        printf("%%%2.2X", uc);
        }
}
332
/*
 * Write the HTTP response headers and the blank line ending them,
 * then flush standard output.
 * A "Status:" header is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
        if (code != 200)
                printf("Status: %d %s\r\n", code, msg);

        fputs("Content-Type: text/html; charset=utf-8\r\n"
            "Cache-Control: no-cache\r\n"
            "Pragma: no-cache\r\n"
            "\r\n", stdout);

        fflush(stdout);
}
347
/*
 * Copy the content of a file verbatim to standard output.
 * Nothing is written if the file cannot be opened.
 * Copying stops silently at the first read or write error
 * other than an interrupted system call.
 */
static void
resp_copy(const char *filename)
{
        char     buf[4096];
        ssize_t  nr, nw, off;
        int      fd;

        if ((fd = open(filename, O_RDONLY)) == -1)
                return;

        /*
         * The data below bypasses stdio, so write out what stdio
         * has buffered so far to keep the output in order.
         */

        fflush(stdout);

        while ((nr = read(fd, buf, sizeof(buf))) != 0) {
                if (nr == -1) {
                        if (errno == EINTR)
                                continue;
                        break;
                }

                /*
                 * write(2) may accept fewer bytes than requested,
                 * so keep going until the whole chunk is out.
                 */

                for (off = 0; off < nr; off += nw) {
                        nw = write(STDOUT_FILENO, buf + off,
                            (size_t)(nr - off));
                        if (nw > 0)
                                continue;
                        if (nw == -1 && errno == EINTR) {
                                nw = 0;
                                continue;
                        }
                        goto out;
                }
        }
out:
        close(fd);
}
362
363 static void
364 resp_begin_html(int code, const char *msg, const char *file)
365 {
366         char    *cp;
367
368         resp_begin_http(code, msg);
369
370         printf("<!DOCTYPE html>\n"
371                "<html>\n"
372                "<head>\n"
373                "  <meta charset=\"UTF-8\"/>\n"
374                "  <meta name=\"viewport\""
375                       " content=\"width=device-width, initial-scale=1.0\">\n"
376                "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
377                " type=\"text/css\" media=\"all\">\n"
378                "  <title>",
379                CSS_DIR);
380         if (file != NULL) {
381                 if ((cp = strrchr(file, '/')) != NULL)
382                         file = cp + 1;
383                 if ((cp = strrchr(file, '.')) != NULL) {
384                         printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
385                 } else
386                         printf("%s - ", file);
387         }
388         printf("%s</title>\n"
389                "</head>\n"
390                "<body>\n",
391                CUSTOMIZE_TITLE);
392
393         resp_copy(MAN_DIR "/header.html");
394 }
395
396 static void
397 resp_end_html(void)
398 {
399
400         resp_copy(MAN_DIR "/footer.html");
401
402         puts("</body>\n"
403              "</html>");
404 }
405
406 static void
407 resp_searchform(const struct req *req, enum focus focus)
408 {
409         int              i;
410
411         printf("<form action=\"/%s\" method=\"get\">\n"
412                "  <fieldset>\n"
413                "    <legend>Manual Page Search Parameters</legend>\n",
414                scriptname);
415
416         /* Write query input box. */
417
418         printf("    <input type=\"search\" name=\"query\" value=\"");
419         if (req->q.query != NULL)
420                 html_print(req->q.query);
421         printf( "\" size=\"40\"");
422         if (focus == FOCUS_QUERY)
423                 printf(" autofocus");
424         puts(">");
425
426         /* Write submission buttons. */
427
428         printf( "    <button type=\"submit\" name=\"apropos\" value=\"0\">"
429                 "man</button>\n"
430                 "    <button type=\"submit\" name=\"apropos\" value=\"1\">"
431                 "apropos</button>\n"
432                 "    <br/>\n");
433
434         /* Write section selector. */
435
436         puts("    <select name=\"sec\">");
437         for (i = 0; i < sec_MAX; i++) {
438                 printf("      <option value=\"%s\"", sec_numbers[i]);
439                 if (NULL != req->q.sec &&
440                     0 == strcmp(sec_numbers[i], req->q.sec))
441                         printf(" selected=\"selected\"");
442                 printf(">%s</option>\n", sec_names[i]);
443         }
444         puts("    </select>");
445
446         /* Write architecture selector. */
447
448         printf( "    <select name=\"arch\">\n"
449                 "      <option value=\"default\"");
450         if (NULL == req->q.arch)
451                 printf(" selected=\"selected\"");
452         puts(">All Architectures</option>");
453         for (i = 0; i < arch_MAX; i++) {
454                 printf("      <option");
455                 if (NULL != req->q.arch &&
456                     0 == strcmp(arch_names[i], req->q.arch))
457                         printf(" selected=\"selected\"");
458                 printf(">%s</option>\n", arch_names[i]);
459         }
460         puts("    </select>");
461
462         /* Write manpath selector. */
463
464         if (req->psz > 1) {
465                 puts("    <select name=\"manpath\">");
466                 for (i = 0; i < (int)req->psz; i++) {
467                         printf("      <option");
468                         if (strcmp(req->q.manpath, req->p[i]) == 0)
469                                 printf(" selected=\"selected\"");
470                         printf(">");
471                         html_print(req->p[i]);
472                         puts("</option>");
473                 }
474                 puts("    </select>");
475         }
476
477         puts("  </fieldset>\n"
478              "</form>");
479 }
480
/*
 * Check that a URI fragment consists only of alphanumeric
 * characters and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
        const char      *cp;

        for (cp = frag; *cp != '\0'; cp++) {
                if (isalnum((unsigned char)*cp))
                        continue;
                switch (*cp) {
                case '-':
                case '.':
                case '/':
                case '_':
                        break;
                default:
                        return 0;
                }
        }
        return 1;
}
494
495 static int
496 validate_manpath(const struct req *req, const char* manpath)
497 {
498         size_t   i;
499
500         for (i = 0; i < req->psz; i++)
501                 if ( ! strcmp(manpath, req->p[i]))
502                         return 1;
503
504         return 0;
505 }
506
507 static int
508 validate_arch(const char *arch)
509 {
510         int      i;
511
512         for (i = 0; i < arch_MAX; i++)
513                 if (strcmp(arch, arch_names[i]) == 0)
514                         return 1;
515
516         return 0;
517 }
518
/*
 * Check a manual page file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
        if (file[0] == '.' && file[1] == '/')
                file += 2;

        /* Refuse anything that might climb out of the manpath. */

        if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
                return 0;

        if (strncmp(file, "man", 3) == 0)
                return 1;
        if (strncmp(file, "cat", 3) == 0)
                return 1;
        return 0;
}
529
530 static void
531 pg_index(const struct req *req)
532 {
533
534         resp_begin_html(200, NULL, NULL);
535         resp_searchform(req, FOCUS_QUERY);
536         printf("<p>\n"
537                "This web interface is documented in the\n"
538                "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
539                "manual, and the\n"
540                "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
541                "manual explains the query syntax.\n"
542                "</p>\n",
543                scriptname, *scriptname == '\0' ? "" : "/",
544                scriptname, *scriptname == '\0' ? "" : "/");
545         resp_end_html();
546 }
547
548 static void
549 pg_noresult(const struct req *req, const char *msg)
550 {
551         resp_begin_html(200, NULL, NULL);
552         resp_searchform(req, FOCUS_QUERY);
553         puts("<p>");
554         puts(msg);
555         puts("</p>");
556         resp_end_html();
557 }
558
559 static void
560 pg_error_badrequest(const char *msg)
561 {
562
563         resp_begin_html(400, "Bad Request", NULL);
564         puts("<h1>Bad Request</h1>\n"
565              "<p>\n");
566         puts(msg);
567         printf("Try again from the\n"
568                "<a href=\"/%s\">main page</a>.\n"
569                "</p>", scriptname);
570         resp_end_html();
571 }
572
/*
 * Write a minimal "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
        resp_begin_html(500, "Internal Server Error", NULL);
        fputs("<p>Internal Server Error</p>\n", stdout);
        resp_end_html();
}
580
581 static void
582 pg_redirect(const struct req *req, const char *name)
583 {
584         printf("Status: 303 See Other\r\n"
585             "Location: /");
586         if (*scriptname != '\0')
587                 printf("%s/", scriptname);
588         if (strcmp(req->q.manpath, req->p[0]))
589                 printf("%s/", req->q.manpath);
590         if (req->q.arch != NULL)
591                 printf("%s/", req->q.arch);
592         http_encode(name);
593         if (req->q.sec != NULL) {
594                 putchar('.');
595                 http_encode(req->q.sec);
596         }
597         printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
598 }
599
600 static void
601 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
602 {
603         char            *arch, *archend;
604         const char      *sec;
605         size_t           i, iuse;
606         int              archprio, archpriouse;
607         int              prio, priouse;
608
609         for (i = 0; i < sz; i++) {
610                 if (validate_filename(r[i].file))
611                         continue;
612                 warnx("invalid filename %s in %s database",
613                     r[i].file, req->q.manpath);
614                 pg_error_internal();
615                 return;
616         }
617
618         if (req->isquery && sz == 1) {
619                 /*
620                  * If we have just one result, then jump there now
621                  * without any delay.
622                  */
623                 printf("Status: 303 See Other\r\n"
624                     "Location: /");
625                 if (*scriptname != '\0')
626                         printf("%s/", scriptname);
627                 if (strcmp(req->q.manpath, req->p[0]))
628                         printf("%s/", req->q.manpath);
629                 printf("%s\r\n"
630                     "Content-Type: text/html; charset=utf-8\r\n\r\n",
631                     r[0].file);
632                 return;
633         }
634
635         /*
636          * In man(1) mode, show one of the pages
637          * even if more than one is found.
638          */
639
640         iuse = 0;
641         if (req->q.equal || sz == 1) {
642                 priouse = 20;
643                 archpriouse = 3;
644                 for (i = 0; i < sz; i++) {
645                         sec = r[i].file;
646                         sec += strcspn(sec, "123456789");
647                         if (sec[0] == '\0')
648                                 continue;
649                         prio = sec_prios[sec[0] - '1'];
650                         if (sec[1] != '/')
651                                 prio += 10;
652                         if (req->q.arch == NULL) {
653                                 archprio =
654                                     ((arch = strchr(sec + 1, '/'))
655                                         == NULL) ? 3 :
656                                     ((archend = strchr(arch + 1, '/'))
657                                         == NULL) ? 0 :
658                                     strncmp(arch, "amd64/",
659                                         archend - arch) ? 2 : 1;
660                                 if (archprio < archpriouse) {
661                                         archpriouse = archprio;
662                                         priouse = prio;
663                                         iuse = i;
664                                         continue;
665                                 }
666                                 if (archprio > archpriouse)
667                                         continue;
668                         }
669                         if (prio >= priouse)
670                                 continue;
671                         priouse = prio;
672                         iuse = i;
673                 }
674                 resp_begin_html(200, NULL, r[iuse].file);
675         } else
676                 resp_begin_html(200, NULL, NULL);
677
678         resp_searchform(req,
679             req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
680
681         if (sz > 1) {
682                 puts("<table class=\"results\">");
683                 for (i = 0; i < sz; i++) {
684                         printf("  <tr>\n"
685                                "    <td>"
686                                "<a class=\"Xr\" href=\"/");
687                         if (*scriptname != '\0')
688                                 printf("%s/", scriptname);
689                         if (strcmp(req->q.manpath, req->p[0]))
690                                 printf("%s/", req->q.manpath);
691                         printf("%s\">", r[i].file);
692                         html_print(r[i].names);
693                         printf("</a></td>\n"
694                                "    <td><span class=\"Nd\">");
695                         html_print(r[i].output);
696                         puts("</span></td>\n"
697                              "  </tr>");
698                 }
699                 puts("</table>");
700         }
701
702         if (req->q.equal || sz == 1) {
703                 puts("<hr>");
704                 resp_show(req, r[iuse].file);
705         }
706
707         resp_end_html();
708 }
709
/*
 * Write a preformatted manual page (catpage) as HTML.
 *
 * The file is read line by line.  Overstrike sequences of the form
 * "x\by" are translated: an underscore overstruck with a character
 * yields that character in italics, a few special combinations
 * yield a bullet ('*') or a box-drawing corner ('+'), and any other
 * overstrike yields the second character in bold.
 * All text is HTML-escaped.
 *
 * req: unused.
 * file: path of the catpage; if it cannot be opened, a short
 *       error paragraph is written instead.
 */
static void
resp_catman(const struct req *req, const char *file)
{
        FILE            *f;
        char            *p;
        size_t           sz;
        ssize_t          len;
        int              i;
        int              italic, bold;

        if ((f = fopen(file, "r")) == NULL) {
                puts("<p>You specified an invalid manual file.</p>");
                return;
        }

        puts("<div class=\"catman\">\n"
             "<pre>");

        p = NULL;
        sz = 0;

        while ((len = getline(&p, &sz, f)) != -1) {
                bold = italic = 0;
                for (i = 0; i < len - 1; i++) {
                        /*
                         * This means that the catpage is out of state.
                         * Ignore it and keep going (although the
                         * catpage is bogus).
                         */

                        if ('\b' == p[i] || '\n' == p[i])
                                continue;

                        /*
                         * Print a regular character.
                         * Close out any bold/italic scopes.
                         * If we're in back-space mode, make sure we'll
                         * have something to enter when we backspace.
                         */

                        if ('\b' != p[i + 1]) {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                html_putchar(p[i]);
                                continue;
                        } else if (i + 2 >= len)
                                continue;

                        /*
                         * From here on, p[i + 1] is a backspace,
                         * and p[i + 2] is the overstriking character.
                         */

                        /* Italic mode. */

                        if ('_' == p[i]) {
                                if (bold)
                                        printf("</b>");
                                if ( ! italic)
                                        printf("<i>");
                                bold = 0;
                                italic = 1;
                                i += 2;
                                html_putchar(p[i]);
                                continue;
                        }

                        /*
                         * Handle funny behaviour troff-isms.
                         * These grok'd from the original man2html.c.
                         */

                        if (('+' == p[i] && 'o' == p[i + 2]) ||
                                        ('o' == p[i] && '+' == p[i + 2]) ||
                                        ('|' == p[i] && '=' == p[i + 2]) ||
                                        ('=' == p[i] && '|' == p[i + 2]) ||
                                        ('*' == p[i] && '=' == p[i + 2]) ||
                                        ('=' == p[i] && '*' == p[i + 2]) ||
                                        ('*' == p[i] && '|' == p[i + 2]) ||
                                        ('|' == p[i] && '*' == p[i + 2]))  {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                putchar('*');
                                i += 2;
                                continue;
                        } else if (('|' == p[i] && '-' == p[i + 2]) ||
                                        ('-' == p[i] && '|' == p[i + 2]) ||
                                        ('+' == p[i] && '-' == p[i + 2]) ||
                                        ('-' == p[i] && '+' == p[i + 2]) ||
                                        ('+' == p[i] && '|' == p[i + 2]) ||
                                        ('|' == p[i] && '+' == p[i + 2]))  {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                putchar('+');
                                i += 2;
                                continue;
                        }

                        /* Bold mode. */

                        if (italic)
                                printf("</i>");
                        if ( ! bold)
                                printf("<b>");
                        bold = 1;
                        italic = 0;
                        i += 2;
                        html_putchar(p[i]);
                }

                /*
                 * Clean up the last character.
                 * We can get to a newline; don't print that.
                 */

                if (italic)
                        printf("</i>");
                if (bold)
                        printf("</b>");

                if (i == len - 1 && p[i] != '\n')
                        html_putchar(p[i]);

                putchar('\n');
        }
        free(p);

        puts("</pre>\n"
             "</div>");

        fclose(f);
}
846
847 static void
848 resp_format(const struct req *req, const char *file)
849 {
850         struct manoutput conf;
851         struct mparse   *mp;
852         struct roff_meta *meta;
853         void            *vp;
854         int              fd;
855         int              usepath;
856
857         if (-1 == (fd = open(file, O_RDONLY, 0))) {
858                 puts("<p>You specified an invalid manual file.</p>");
859                 return;
860         }
861
862         mchars_alloc();
863         mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
864             MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
865         mparse_readfd(mp, fd, file);
866         close(fd);
867         meta = mparse_result(mp);
868
869         memset(&conf, 0, sizeof(conf));
870         conf.fragment = 1;
871         conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
872         conf.toc = 1;
873         usepath = strcmp(req->q.manpath, req->p[0]);
874         mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
875             scriptname, *scriptname == '\0' ? "" : "/",
876             usepath ? req->q.manpath : "", usepath ? "/" : "");
877
878         vp = html_alloc(&conf);
879         if (meta->macroset == MACROSET_MDOC)
880                 html_mdoc(vp, meta);
881         else
882                 html_man(vp, meta);
883
884         html_free(vp);
885         mparse_free(mp);
886         mchars_free();
887         free(conf.man);
888         free(conf.style);
889 }
890
/*
 * Write one manual page as HTML.
 * After skipping an optional leading "./", file names starting
 * with 'c' are handed to resp_catman(), all others to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
        if (file[0] == '.' && file[1] == '/')
                file += 2;

        if (*file != 'c') {
                resp_format(req, file);
                return;
        }
        resp_catman(req, file);
}
903
904 static void
905 pg_show(struct req *req, const char *fullpath)
906 {
907         char            *manpath;
908         const char      *file;
909
910         if ((file = strchr(fullpath, '/')) == NULL) {
911                 pg_error_badrequest(
912                     "You did not specify a page to show.");
913                 return;
914         }
915         manpath = mandoc_strndup(fullpath, file - fullpath);
916         file++;
917
918         if ( ! validate_manpath(req, manpath)) {
919                 pg_error_badrequest(
920                     "You specified an invalid manpath.");
921                 free(manpath);
922                 return;
923         }
924
925         /*
926          * Begin by chdir()ing into the manpath.
927          * This way we can pick up the database files, which are
928          * relative to the manpath root.
929          */
930
931         if (chdir(manpath) == -1) {
932                 warn("chdir %s", manpath);
933                 pg_error_internal();
934                 free(manpath);
935                 return;
936         }
937         free(manpath);
938
939         if ( ! validate_filename(file)) {
940                 pg_error_badrequest(
941                     "You specified an invalid manual file.");
942                 return;
943         }
944
945         resp_begin_html(200, NULL, file);
946         resp_searchform(req, FOCUS_NONE);
947         resp_show(req, file);
948         resp_end_html();
949 }
950
951 static void
952 pg_search(const struct req *req)
953 {
954         struct mansearch          search;
955         struct manpaths           paths;
956         struct manpage           *res;
957         char                    **argv;
958         char                     *query, *rp, *wp;
959         size_t                    ressz;
960         int                       argc;
961
962         /*
963          * Begin by chdir()ing into the root of the manpath.
964          * This way we can pick up the database files, which are
965          * relative to the manpath root.
966          */
967
968         if (chdir(req->q.manpath) == -1) {
969                 warn("chdir %s", req->q.manpath);
970                 pg_error_internal();
971                 return;
972         }
973
974         search.arch = req->q.arch;
975         search.sec = req->q.sec;
976         search.outkey = "Nd";
977         search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
978         search.firstmatch = 1;
979
980         paths.sz = 1;
981         paths.paths = mandoc_malloc(sizeof(char *));
982         paths.paths[0] = mandoc_strdup(".");
983
984         /*
985          * Break apart at spaces with backslash-escaping.
986          */
987
988         argc = 0;
989         argv = NULL;
990         rp = query = mandoc_strdup(req->q.query);
991         for (;;) {
992                 while (isspace((unsigned char)*rp))
993                         rp++;
994                 if (*rp == '\0')
995                         break;
996                 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
997                 argv[argc++] = wp = rp;
998                 for (;;) {
999                         if (isspace((unsigned char)*rp)) {
1000                                 *wp = '\0';
1001                                 rp++;
1002                                 break;
1003                         }
1004                         if (rp[0] == '\\' && rp[1] != '\0')
1005                                 rp++;
1006                         if (wp != rp)
1007                                 *wp = *rp;
1008                         if (*rp == '\0')
1009                                 break;
1010                         wp++;
1011                         rp++;
1012                 }
1013         }
1014
1015         res = NULL;
1016         ressz = 0;
1017         if (req->isquery && req->q.equal && argc == 1)
1018                 pg_redirect(req, argv[0]);
1019         else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1020                 pg_noresult(req, "You entered an invalid query.");
1021         else if (ressz == 0)
1022                 pg_noresult(req, "No results found.");
1023         else
1024                 pg_searchres(req, res, ressz);
1025
1026         free(query);
1027         mansearch_free(res, ressz);
1028         free(paths.paths[0]);
1029         free(paths.paths);
1030 }
1031
1032 int
1033 main(void)
1034 {
1035         struct req       req;
1036         struct itimerval itimer;
1037         const char      *path;
1038         const char      *querystring;
1039         int              i;
1040
1041 #if HAVE_PLEDGE
1042         /*
1043          * The "rpath" pledge could be revoked after mparse_readfd()
1044          * if the file desciptor to "/footer.html" would be opened
1045          * up front, but it's probably not worth the complication
1046          * of the code it would cause: it would require scattering
1047          * pledge() calls in multiple low-level resp_*() functions.
1048          */
1049
1050         if (pledge("stdio rpath", NULL) == -1) {
1051                 warn("pledge");
1052                 pg_error_internal();
1053                 return EXIT_FAILURE;
1054         }
1055 #endif
1056
1057         /* Poor man's ReDoS mitigation. */
1058
1059         itimer.it_value.tv_sec = 2;
1060         itimer.it_value.tv_usec = 0;
1061         itimer.it_interval.tv_sec = 2;
1062         itimer.it_interval.tv_usec = 0;
1063         if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1064                 warn("setitimer");
1065                 pg_error_internal();
1066                 return EXIT_FAILURE;
1067         }
1068
1069         /*
1070          * First we change directory into the MAN_DIR so that
1071          * subsequent scanning for manpath directories is rooted
1072          * relative to the same position.
1073          */
1074
1075         if (chdir(MAN_DIR) == -1) {
1076                 warn("MAN_DIR: %s", MAN_DIR);
1077                 pg_error_internal();
1078                 return EXIT_FAILURE;
1079         }
1080
1081         memset(&req, 0, sizeof(struct req));
1082         req.q.equal = 1;
1083         parse_manpath_conf(&req);
1084
1085         /* Parse the path info and the query string. */
1086
1087         if ((path = getenv("PATH_INFO")) == NULL)
1088                 path = "";
1089         else if (*path == '/')
1090                 path++;
1091
1092         if (*path != '\0') {
1093                 parse_path_info(&req, path);
1094                 if (req.q.manpath == NULL || req.q.sec == NULL ||
1095                     *req.q.query == '\0' || access(path, F_OK) == -1)
1096                         path = "";
1097         } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1098                 parse_query_string(&req, querystring);
1099
1100         /* Validate parsed data and add defaults. */
1101
1102         if (req.q.manpath == NULL)
1103                 req.q.manpath = mandoc_strdup(req.p[0]);
1104         else if ( ! validate_manpath(&req, req.q.manpath)) {
1105                 pg_error_badrequest(
1106                     "You specified an invalid manpath.");
1107                 return EXIT_FAILURE;
1108         }
1109
1110         if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1111                 pg_error_badrequest(
1112                     "You specified an invalid architecture.");
1113                 return EXIT_FAILURE;
1114         }
1115
1116         /* Dispatch to the three different pages. */
1117
1118         if ('\0' != *path)
1119                 pg_show(&req, path);
1120         else if (NULL != req.q.query)
1121                 pg_search(&req);
1122         else
1123                 pg_index(&req);
1124
1125         free(req.q.manpath);
1126         free(req.q.arch);
1127         free(req.q.sec);
1128         free(req.q.query);
1129         for (i = 0; i < (int)req.psz; i++)
1130                 free(req.p[i]);
1131         free(req.p);
1132         return EXIT_SUCCESS;
1133 }
1134
1135 /*
1136  * Translate PATH_INFO to a query.
1137  */
1138 static void
1139 parse_path_info(struct req *req, const char *path)
1140 {
1141         const char      *name, *sec, *end;
1142
1143         req->isquery = 0;
1144         req->q.equal = 1;
1145         req->q.manpath = NULL;
1146         req->q.arch = NULL;
1147
1148         /* Mandatory manual page name. */
1149         if ((name = strrchr(path, '/')) == NULL)
1150                 name = path;
1151         else
1152                 name++;
1153
1154         /* Optional trailing section. */
1155         sec = strrchr(name, '.');
1156         if (sec != NULL && isdigit((unsigned char)*++sec)) {
1157                 req->q.query = mandoc_strndup(name, sec - name - 1);
1158                 req->q.sec = mandoc_strdup(sec);
1159         } else {
1160                 req->q.query = mandoc_strdup(name);
1161                 req->q.sec = NULL;
1162         }
1163
1164         /* Handle the case of name[.section] only. */
1165         if (name == path)
1166                 return;
1167
1168         /* Optional manpath. */
1169         end = strchr(path, '/');
1170         req->q.manpath = mandoc_strndup(path, end - path);
1171         if (validate_manpath(req, req->q.manpath)) {
1172                 path = end + 1;
1173                 if (name == path)
1174                         return;
1175         } else {
1176                 free(req->q.manpath);
1177                 req->q.manpath = NULL;
1178         }
1179
1180         /* Optional section. */
1181         if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1182                 path += 3;
1183                 end = strchr(path, '/');
1184                 free(req->q.sec);
1185                 req->q.sec = mandoc_strndup(path, end - path);
1186                 path = end + 1;
1187                 if (name == path)
1188                         return;
1189         }
1190
1191         /* Optional architecture. */
1192         end = strchr(path, '/');
1193         if (end + 1 != name) {
1194                 pg_error_badrequest(
1195                     "You specified too many directory components.");
1196                 exit(EXIT_FAILURE);
1197         }
1198         req->q.arch = mandoc_strndup(path, end - path);
1199         if (validate_arch(req->q.arch) == 0) {
1200                 pg_error_badrequest(
1201                     "You specified an invalid directory component.");
1202                 exit(EXIT_FAILURE);
1203         }
1204 }
1205
1206 /*
1207  * Scan for indexable paths.
1208  */
1209 static void
1210 parse_manpath_conf(struct req *req)
1211 {
1212         FILE    *fp;
1213         char    *dp;
1214         size_t   dpsz;
1215         ssize_t  len;
1216
1217         if ((fp = fopen("manpath.conf", "r")) == NULL) {
1218                 warn("%s/manpath.conf", MAN_DIR);
1219                 pg_error_internal();
1220                 exit(EXIT_FAILURE);
1221         }
1222
1223         dp = NULL;
1224         dpsz = 0;
1225
1226         while ((len = getline(&dp, &dpsz, fp)) != -1) {
1227                 if (dp[len - 1] == '\n')
1228                         dp[--len] = '\0';
1229                 req->p = mandoc_realloc(req->p,
1230                     (req->psz + 1) * sizeof(char *));
1231                 if ( ! validate_urifrag(dp)) {
1232                         warnx("%s/manpath.conf contains "
1233                             "unsafe path \"%s\"", MAN_DIR, dp);
1234                         pg_error_internal();
1235                         exit(EXIT_FAILURE);
1236                 }
1237                 if (strchr(dp, '/') != NULL) {
1238                         warnx("%s/manpath.conf contains "
1239                             "path with slash \"%s\"", MAN_DIR, dp);
1240                         pg_error_internal();
1241                         exit(EXIT_FAILURE);
1242                 }
1243                 req->p[req->psz++] = dp;
1244                 dp = NULL;
1245                 dpsz = 0;
1246         }
1247         free(dp);
1248
1249         if (req->p == NULL) {
1250                 warnx("%s/manpath.conf is empty", MAN_DIR);
1251                 pg_error_internal();
1252                 exit(EXIT_FAILURE);
1253         }
1254 }