]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mandoc/cgi.c
vm_phys: Fix vm_phys_find_range() after commit 69cbb18746b6
[FreeBSD/FreeBSD.git] / contrib / mandoc / cgi.c
1 /* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */
2 /*
3  * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * Implementation of the man.cgi(8) program.
19  */
20 #include "config.h"
21
22 #include <sys/types.h>
23 #include <sys/time.h>
24
25 #include <ctype.h>
26 #if HAVE_ERR
27 #include <err.h>
28 #endif
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "mandoc_aux.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "mdoc.h"
42 #include "man.h"
43 #include "mandoc_parse.h"
44 #include "main.h"
45 #include "manconf.h"
46 #include "mansearch.h"
47 #include "cgi.h"
48
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct query {
        /* desired manual directory */
        char    *manpath;
        /* architecture */
        char    *arch;
        /* manual section */
        char    *sec;
        /* unparsed query expression */
        char    *query;
        /* match whole names, not substrings */
        int      equal;
};
59
60 struct  req {
61         struct query      q;
62         char            **p; /* array of available manpaths */
63         size_t            psz; /* number of available manpaths */
64         int               isquery; /* QUERY_STRING used, not PATH_INFO */
65 };
66
/*
 * Which element of the search form, if any,
 * is to receive the autofocus attribute.
 */
enum focus {
        FOCUS_NONE  = 0,        /* no element */
        FOCUS_QUERY = 1         /* the query input box */
};
71
/*
 * Forward declarations of the file-local functions,
 * grouped by name prefix.
 */

/* HTML escaping and URI encoding/decoding. */
static  void     html_print(const char *p);
static  void     html_putchar(char c);
static  int      http_decode(char *p);
static  void     http_encode(const char *p);

/* Request parsing. */
static  void     parse_manpath_conf(struct req *req);
static  void     parse_path_info(struct req *req, const char *path);
static  void     parse_query_string(struct req *req, const char *qs);
static  void     set_query_attr(char **attr, char **val);

/* Input validation. */
static  int      validate_arch(const char *arch);
static  int      validate_filename(const char *file);
static  int      validate_manpath(const struct req *req,
                        const char *manpath);
static  int      validate_urifrag(const char *frag);

/* pg_*() functions. */
static  void     pg_error_badrequest(const char *msg);
static  void     pg_error_internal(void);
static  void     pg_index(const struct req *req);
static  void     pg_noresult(const struct req *req, int code,
                        const char *http_msg, const char *user_msg);
static  void     pg_redirect(const struct req *req, const char *name);
static  void     pg_search(const struct req *req);
static  void     pg_searchres(const struct req *req,
                        struct manpage *r, size_t sz);
static  void     pg_show(struct req *req, const char *fullpath);

/* resp_*() functions. */
static  void     resp_begin_html(int code, const char *msg,
                        const char *file);
static  void     resp_begin_http(int code, const char *msg);
static  void     resp_catman(const struct req *req, const char *file);
static  void     resp_copy(const char *filename);
static  void     resp_end_html(void);
static  void     resp_format(const struct req *req, const char *file);
static  void     resp_searchform(const struct req *req, enum focus focus);
static  void     resp_show(const struct req *req, const char *file);
102
103 static  const char       *scriptname = SCRIPT_NAME;
104
/*
 * Priorities of the manual sections 1 to 9, indexed by (digit - '1').
 * Among several matching pages, the smallest value is preferred.
 */
static  const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the section selector; parallel to sec_names[]. */
static  const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/* Labels of the section selector; parallel to sec_numbers[]. */
static  const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static  const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
123
/*
 * Architecture names accepted by validate_arch() and offered by
 * the architecture selector, in the order in which they are listed.
 */
static  const char *const arch_names[] = {
    "amd64", "alpha", "armv7", "arm64", "hppa",
    "i386", "landisk", "loongson", "luna88k", "macppc",
    "mips64", "octeon", "powerpc64", "riscv64", "sparc64",

    "amiga", "arc", "armish", "arm32", "atari",
    "aviion", "beagle", "cats", "hppa64", "hp300",
    "ia64", "mac68k", "mvme68k", "mvme88k", "mvmeppc",
    "palm", "pc532", "pegasos", "pmax", "powerpc",
    "sgi", "socppc", "solbourne", "sparc", "sun3",
    "vax", "wgrisc", "x68k", "zaurus"
};

/* Number of entries in arch_names[]. */
static  const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
141
/*
 * Write one character to standard output, replacing the four
 * characters '"', '&', '>' and '<' by their HTML entities.
 * All other bytes, including non-ASCII ones, are written unchanged.
 */
static void
html_putchar(char c)
{
        const char      *entity;

        entity =
            c == '"' ? "&quot;" :
            c == '&' ? "&amp;" :
            c == '>' ? "&gt;" :
            c == '<' ? "&lt;" : NULL;

        if (entity == NULL)
                putchar((unsigned char)c);
        else
                fputs(entity, stdout);
}
168
/*
 * Write a string to standard output, passing every character
 * through html_putchar().  A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
        const char      *cp;

        if (p == NULL)
                return;

        for (cp = p; *cp != '\0'; cp++)
                html_putchar(*cp);
}
182
/*
 * Move the allocated string *val into the query attribute *attr.
 * The previous content of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL afterwards.
 */
static void
set_query_attr(char **attr, char **val)
{
        char    *incoming;

        incoming = *val;
        *val = NULL;

        free(*attr);
        if (*incoming != '\0') {
                *attr = incoming;
                return;
        }
        free(incoming);
        *attr = NULL;
}
199
200 /*
201  * Parse the QUERY_STRING for key-value pairs
202  * and store the values into the query structure.
203  */
204 static void
205 parse_query_string(struct req *req, const char *qs)
206 {
207         char            *key, *val;
208         size_t           keysz, valsz;
209
210         req->isquery    = 1;
211         req->q.manpath  = NULL;
212         req->q.arch     = NULL;
213         req->q.sec      = NULL;
214         req->q.query    = NULL;
215         req->q.equal    = 1;
216
217         key = val = NULL;
218         while (*qs != '\0') {
219
220                 /* Parse one key. */
221
222                 keysz = strcspn(qs, "=;&");
223                 key = mandoc_strndup(qs, keysz);
224                 qs += keysz;
225                 if (*qs != '=')
226                         goto next;
227
228                 /* Parse one value. */
229
230                 valsz = strcspn(++qs, ";&");
231                 val = mandoc_strndup(qs, valsz);
232                 qs += valsz;
233
234                 /* Decode and catch encoding errors. */
235
236                 if ( ! (http_decode(key) && http_decode(val)))
237                         goto next;
238
239                 /* Handle key-value pairs. */
240
241                 if ( ! strcmp(key, "query"))
242                         set_query_attr(&req->q.query, &val);
243
244                 else if ( ! strcmp(key, "apropos"))
245                         req->q.equal = !strcmp(val, "0");
246
247                 else if ( ! strcmp(key, "manpath")) {
248 #ifdef COMPAT_OLDURI
249                         if ( ! strncmp(val, "OpenBSD ", 8)) {
250                                 val[7] = '-';
251                                 if ('C' == val[8])
252                                         val[8] = 'c';
253                         }
254 #endif
255                         set_query_attr(&req->q.manpath, &val);
256                 }
257
258                 else if ( ! (strcmp(key, "sec")
259 #ifdef COMPAT_OLDURI
260                     && strcmp(key, "sektion")
261 #endif
262                     )) {
263                         if ( ! strcmp(val, "0"))
264                                 *val = '\0';
265                         set_query_attr(&req->q.sec, &val);
266                 }
267
268                 else if ( ! strcmp(key, "arch")) {
269                         if ( ! strcmp(val, "default"))
270                                 *val = '\0';
271                         set_query_attr(&req->q.arch, &val);
272                 }
273
274                 /*
275                  * The key must be freed in any case.
276                  * The val may have been handed over to the query
277                  * structure, in which case it is now NULL.
278                  */
279 next:
280                 free(key);
281                 key = NULL;
282                 free(val);
283                 val = NULL;
284
285                 if (*qs != '\0')
286                         qs++;
287         }
288 }
289
290 /*
291  * HTTP-decode a string.  The standard explanation is that this turns
292  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
293  * over the allocated string.
294  */
295 static int
296 http_decode(char *p)
297 {
298         char             hex[3];
299         char            *q;
300         int              c;
301
302         hex[2] = '\0';
303
304         q = p;
305         for ( ; '\0' != *p; p++, q++) {
306                 if ('%' == *p) {
307                         if ('\0' == (hex[0] = *(p + 1)))
308                                 return 0;
309                         if ('\0' == (hex[1] = *(p + 2)))
310                                 return 0;
311                         if (1 != sscanf(hex, "%x", &c))
312                                 return 0;
313                         if ('\0' == c)
314                                 return 0;
315
316                         *q = (char)c;
317                         p += 2;
318                 } else
319                         *q = '+' == *p ? ' ' : *p;
320         }
321
322         *q = '\0';
323         return 1;
324 }
325
/*
 * Write a string to standard output in percent-encoded form.
 * Alphanumeric characters and "-._~" are written literally;
 * every other byte is written as '%' and two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
        unsigned char    ch;

        while ((ch = (unsigned char)*p++) != '\0') {
                if (isalnum(ch) != 0 || strchr("-._~", ch) != NULL)
                        putchar(ch);
                else
                        printf("%%%2.2X", ch);
        }
}
337
/*
 * Write the HTTP response header block and flush standard output.
 * A "Status:" line with the given code and message is written
 * for every code except 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
        if (code != 200)
                printf("Status: %d %s\r\n", code, msg);

        fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
        fputs("Cache-Control: no-cache\r\n", stdout);
        fputs("Content-Security-Policy: default-src 'none'; "
            "style-src 'self' 'unsafe-inline'\r\n", stdout);
        fputs("Pragma: no-cache\r\n", stdout);

        /* An empty line terminates the header block. */
        fputs("\r\n", stdout);

        fflush(stdout);
}
354
/*
 * Copy the content of a file verbatim to standard output.
 * Nothing is written and no error is reported if the file
 * cannot be opened.
 *
 * The data bypasses stdio and is written with write(2), so
 * pending stdio output is flushed first to keep the order.
 * Since write(2) may transfer fewer bytes than requested or be
 * interrupted by a signal, each chunk is written in a loop until
 * it is complete.  Copying stops at the first real I/O error.
 */
static void
resp_copy(const char *filename)
{
        char     buf[4096];
        ssize_t  nr, nw, off;
        int      fd;

        if ((fd = open(filename, O_RDONLY)) == -1)
                return;

        fflush(stdout);
        for (;;) {
                nr = read(fd, buf, sizeof(buf));
                if (nr == -1 && errno == EINTR)
                        continue;
                if (nr <= 0)
                        break;

                for (off = 0; off < nr; off += nw) {
                        nw = write(STDOUT_FILENO, buf + off,
                            (size_t)(nr - off));
                        if (nw > 0)
                                continue;
                        if (nw == -1 && errno == EINTR) {
                                /* Retry without advancing. */
                                nw = 0;
                                continue;
                        }
                        /* The output is gone; give up. */
                        goto done;
                }
        }
done:
        close(fd);
}
369
370 static void
371 resp_begin_html(int code, const char *msg, const char *file)
372 {
373         const char      *name, *sec, *cp;
374         int              namesz, secsz;
375
376         resp_begin_http(code, msg);
377
378         printf("<!DOCTYPE html>\n"
379                "<html>\n"
380                "<head>\n"
381                "  <meta charset=\"UTF-8\"/>\n"
382                "  <meta name=\"viewport\""
383                       " content=\"width=device-width, initial-scale=1.0\">\n"
384                "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
385                " type=\"text/css\" media=\"all\">\n"
386                "  <title>",
387                CSS_DIR);
388         if (file != NULL) {
389                 cp = strrchr(file, '/');
390                 name = cp == NULL ? file : cp + 1;
391                 cp = strrchr(name, '.');
392                 namesz = cp == NULL ? strlen(name) : cp - name;
393                 sec = NULL;
394                 if (cp != NULL && cp[1] != '0') {
395                         sec = cp + 1;
396                         secsz = strlen(sec);
397                 } else if (name - file > 1) {
398                         for (cp = name - 2; cp >= file; cp--) {
399                                 if (*cp < '1' || *cp > '9')
400                                         continue;
401                                 sec = cp;
402                                 secsz = name - cp - 1;
403                                 break;
404                         }
405                 }
406                 printf("%.*s", namesz, name);
407                 if (sec != NULL)
408                         printf("(%.*s)", secsz, sec);
409                 fputs(" - ", stdout);
410         }
411         printf("%s</title>\n"
412                "</head>\n"
413                "<body>\n",
414                CUSTOMIZE_TITLE);
415
416         resp_copy(MAN_DIR "/header.html");
417 }
418
419 static void
420 resp_end_html(void)
421 {
422
423         resp_copy(MAN_DIR "/footer.html");
424
425         puts("</body>\n"
426              "</html>");
427 }
428
429 static void
430 resp_searchform(const struct req *req, enum focus focus)
431 {
432         int              i;
433
434         printf("<form action=\"/%s\" method=\"get\" "
435                "autocomplete=\"off\" autocapitalize=\"none\">\n"
436                "  <fieldset>\n"
437                "    <legend>Manual Page Search Parameters</legend>\n",
438                scriptname);
439
440         /* Write query input box. */
441
442         printf("    <input type=\"search\" name=\"query\" value=\"");
443         if (req->q.query != NULL)
444                 html_print(req->q.query);
445         printf( "\" size=\"40\"");
446         if (focus == FOCUS_QUERY)
447                 printf(" autofocus");
448         puts(">");
449
450         /* Write submission buttons. */
451
452         printf( "    <button type=\"submit\" name=\"apropos\" value=\"0\">"
453                 "man</button>\n"
454                 "    <button type=\"submit\" name=\"apropos\" value=\"1\">"
455                 "apropos</button>\n"
456                 "    <br/>\n");
457
458         /* Write section selector. */
459
460         puts("    <select name=\"sec\">");
461         for (i = 0; i < sec_MAX; i++) {
462                 printf("      <option value=\"%s\"", sec_numbers[i]);
463                 if (NULL != req->q.sec &&
464                     0 == strcmp(sec_numbers[i], req->q.sec))
465                         printf(" selected=\"selected\"");
466                 printf(">%s</option>\n", sec_names[i]);
467         }
468         puts("    </select>");
469
470         /* Write architecture selector. */
471
472         printf( "    <select name=\"arch\">\n"
473                 "      <option value=\"default\"");
474         if (NULL == req->q.arch)
475                 printf(" selected=\"selected\"");
476         puts(">All Architectures</option>");
477         for (i = 0; i < arch_MAX; i++) {
478                 printf("      <option");
479                 if (NULL != req->q.arch &&
480                     0 == strcmp(arch_names[i], req->q.arch))
481                         printf(" selected=\"selected\"");
482                 printf(">%s</option>\n", arch_names[i]);
483         }
484         puts("    </select>");
485
486         /* Write manpath selector. */
487
488         if (req->psz > 1) {
489                 puts("    <select name=\"manpath\">");
490                 for (i = 0; i < (int)req->psz; i++) {
491                         printf("      <option");
492                         if (strcmp(req->q.manpath, req->p[i]) == 0)
493                                 printf(" selected=\"selected\"");
494                         printf(">");
495                         html_print(req->p[i]);
496                         puts("</option>");
497                 }
498                 puts("    </select>");
499         }
500
501         puts("  </fieldset>\n"
502              "</form>");
503 }
504
/*
 * Check that a URI fragment consists of alphanumeric characters
 * and the characters '-', '.', '/' and '_' only.
 * Return 1 if so (including for the empty string), or else 0.
 */
static int
validate_urifrag(const char *frag)
{
        const char      *cp;

        for (cp = frag; *cp != '\0'; cp++) {
                if (isalnum((unsigned char)*cp))
                        continue;
                if (strchr("-./_", *cp) == NULL)
                        return 0;
        }
        return 1;
}
518
519 static int
520 validate_manpath(const struct req *req, const char* manpath)
521 {
522         size_t   i;
523
524         for (i = 0; i < req->psz; i++)
525                 if ( ! strcmp(manpath, req->p[i]))
526                         return 1;
527
528         return 0;
529 }
530
531 static int
532 validate_arch(const char *arch)
533 {
534         int      i;
535
536         for (i = 0; i < arch_MAX; i++)
537                 if (strcmp(arch, arch_names[i]) == 0)
538                         return 1;
539
540         return 0;
541 }
542
/*
 * Check a file name relative to a manpath.
 * After an optional leading "./", the name must start with
 * "man" or "cat" and must not contain "../" or "/..".
 * Return 1 if the name is acceptable, or else 0.
 */
static int
validate_filename(const char *file)
{
        if (strncmp(file, "./", 2) == 0)
                file += 2;

        /* Refuse anything that might ascend the directory tree. */

        if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
                return 0;

        return strncmp(file, "man", 3) == 0 ||
            strncmp(file, "cat", 3) == 0;
}
553
554 static void
555 pg_index(const struct req *req)
556 {
557
558         resp_begin_html(200, NULL, NULL);
559         resp_searchform(req, FOCUS_QUERY);
560         printf("<p>\n"
561                "This web interface is documented in the\n"
562                "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
563                "manual, and the\n"
564                "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
565                "manual explains the query syntax.\n"
566                "</p>\n",
567                scriptname, *scriptname == '\0' ? "" : "/",
568                scriptname, *scriptname == '\0' ? "" : "/");
569         resp_end_html();
570 }
571
572 static void
573 pg_noresult(const struct req *req, int code, const char *http_msg,
574     const char *user_msg)
575 {
576         resp_begin_html(code, http_msg, NULL);
577         resp_searchform(req, FOCUS_QUERY);
578         puts("<p>");
579         puts(user_msg);
580         puts("</p>");
581         resp_end_html();
582 }
583
584 static void
585 pg_error_badrequest(const char *msg)
586 {
587
588         resp_begin_html(400, "Bad Request", NULL);
589         puts("<h1>Bad Request</h1>\n"
590              "<p>\n");
591         puts(msg);
592         printf("Try again from the\n"
593                "<a href=\"/%s\">main page</a>.\n"
594                "</p>", scriptname);
595         resp_end_html();
596 }
597
/*
 * Write a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
        resp_begin_html(500, "Internal Server Error", NULL);
        fputs("<p>Internal Server Error</p>\n", stdout);
        resp_end_html();
}
605
606 static void
607 pg_redirect(const struct req *req, const char *name)
608 {
609         printf("Status: 303 See Other\r\n"
610             "Location: /");
611         if (*scriptname != '\0')
612                 printf("%s/", scriptname);
613         if (strcmp(req->q.manpath, req->p[0]))
614                 printf("%s/", req->q.manpath);
615         if (req->q.arch != NULL)
616                 printf("%s/", req->q.arch);
617         http_encode(name);
618         if (req->q.sec != NULL) {
619                 putchar('.');
620                 http_encode(req->q.sec);
621         }
622         printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
623 }
624
625 static void
626 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
627 {
628         char            *arch, *archend;
629         const char      *sec;
630         size_t           i, iuse;
631         int              archprio, archpriouse;
632         int              prio, priouse;
633
634         for (i = 0; i < sz; i++) {
635                 if (validate_filename(r[i].file))
636                         continue;
637                 warnx("invalid filename %s in %s database",
638                     r[i].file, req->q.manpath);
639                 pg_error_internal();
640                 return;
641         }
642
643         if (req->isquery && sz == 1) {
644                 /*
645                  * If we have just one result, then jump there now
646                  * without any delay.
647                  */
648                 printf("Status: 303 See Other\r\n"
649                     "Location: /");
650                 if (*scriptname != '\0')
651                         printf("%s/", scriptname);
652                 if (strcmp(req->q.manpath, req->p[0]))
653                         printf("%s/", req->q.manpath);
654                 printf("%s\r\n"
655                     "Content-Type: text/html; charset=utf-8\r\n\r\n",
656                     r[0].file);
657                 return;
658         }
659
660         /*
661          * In man(1) mode, show one of the pages
662          * even if more than one is found.
663          */
664
665         iuse = 0;
666         if (req->q.equal || sz == 1) {
667                 priouse = 20;
668                 archpriouse = 3;
669                 for (i = 0; i < sz; i++) {
670                         sec = r[i].file;
671                         sec += strcspn(sec, "123456789");
672                         if (sec[0] == '\0')
673                                 continue;
674                         prio = sec_prios[sec[0] - '1'];
675                         if (sec[1] != '/')
676                                 prio += 10;
677                         if (req->q.arch == NULL) {
678                                 archprio =
679                                     ((arch = strchr(sec + 1, '/'))
680                                         == NULL) ? 3 :
681                                     ((archend = strchr(arch + 1, '/'))
682                                         == NULL) ? 0 :
683                                     strncmp(arch, "amd64/",
684                                         archend - arch) ? 2 : 1;
685                                 if (archprio < archpriouse) {
686                                         archpriouse = archprio;
687                                         priouse = prio;
688                                         iuse = i;
689                                         continue;
690                                 }
691                                 if (archprio > archpriouse)
692                                         continue;
693                         }
694                         if (prio >= priouse)
695                                 continue;
696                         priouse = prio;
697                         iuse = i;
698                 }
699                 resp_begin_html(200, NULL, r[iuse].file);
700         } else
701                 resp_begin_html(200, NULL, NULL);
702
703         resp_searchform(req,
704             req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
705
706         if (sz > 1) {
707                 puts("<table class=\"results\">");
708                 for (i = 0; i < sz; i++) {
709                         printf("  <tr>\n"
710                                "    <td>"
711                                "<a class=\"Xr\" href=\"/");
712                         if (*scriptname != '\0')
713                                 printf("%s/", scriptname);
714                         if (strcmp(req->q.manpath, req->p[0]))
715                                 printf("%s/", req->q.manpath);
716                         printf("%s\">", r[i].file);
717                         html_print(r[i].names);
718                         printf("</a></td>\n"
719                                "    <td><span class=\"Nd\">");
720                         html_print(r[i].output);
721                         puts("</span></td>\n"
722                              "  </tr>");
723                 }
724                 puts("</table>");
725         }
726
727         if (req->q.equal || sz == 1) {
728                 puts("<hr>");
729                 resp_show(req, r[iuse].file);
730         }
731
732         resp_end_html();
733 }
734
/*
 * Write a preformatted manual page (cat page) as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: an underscore, a backspace, and a character
 * give that character in italics; certain pairs of overstruck
 * punctuation characters give '*' or '+'; any other character,
 * a backspace, and a character give the latter in bold.
 * All text is passed through html_putchar().
 *
 * If the file cannot be opened, a message is written instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
        FILE            *f;
        char            *p;
        size_t           sz;
        ssize_t          len;
        int              i;
        int              italic, bold;

        if ((f = fopen(file, "r")) == NULL) {
                puts("<p>You specified an invalid manual file.</p>");
                return;
        }

        puts("<div class=\"catman\">\n"
             "<pre>");

        p = NULL;
        sz = 0;

        while ((len = getline(&p, &sz, f)) != -1) {
                bold = italic = 0;

                /* The last byte of the line is handled after the loop. */

                for (i = 0; i < len - 1; i++) {
                        /*
                         * This means that the catpage is out of state.
                         * Ignore it and keep going (although the
                         * catpage is bogus).
                         */

                        if ('\b' == p[i] || '\n' == p[i])
                                continue;

                        /*
                         * Print a regular character.
                         * Close out any bold/italic scopes.
                         * If we're in back-space mode, make sure we'll
                         * have something to enter when we backspace.
                         */

                        if ('\b' != p[i + 1]) {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                html_putchar(p[i]);
                                continue;
                        } else if (i + 2 >= len)
                                continue;

                        /*
                         * From here on, p[i + 1] is a backspace and
                         * p[i + 2] is the overstriking character.
                         */

                        /* Italic mode. */

                        if ('_' == p[i]) {
                                if (bold)
                                        printf("</b>");
                                if ( ! italic)
                                        printf("<i>");
                                bold = 0;
                                italic = 1;
                                i += 2;
                                html_putchar(p[i]);
                                continue;
                        }

                        /*
                         * Handle funny behaviour troff-isms.
                         * These grok'd from the original man2html.c.
                         */

                        if (('+' == p[i] && 'o' == p[i + 2]) ||
                                        ('o' == p[i] && '+' == p[i + 2]) ||
                                        ('|' == p[i] && '=' == p[i + 2]) ||
                                        ('=' == p[i] && '|' == p[i + 2]) ||
                                        ('*' == p[i] && '=' == p[i + 2]) ||
                                        ('=' == p[i] && '*' == p[i + 2]) ||
                                        ('*' == p[i] && '|' == p[i + 2]) ||
                                        ('|' == p[i] && '*' == p[i + 2]))  {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                putchar('*');
                                i += 2;
                                continue;
                        } else if (('|' == p[i] && '-' == p[i + 2]) ||
                                        ('-' == p[i] && '|' == p[i + 2]) ||
                                        ('+' == p[i] && '-' == p[i + 2]) ||
                                        ('-' == p[i] && '+' == p[i + 2]) ||
                                        ('+' == p[i] && '|' == p[i + 2]) ||
                                        ('|' == p[i] && '+' == p[i + 2]))  {
                                /*
                                 * The partner character is at
                                 * p[i + 2]; p[i + 1] is always the
                                 * backspace here and can never match.
                                 */
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                putchar('+');
                                i += 2;
                                continue;
                        }

                        /* Bold mode. */

                        if (italic)
                                printf("</i>");
                        if ( ! bold)
                                printf("<b>");
                        bold = 1;
                        italic = 0;
                        i += 2;
                        html_putchar(p[i]);
                }

                /*
                 * Clean up the last character.
                 * We can get to a newline; don't print that.
                 */

                if (italic)
                        printf("</i>");
                if (bold)
                        printf("</b>");

                /*
                 * The last byte is still pending unless an
                 * overstrike sequence consumed it already.
                 */

                if (i == len - 1 && p[i] != '\n')
                        html_putchar(p[i]);

                putchar('\n');
        }
        free(p);

        puts("</pre>\n"
             "</div>");

        fclose(f);
}
871
872 static void
873 resp_format(const struct req *req, const char *file)
874 {
875         struct manoutput conf;
876         struct mparse   *mp;
877         struct roff_meta *meta;
878         void            *vp;
879         int              fd;
880         int              usepath;
881
882         if (-1 == (fd = open(file, O_RDONLY, 0))) {
883                 puts("<p>You specified an invalid manual file.</p>");
884                 return;
885         }
886
887         mchars_alloc();
888         mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
889             MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
890         mparse_readfd(mp, fd, file);
891         close(fd);
892         meta = mparse_result(mp);
893
894         memset(&conf, 0, sizeof(conf));
895         conf.fragment = 1;
896         conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
897         usepath = strcmp(req->q.manpath, req->p[0]);
898         mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
899             scriptname, *scriptname == '\0' ? "" : "/",
900             usepath ? req->q.manpath : "", usepath ? "/" : "");
901
902         vp = html_alloc(&conf);
903         if (meta->macroset == MACROSET_MDOC)
904                 html_mdoc(vp, meta);
905         else
906                 html_man(vp, meta);
907
908         html_free(vp);
909         mparse_free(mp);
910         mchars_free();
911         free(conf.man);
912         free(conf.style);
913 }
914
/*
 * Show one manual file: strip a leading "./" from the file name,
 * then hand names starting with 'c' to resp_catman() and all
 * other names to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
        const char      *page;

        page = file;
        if (strncmp(page, "./", 2) == 0)
                page += 2;

        if (page[0] != 'c') {
                resp_format(req, page);
                return;
        }
        resp_catman(req, page);
}
927
928 static void
929 pg_show(struct req *req, const char *fullpath)
930 {
931         char            *manpath;
932         const char      *file;
933
934         if ((file = strchr(fullpath, '/')) == NULL) {
935                 pg_error_badrequest(
936                     "You did not specify a page to show.");
937                 return;
938         }
939         manpath = mandoc_strndup(fullpath, file - fullpath);
940         file++;
941
942         if ( ! validate_manpath(req, manpath)) {
943                 pg_error_badrequest(
944                     "You specified an invalid manpath.");
945                 free(manpath);
946                 return;
947         }
948
949         /*
950          * Begin by chdir()ing into the manpath.
951          * This way we can pick up the database files, which are
952          * relative to the manpath root.
953          */
954
955         if (chdir(manpath) == -1) {
956                 warn("chdir %s", manpath);
957                 pg_error_internal();
958                 free(manpath);
959                 return;
960         }
961         free(manpath);
962
963         if ( ! validate_filename(file)) {
964                 pg_error_badrequest(
965                     "You specified an invalid manual file.");
966                 return;
967         }
968
969         resp_begin_html(200, NULL, file);
970         resp_searchform(req, FOCUS_NONE);
971         resp_show(req, file);
972         resp_end_html();
973 }
974
975 static void
976 pg_search(const struct req *req)
977 {
978         struct mansearch          search;
979         struct manpaths           paths;
980         struct manpage           *res;
981         char                    **argv;
982         char                     *query, *rp, *wp;
983         size_t                    ressz;
984         int                       argc;
985
986         /*
987          * Begin by chdir()ing into the root of the manpath.
988          * This way we can pick up the database files, which are
989          * relative to the manpath root.
990          */
991
992         if (chdir(req->q.manpath) == -1) {
993                 warn("chdir %s", req->q.manpath);
994                 pg_error_internal();
995                 return;
996         }
997
998         search.arch = req->q.arch;
999         search.sec = req->q.sec;
1000         search.outkey = "Nd";
1001         search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1002         search.firstmatch = 1;
1003
1004         paths.sz = 1;
1005         paths.paths = mandoc_malloc(sizeof(char *));
1006         paths.paths[0] = mandoc_strdup(".");
1007
1008         /*
1009          * Break apart at spaces with backslash-escaping.
1010          */
1011
1012         argc = 0;
1013         argv = NULL;
1014         rp = query = mandoc_strdup(req->q.query);
1015         for (;;) {
1016                 while (isspace((unsigned char)*rp))
1017                         rp++;
1018                 if (*rp == '\0')
1019                         break;
1020                 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1021                 argv[argc++] = wp = rp;
1022                 for (;;) {
1023                         if (isspace((unsigned char)*rp)) {
1024                                 *wp = '\0';
1025                                 rp++;
1026                                 break;
1027                         }
1028                         if (rp[0] == '\\' && rp[1] != '\0')
1029                                 rp++;
1030                         if (wp != rp)
1031                                 *wp = *rp;
1032                         if (*rp == '\0')
1033                                 break;
1034                         wp++;
1035                         rp++;
1036                 }
1037         }
1038
1039         res = NULL;
1040         ressz = 0;
1041         if (req->isquery && req->q.equal && argc == 1)
1042                 pg_redirect(req, argv[0]);
1043         else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1044                 pg_noresult(req, 400, "Bad Request",
1045                     "You entered an invalid query.");
1046         else if (ressz == 0)
1047                 pg_noresult(req, 404, "Not Found", "No results found.");
1048         else
1049                 pg_searchres(req, res, ressz);
1050
1051         free(query);
1052         mansearch_free(res, ressz);
1053         free(paths.paths[0]);
1054         free(paths.paths);
1055 }
1056
1057 int
1058 main(void)
1059 {
1060         struct req       req;
1061         struct itimerval itimer;
1062         const char      *path;
1063         const char      *querystring;
1064         int              i;
1065
1066 #if HAVE_PLEDGE
1067         /*
1068          * The "rpath" pledge could be revoked after mparse_readfd()
1069          * if the file desciptor to "/footer.html" would be opened
1070          * up front, but it's probably not worth the complication
1071          * of the code it would cause: it would require scattering
1072          * pledge() calls in multiple low-level resp_*() functions.
1073          */
1074
1075         if (pledge("stdio rpath", NULL) == -1) {
1076                 warn("pledge");
1077                 pg_error_internal();
1078                 return EXIT_FAILURE;
1079         }
1080 #endif
1081
1082         /* Poor man's ReDoS mitigation. */
1083
1084         itimer.it_value.tv_sec = 2;
1085         itimer.it_value.tv_usec = 0;
1086         itimer.it_interval.tv_sec = 2;
1087         itimer.it_interval.tv_usec = 0;
1088         if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1089                 warn("setitimer");
1090                 pg_error_internal();
1091                 return EXIT_FAILURE;
1092         }
1093
1094         /*
1095          * First we change directory into the MAN_DIR so that
1096          * subsequent scanning for manpath directories is rooted
1097          * relative to the same position.
1098          */
1099
1100         if (chdir(MAN_DIR) == -1) {
1101                 warn("MAN_DIR: %s", MAN_DIR);
1102                 pg_error_internal();
1103                 return EXIT_FAILURE;
1104         }
1105
1106         memset(&req, 0, sizeof(struct req));
1107         req.q.equal = 1;
1108         parse_manpath_conf(&req);
1109
1110         /* Parse the path info and the query string. */
1111
1112         if ((path = getenv("PATH_INFO")) == NULL)
1113                 path = "";
1114         else if (*path == '/')
1115                 path++;
1116
1117         if (*path != '\0') {
1118                 parse_path_info(&req, path);
1119                 if (req.q.manpath == NULL || req.q.sec == NULL ||
1120                     *req.q.query == '\0' || access(path, F_OK) == -1)
1121                         path = "";
1122         } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1123                 parse_query_string(&req, querystring);
1124
1125         /* Validate parsed data and add defaults. */
1126
1127         if (req.q.manpath == NULL)
1128                 req.q.manpath = mandoc_strdup(req.p[0]);
1129         else if ( ! validate_manpath(&req, req.q.manpath)) {
1130                 pg_error_badrequest(
1131                     "You specified an invalid manpath.");
1132                 return EXIT_FAILURE;
1133         }
1134
1135         if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1136                 pg_error_badrequest(
1137                     "You specified an invalid architecture.");
1138                 return EXIT_FAILURE;
1139         }
1140
1141         /* Dispatch to the three different pages. */
1142
1143         if ('\0' != *path)
1144                 pg_show(&req, path);
1145         else if (NULL != req.q.query)
1146                 pg_search(&req);
1147         else
1148                 pg_index(&req);
1149
1150         free(req.q.manpath);
1151         free(req.q.arch);
1152         free(req.q.sec);
1153         free(req.q.query);
1154         for (i = 0; i < (int)req.psz; i++)
1155                 free(req.p[i]);
1156         free(req.p);
1157         return EXIT_SUCCESS;
1158 }
1159
1160 /*
1161  * Translate PATH_INFO to a query.
1162  */
1163 static void
1164 parse_path_info(struct req *req, const char *path)
1165 {
1166         const char      *name, *sec, *end;
1167
1168         req->isquery = 0;
1169         req->q.equal = 1;
1170         req->q.manpath = NULL;
1171         req->q.arch = NULL;
1172
1173         /* Mandatory manual page name. */
1174         if ((name = strrchr(path, '/')) == NULL)
1175                 name = path;
1176         else
1177                 name++;
1178
1179         /* Optional trailing section. */
1180         sec = strrchr(name, '.');
1181         if (sec != NULL && isdigit((unsigned char)*++sec)) {
1182                 req->q.query = mandoc_strndup(name, sec - name - 1);
1183                 req->q.sec = mandoc_strdup(sec);
1184         } else {
1185                 req->q.query = mandoc_strdup(name);
1186                 req->q.sec = NULL;
1187         }
1188
1189         /* Handle the case of name[.section] only. */
1190         if (name == path)
1191                 return;
1192
1193         /* Optional manpath. */
1194         end = strchr(path, '/');
1195         req->q.manpath = mandoc_strndup(path, end - path);
1196         if (validate_manpath(req, req->q.manpath)) {
1197                 path = end + 1;
1198                 if (name == path)
1199                         return;
1200         } else {
1201                 free(req->q.manpath);
1202                 req->q.manpath = NULL;
1203         }
1204
1205         /* Optional section. */
1206         if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1207                 path += 3;
1208                 end = strchr(path, '/');
1209                 free(req->q.sec);
1210                 req->q.sec = mandoc_strndup(path, end - path);
1211                 path = end + 1;
1212                 if (name == path)
1213                         return;
1214         }
1215
1216         /* Optional architecture. */
1217         end = strchr(path, '/');
1218         if (end + 1 != name) {
1219                 pg_error_badrequest(
1220                     "You specified too many directory components.");
1221                 exit(EXIT_FAILURE);
1222         }
1223         req->q.arch = mandoc_strndup(path, end - path);
1224         if (validate_arch(req->q.arch) == 0) {
1225                 pg_error_badrequest(
1226                     "You specified an invalid directory component.");
1227                 exit(EXIT_FAILURE);
1228         }
1229 }
1230
1231 /*
1232  * Scan for indexable paths.
1233  */
1234 static void
1235 parse_manpath_conf(struct req *req)
1236 {
1237         FILE    *fp;
1238         char    *dp;
1239         size_t   dpsz;
1240         ssize_t  len;
1241
1242         if ((fp = fopen("manpath.conf", "r")) == NULL) {
1243                 warn("%s/manpath.conf", MAN_DIR);
1244                 pg_error_internal();
1245                 exit(EXIT_FAILURE);
1246         }
1247
1248         dp = NULL;
1249         dpsz = 0;
1250
1251         while ((len = getline(&dp, &dpsz, fp)) != -1) {
1252                 if (dp[len - 1] == '\n')
1253                         dp[--len] = '\0';
1254                 req->p = mandoc_realloc(req->p,
1255                     (req->psz + 1) * sizeof(char *));
1256                 if ( ! validate_urifrag(dp)) {
1257                         warnx("%s/manpath.conf contains "
1258                             "unsafe path \"%s\"", MAN_DIR, dp);
1259                         pg_error_internal();
1260                         exit(EXIT_FAILURE);
1261                 }
1262                 if (strchr(dp, '/') != NULL) {
1263                         warnx("%s/manpath.conf contains "
1264                             "path with slash \"%s\"", MAN_DIR, dp);
1265                         pg_error_internal();
1266                         exit(EXIT_FAILURE);
1267                 }
1268                 req->p[req->psz++] = dp;
1269                 dp = NULL;
1270                 dpsz = 0;
1271         }
1272         free(dp);
1273
1274         if (req->p == NULL) {
1275                 warnx("%s/manpath.conf is empty", MAN_DIR);
1276                 pg_error_internal();
1277                 exit(EXIT_FAILURE);
1278         }
1279 }