]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mdocml/cgi.c
bsdinstall: Make sure chroot filesystems are umounted after use
[FreeBSD/FreeBSD.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.144 2017/01/21 01:20:31 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <err.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <limits.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <unistd.h>
33
34 #include "mandoc_aux.h"
35 #include "mandoc.h"
36 #include "roff.h"
37 #include "mdoc.h"
38 #include "man.h"
39 #include "main.h"
40 #include "manconf.h"
41 #include "mansearch.h"
42 #include "cgi.h"
43
/*
 * The search parameters of one request,
 * in the form that is handed to the search function.
 */
struct query {
        /* desired manual directory */
        char    *manpath;
        /* architecture */
        char    *arch;
        /* manual section */
        char    *sec;
        /* unparsed query expression */
        char    *query;
        /* match whole names, not substrings */
        int      equal;
};
54
55 struct  req {
56         struct query      q;
57         char            **p; /* array of available manpaths */
58         size_t            psz; /* number of available manpaths */
59         int               isquery; /* QUERY_STRING used, not PATH_INFO */
60 };
61
/*
 * Which element of the search form, if any, gets the input focus.
 */
enum focus {
        FOCUS_NONE  = 0,        /* no element is focused */
        FOCUS_QUERY = 1         /* the query text input is focused */
};
66
67 static  void             html_print(const char *);
68 static  void             html_putchar(char);
69 static  int              http_decode(char *);
70 static  void             parse_manpath_conf(struct req *);
71 static  void             parse_path_info(struct req *req, const char *path);
72 static  void             parse_query_string(struct req *, const char *);
73 static  void             pg_error_badrequest(const char *);
74 static  void             pg_error_internal(void);
75 static  void             pg_index(const struct req *);
76 static  void             pg_noresult(const struct req *, const char *);
77 static  void             pg_search(const struct req *);
78 static  void             pg_searchres(const struct req *,
79                                 struct manpage *, size_t);
80 static  void             pg_show(struct req *, const char *);
81 static  void             resp_begin_html(int, const char *);
82 static  void             resp_begin_http(int, const char *);
83 static  void             resp_catman(const struct req *, const char *);
84 static  void             resp_copy(const char *);
85 static  void             resp_end_html(void);
86 static  void             resp_format(const struct req *, const char *);
87 static  void             resp_searchform(const struct req *, enum focus);
88 static  void             resp_show(const struct req *, const char *);
89 static  void             set_query_attr(char **, char **);
90 static  int              validate_filename(const char *);
91 static  int              validate_manpath(const struct req *, const char *);
92 static  int              validate_urifrag(const char *);
93
94 static  const char       *scriptname = SCRIPT_NAME;
95
/*
 * Display priority of manual sections, indexed by the section
 * digit minus '1'.  A smaller value is preferred when one page
 * has to be picked from several results.
 */
static  const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector; parallel to sec_names[]. */
static  const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};

/* Labels of the section selector; parallel to sec_numbers[]. */
static  const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static  const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
114
/* Architecture names offered by the architecture selector. */
static  const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",       "hppa",
    "i386",        "landisk",     "loongson",    "luna88k",
    "macppc",      "mips64",      "octeon",      "sgi",
    "socppc",      "sparc64",     "amiga",       "arc",
    "armish",      "arm32",       "atari",       "aviion",
    "beagle",      "cats",        "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};

/* Number of entries in arch_names[]. */
static  const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
130
/*
 * Write one character to standard output, replacing the four
 * characters '"', '&', '>' and '<' by their HTML entities.
 * Everything else, including non-ASCII bytes, is written unchanged.
 */
static void
html_putchar(char c)
{
        const char      *entity;

        if (c == '"')
                entity = "&quot;";
        else if (c == '&')
                entity = "&amp;";
        else if (c == '>')
                entity = "&gt;";
        else if (c == '<')
                entity = "&lt;";
        else
                entity = NULL;

        if (entity == NULL)
                putchar((unsigned char)c);
        else
                fputs(entity, stdout);
}
157
/*
 * Write a whole string through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
        const char      *cp;

        if (p == NULL)
                return;

        for (cp = p; *cp != '\0'; cp++)
                html_putchar(*cp);
}
171
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
        char    *incoming;

        incoming = *val;
        free(*attr);
        *val = NULL;

        if (incoming[0] != '\0') {
                *attr = incoming;
                return;
        }

        free(incoming);
        *attr = NULL;
}
188
189 /*
190  * Parse the QUERY_STRING for key-value pairs
191  * and store the values into the query structure.
192  */
193 static void
194 parse_query_string(struct req *req, const char *qs)
195 {
196         char            *key, *val;
197         size_t           keysz, valsz;
198
199         req->isquery    = 1;
200         req->q.manpath  = NULL;
201         req->q.arch     = NULL;
202         req->q.sec      = NULL;
203         req->q.query    = NULL;
204         req->q.equal    = 1;
205
206         key = val = NULL;
207         while (*qs != '\0') {
208
209                 /* Parse one key. */
210
211                 keysz = strcspn(qs, "=;&");
212                 key = mandoc_strndup(qs, keysz);
213                 qs += keysz;
214                 if (*qs != '=')
215                         goto next;
216
217                 /* Parse one value. */
218
219                 valsz = strcspn(++qs, ";&");
220                 val = mandoc_strndup(qs, valsz);
221                 qs += valsz;
222
223                 /* Decode and catch encoding errors. */
224
225                 if ( ! (http_decode(key) && http_decode(val)))
226                         goto next;
227
228                 /* Handle key-value pairs. */
229
230                 if ( ! strcmp(key, "query"))
231                         set_query_attr(&req->q.query, &val);
232
233                 else if ( ! strcmp(key, "apropos"))
234                         req->q.equal = !strcmp(val, "0");
235
236                 else if ( ! strcmp(key, "manpath")) {
237 #ifdef COMPAT_OLDURI
238                         if ( ! strncmp(val, "OpenBSD ", 8)) {
239                                 val[7] = '-';
240                                 if ('C' == val[8])
241                                         val[8] = 'c';
242                         }
243 #endif
244                         set_query_attr(&req->q.manpath, &val);
245                 }
246
247                 else if ( ! (strcmp(key, "sec")
248 #ifdef COMPAT_OLDURI
249                     && strcmp(key, "sektion")
250 #endif
251                     )) {
252                         if ( ! strcmp(val, "0"))
253                                 *val = '\0';
254                         set_query_attr(&req->q.sec, &val);
255                 }
256
257                 else if ( ! strcmp(key, "arch")) {
258                         if ( ! strcmp(val, "default"))
259                                 *val = '\0';
260                         set_query_attr(&req->q.arch, &val);
261                 }
262
263                 /*
264                  * The key must be freed in any case.
265                  * The val may have been handed over to the query
266                  * structure, in which case it is now NULL.
267                  */
268 next:
269                 free(key);
270                 key = NULL;
271                 free(val);
272                 val = NULL;
273
274                 if (*qs != '\0')
275                         qs++;
276         }
277 }
278
/*
 * HTTP-decode a string in place.  For example, this turns
 * "%6e+foo" into "n foo": each '+' becomes a blank and each
 * "%XX" escape becomes the byte with hexadecimal value XX.
 *
 * Returns 1 on success.  Returns 0 if an escape is truncated,
 * if either of its two characters is not a hexadecimal digit,
 * or if it encodes a NUL byte; the string contents are then
 * unspecified and must not be used.
 */
static int
http_decode(char *p)
{
        char            *q;
        unsigned char    h;
        int              c, i;

        q = p;
        for ( ; *p != '\0'; p++, q++) {
                if (*p == '+') {
                        *q = ' ';
                        continue;
                }
                if (*p != '%') {
                        *q = *p;
                        continue;
                }

                /*
                 * Require exactly two hex digits.  Checking the
                 * first digit before looking at the second also
                 * stops at a premature end of the string, because
                 * NUL is not a hex digit.
                 */

                c = 0;
                for (i = 1; i <= 2; i++) {
                        h = (unsigned char)p[i];
                        if ( ! isxdigit(h))
                                return 0;
                        c *= 16;
                        if (isdigit(h))
                                c += h - '0';
                        else
                                c += tolower(h) - 'a' + 10;
                }

                /* An embedded NUL would truncate the string. */

                if (c == 0)
                        return 0;

                *q = (char)c;
                p += 2;
        }

        *q = '\0';
        return 1;
}
314
/*
 * Write the HTTP response header block and flush standard output.
 * A "Status:" line built from code and msg is only written when the
 * code is not 200; msg is not used otherwise.
 */
static void
resp_begin_http(int code, const char *msg)
{
        const int        is_ok = (code == 200);

        if ( ! is_ok)
                printf("Status: %d %s\r\n", code, msg);

        fputs("Content-Type: text/html; charset=utf-8\r\n"
             "Cache-Control: no-cache\r\n"
             "Pragma: no-cache\r\n"
             "\r\n", stdout);

        fflush(stdout);
}
329
/*
 * Copy the contents of a file verbatim to standard output.
 * Buffered stdio output is flushed first, because the data is
 * written directly to the file descriptor.
 *
 * This is best effort: if the file cannot be opened, nothing is
 * written, and copying stops silently at the first read or write
 * error.  Short writes and interrupted calls are retried, such
 * that the file is not truncated by a partial write.
 */
static void
resp_copy(const char *filename)
{
        char     buf[4096];
        ssize_t  nr, nw, off;
        int      fd;

        if ((fd = open(filename, O_RDONLY)) == -1)
                return;

        fflush(stdout);
        for (;;) {
                nr = read(fd, buf, sizeof(buf));
                if (nr == -1 && errno == EINTR)
                        continue;
                if (nr <= 0)
                        break;

                /* Keep writing until the whole chunk is out. */

                off = 0;
                while (off < nr) {
                        nw = write(STDOUT_FILENO, buf + off,
                            (size_t)(nr - off));
                        if (nw > 0) {
                                off += nw;
                                continue;
                        }
                        if (nw == -1 && errno == EINTR)
                                continue;
                        goto done;
                }
        }
done:
        close(fd);
}
344
345 static void
346 resp_begin_html(int code, const char *msg)
347 {
348
349         resp_begin_http(code, msg);
350
351         printf("<!DOCTYPE html>\n"
352                "<html>\n"
353                "<head>\n"
354                "  <meta charset=\"UTF-8\"/>\n"
355                "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
356                " type=\"text/css\" media=\"all\">\n"
357                "  <title>%s</title>\n"
358                "</head>\n"
359                "<body>\n",
360                CSS_DIR, CUSTOMIZE_TITLE);
361
362         resp_copy(MAN_DIR "/header.html");
363 }
364
365 static void
366 resp_end_html(void)
367 {
368
369         resp_copy(MAN_DIR "/footer.html");
370
371         puts("</body>\n"
372              "</html>");
373 }
374
375 static void
376 resp_searchform(const struct req *req, enum focus focus)
377 {
378         int              i;
379
380         printf("<form action=\"/%s\" method=\"get\">\n"
381                "  <fieldset>\n"
382                "    <legend>Manual Page Search Parameters</legend>\n",
383                scriptname);
384
385         /* Write query input box. */
386
387         printf("    <input type=\"text\" name=\"query\" value=\"");
388         if (req->q.query != NULL)
389                 html_print(req->q.query);
390         printf( "\" size=\"40\"");
391         if (focus == FOCUS_QUERY)
392                 printf(" autofocus");
393         puts(">");
394
395         /* Write submission buttons. */
396
397         printf( "    <button type=\"submit\" name=\"apropos\" value=\"0\">"
398                 "man</button>\n"
399                 "    <button type=\"submit\" name=\"apropos\" value=\"1\">"
400                 "apropos</button>\n"
401                 "    <br/>\n");
402
403         /* Write section selector. */
404
405         puts("    <select name=\"sec\">");
406         for (i = 0; i < sec_MAX; i++) {
407                 printf("      <option value=\"%s\"", sec_numbers[i]);
408                 if (NULL != req->q.sec &&
409                     0 == strcmp(sec_numbers[i], req->q.sec))
410                         printf(" selected=\"selected\"");
411                 printf(">%s</option>\n", sec_names[i]);
412         }
413         puts("    </select>");
414
415         /* Write architecture selector. */
416
417         printf( "    <select name=\"arch\">\n"
418                 "      <option value=\"default\"");
419         if (NULL == req->q.arch)
420                 printf(" selected=\"selected\"");
421         puts(">All Architectures</option>");
422         for (i = 0; i < arch_MAX; i++) {
423                 printf("      <option value=\"%s\"", arch_names[i]);
424                 if (NULL != req->q.arch &&
425                     0 == strcmp(arch_names[i], req->q.arch))
426                         printf(" selected=\"selected\"");
427                 printf(">%s</option>\n", arch_names[i]);
428         }
429         puts("    </select>");
430
431         /* Write manpath selector. */
432
433         if (req->psz > 1) {
434                 puts("    <select name=\"manpath\">");
435                 for (i = 0; i < (int)req->psz; i++) {
436                         printf("      <option ");
437                         if (strcmp(req->q.manpath, req->p[i]) == 0)
438                                 printf("selected=\"selected\" ");
439                         printf("value=\"");
440                         html_print(req->p[i]);
441                         printf("\">");
442                         html_print(req->p[i]);
443                         puts("</option>");
444                 }
445                 puts("    </select>");
446         }
447
448         puts("  </fieldset>\n"
449              "</form>");
450 }
451
/*
 * Check that a URI fragment consists only of alphanumeric
 * characters and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
        const unsigned char     *cp;

        for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
                switch (*cp) {
                case '-':
                case '.':
                case '/':
                case '_':
                        continue;
                default:
                        break;
                }
                if ( ! isalnum(*cp))
                        return 0;
        }
        return 1;
}
465
466 static int
467 validate_manpath(const struct req *req, const char* manpath)
468 {
469         size_t   i;
470
471         for (i = 0; i < req->psz; i++)
472                 if ( ! strcmp(manpath, req->p[i]))
473                         return 1;
474
475         return 0;
476 }
477
/*
 * Check a manual page file name.  After skipping an optional
 * leading "./", the name must start with "man" or "cat" and must
 * not contain "../" or "/..".  Returns 1 if valid, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

        if (file[0] == '.' && file[1] == '/')
                file += 2;

        if (strstr(file, "../") != NULL)
                return 0;
        if (strstr(file, "/..") != NULL)
                return 0;

        if (strncmp(file, "man", 3) == 0)
                return 1;
        return strncmp(file, "cat", 3) == 0;
}
488
489 static void
490 pg_index(const struct req *req)
491 {
492
493         resp_begin_html(200, NULL);
494         resp_searchform(req, FOCUS_QUERY);
495         printf("<p>\n"
496                "This web interface is documented in the\n"
497                "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
498                "manual, and the\n"
499                "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
500                "manual explains the query syntax.\n"
501                "</p>\n",
502                scriptname, *scriptname == '\0' ? "" : "/",
503                scriptname, *scriptname == '\0' ? "" : "/");
504         resp_end_html();
505 }
506
507 static void
508 pg_noresult(const struct req *req, const char *msg)
509 {
510         resp_begin_html(200, NULL);
511         resp_searchform(req, FOCUS_QUERY);
512         puts("<p>");
513         puts(msg);
514         puts("</p>");
515         resp_end_html();
516 }
517
518 static void
519 pg_error_badrequest(const char *msg)
520 {
521
522         resp_begin_html(400, "Bad Request");
523         puts("<h1>Bad Request</h1>\n"
524              "<p>\n");
525         puts(msg);
526         printf("Try again from the\n"
527                "<a href=\"/%s\">main page</a>.\n"
528                "</p>", scriptname);
529         resp_end_html();
530 }
531
/*
 * Write a minimal "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{

        resp_begin_html(500, "Internal Server Error");
        printf("%s\n", "<p>Internal Server Error</p>");
        resp_end_html();
}
539
540 static void
541 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
542 {
543         char            *arch, *archend;
544         const char      *sec;
545         size_t           i, iuse;
546         int              archprio, archpriouse;
547         int              prio, priouse;
548
549         for (i = 0; i < sz; i++) {
550                 if (validate_filename(r[i].file))
551                         continue;
552                 warnx("invalid filename %s in %s database",
553                     r[i].file, req->q.manpath);
554                 pg_error_internal();
555                 return;
556         }
557
558         if (req->isquery && sz == 1) {
559                 /*
560                  * If we have just one result, then jump there now
561                  * without any delay.
562                  */
563                 printf("Status: 303 See Other\r\n");
564                 printf("Location: http://%s/%s%s%s/%s",
565                     HTTP_HOST, scriptname,
566                     *scriptname == '\0' ? "" : "/",
567                     req->q.manpath, r[0].file);
568                 printf("\r\n"
569                      "Content-Type: text/html; charset=utf-8\r\n"
570                      "\r\n");
571                 return;
572         }
573
574         resp_begin_html(200, NULL);
575         resp_searchform(req,
576             req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
577
578         if (sz > 1) {
579                 puts("<table class=\"results\">");
580                 for (i = 0; i < sz; i++) {
581                         printf("  <tr>\n"
582                                "    <td>"
583                                "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
584                             scriptname, *scriptname == '\0' ? "" : "/",
585                             req->q.manpath, r[i].file);
586                         html_print(r[i].names);
587                         printf("</a></td>\n"
588                                "    <td><span class=\"Nd\">");
589                         html_print(r[i].output);
590                         puts("</span></td>\n"
591                              "  </tr>");
592                 }
593                 puts("</table>");
594         }
595
596         /*
597          * In man(1) mode, show one of the pages
598          * even if more than one is found.
599          */
600
601         if (req->q.equal || sz == 1) {
602                 puts("<hr>");
603                 iuse = 0;
604                 priouse = 20;
605                 archpriouse = 3;
606                 for (i = 0; i < sz; i++) {
607                         sec = r[i].file;
608                         sec += strcspn(sec, "123456789");
609                         if (sec[0] == '\0')
610                                 continue;
611                         prio = sec_prios[sec[0] - '1'];
612                         if (sec[1] != '/')
613                                 prio += 10;
614                         if (req->q.arch == NULL) {
615                                 archprio =
616                                     ((arch = strchr(sec + 1, '/'))
617                                         == NULL) ? 3 :
618                                     ((archend = strchr(arch + 1, '/'))
619                                         == NULL) ? 0 :
620                                     strncmp(arch, "amd64/",
621                                         archend - arch) ? 2 : 1;
622                                 if (archprio < archpriouse) {
623                                         archpriouse = archprio;
624                                         priouse = prio;
625                                         iuse = i;
626                                         continue;
627                                 }
628                                 if (archprio > archpriouse)
629                                         continue;
630                         }
631                         if (prio >= priouse)
632                                 continue;
633                         priouse = prio;
634                         iuse = i;
635                 }
636                 resp_show(req, r[iuse].file);
637         }
638
639         resp_end_html();
640 }
641
/*
 * Write a preformatted manual page (catpage) as HTML.
 * The file is read line by line, and backspace overstrike
 * sequences are translated: "_\bX" becomes italic X, a few special
 * pairs become '*' or '+', and any other "X\bY" becomes bold Y.
 * If the file cannot be opened, an error paragraph is written
 * instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
        FILE            *f;
        char            *p;
        size_t           sz;
        ssize_t          len;
        int              i;
        int              italic, bold;

        if ((f = fopen(file, "r")) == NULL) {
                puts("<p>You specified an invalid manual file.</p>");
                return;
        }

        puts("<div class=\"catman\">\n"
             "<pre>");

        p = NULL;
        sz = 0;

        while ((len = getline(&p, &sz, f)) != -1) {
                bold = italic = 0;
                for (i = 0; i < len - 1; i++) {
                        /*
                         * This means that the catpage is out of state.
                         * Ignore it and keep going (although the
                         * catpage is bogus).
                         */

                        if (p[i] == '\b' || p[i] == '\n')
                                continue;

                        /*
                         * Print a regular character.
                         * Close out any bold/italic scopes.
                         * If we're in back-space mode, make sure we'll
                         * have something to enter when we backspace.
                         */

                        if (p[i + 1] != '\b') {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                html_putchar(p[i]);
                                continue;
                        } else if (i + 2 >= len)
                                continue;

                        /*
                         * From here on, p[i + 1] is a backspace and
                         * p[i + 2] is the overstriking character.
                         */

                        /* Italic mode. */

                        if (p[i] == '_') {
                                if (bold)
                                        printf("</b>");
                                if ( ! italic)
                                        printf("<i>");
                                bold = 0;
                                italic = 1;
                                i += 2;
                                html_putchar(p[i]);
                                continue;
                        }

                        /*
                         * Handle funny behaviour troff-isms.
                         * These grok'd from the original man2html.c.
                         * Both groups have to compare p[i] with
                         * p[i + 2]; p[i + 1] is always '\b' here.
                         */

                        if ((p[i] == '+' && p[i + 2] == 'o') ||
                            (p[i] == 'o' && p[i + 2] == '+') ||
                            (p[i] == '|' && p[i + 2] == '=') ||
                            (p[i] == '=' && p[i + 2] == '|') ||
                            (p[i] == '*' && p[i + 2] == '=') ||
                            (p[i] == '=' && p[i + 2] == '*') ||
                            (p[i] == '*' && p[i + 2] == '|') ||
                            (p[i] == '|' && p[i + 2] == '*')) {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                putchar('*');
                                i += 2;
                                continue;
                        } else if ((p[i] == '|' && p[i + 2] == '-') ||
                            (p[i] == '-' && p[i + 2] == '|') ||
                            (p[i] == '+' && p[i + 2] == '-') ||
                            (p[i] == '-' && p[i + 2] == '+') ||
                            (p[i] == '+' && p[i + 2] == '|') ||
                            (p[i] == '|' && p[i + 2] == '+')) {
                                if (italic)
                                        printf("</i>");
                                if (bold)
                                        printf("</b>");
                                italic = bold = 0;
                                putchar('+');
                                i += 2;
                                continue;
                        }

                        /* Bold mode. */

                        if (italic)
                                printf("</i>");
                        if ( ! bold)
                                printf("<b>");
                        bold = 1;
                        italic = 0;
                        i += 2;
                        html_putchar(p[i]);
                }

                /*
                 * Clean up the last character.
                 * We can get to a newline; don't print that.
                 */

                if (italic)
                        printf("</i>");
                if (bold)
                        printf("</b>");

                if (i == len - 1 && p[i] != '\n')
                        html_putchar(p[i]);

                putchar('\n');
        }
        free(p);

        puts("</pre>\n"
             "</div>");

        fclose(f);
}
778
779 static void
780 resp_format(const struct req *req, const char *file)
781 {
782         struct manoutput conf;
783         struct mparse   *mp;
784         struct roff_man *man;
785         void            *vp;
786         int              fd;
787         int              usepath;
788
789         if (-1 == (fd = open(file, O_RDONLY, 0))) {
790                 puts("<p>You specified an invalid manual file.</p>");
791                 return;
792         }
793
794         mchars_alloc();
795         mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
796             MANDOCLEVEL_BADARG, NULL, req->q.manpath);
797         mparse_readfd(mp, fd, file);
798         close(fd);
799
800         memset(&conf, 0, sizeof(conf));
801         conf.fragment = 1;
802         usepath = strcmp(req->q.manpath, req->p[0]);
803         mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
804             usepath ? req->q.manpath : "", usepath ? "/" : "");
805
806         mparse_result(mp, &man, NULL);
807         if (man == NULL) {
808                 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
809                 pg_error_internal();
810                 mparse_free(mp);
811                 mchars_free();
812                 return;
813         }
814
815         vp = html_alloc(&conf);
816
817         if (man->macroset == MACROSET_MDOC) {
818                 mdoc_validate(man);
819                 html_mdoc(vp, man);
820         } else {
821                 man_validate(man);
822                 html_man(vp, man);
823         }
824
825         html_free(vp);
826         mparse_free(mp);
827         mchars_free();
828         free(conf.man);
829 }
830
/*
 * Write the content of one manual page.  After skipping an
 * optional leading "./", names starting with 'c' are handled by
 * resp_catman() and all others by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
        const char      *name;

        name = file;
        if (name[0] == '.' && name[1] == '/')
                name += 2;

        if (name[0] != 'c') {
                resp_format(req, name);
                return;
        }
        resp_catman(req, name);
}
843
844 static void
845 pg_show(struct req *req, const char *fullpath)
846 {
847         char            *manpath;
848         const char      *file;
849
850         if ((file = strchr(fullpath, '/')) == NULL) {
851                 pg_error_badrequest(
852                     "You did not specify a page to show.");
853                 return;
854         }
855         manpath = mandoc_strndup(fullpath, file - fullpath);
856         file++;
857
858         if ( ! validate_manpath(req, manpath)) {
859                 pg_error_badrequest(
860                     "You specified an invalid manpath.");
861                 free(manpath);
862                 return;
863         }
864
865         /*
866          * Begin by chdir()ing into the manpath.
867          * This way we can pick up the database files, which are
868          * relative to the manpath root.
869          */
870
871         if (chdir(manpath) == -1) {
872                 warn("chdir %s", manpath);
873                 pg_error_internal();
874                 free(manpath);
875                 return;
876         }
877         free(manpath);
878
879         if ( ! validate_filename(file)) {
880                 pg_error_badrequest(
881                     "You specified an invalid manual file.");
882                 return;
883         }
884
885         resp_begin_html(200, NULL);
886         resp_searchform(req, FOCUS_NONE);
887         resp_show(req, file);
888         resp_end_html();
889 }
890
891 static void
892 pg_search(const struct req *req)
893 {
894         struct mansearch          search;
895         struct manpaths           paths;
896         struct manpage           *res;
897         char                    **argv;
898         char                     *query, *rp, *wp;
899         size_t                    ressz;
900         int                       argc;
901
902         /*
903          * Begin by chdir()ing into the root of the manpath.
904          * This way we can pick up the database files, which are
905          * relative to the manpath root.
906          */
907
908         if (chdir(req->q.manpath) == -1) {
909                 warn("chdir %s", req->q.manpath);
910                 pg_error_internal();
911                 return;
912         }
913
914         search.arch = req->q.arch;
915         search.sec = req->q.sec;
916         search.outkey = "Nd";
917         search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
918         search.firstmatch = 1;
919
920         paths.sz = 1;
921         paths.paths = mandoc_malloc(sizeof(char *));
922         paths.paths[0] = mandoc_strdup(".");
923
924         /*
925          * Break apart at spaces with backslash-escaping.
926          */
927
928         argc = 0;
929         argv = NULL;
930         rp = query = mandoc_strdup(req->q.query);
931         for (;;) {
932                 while (isspace((unsigned char)*rp))
933                         rp++;
934                 if (*rp == '\0')
935                         break;
936                 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
937                 argv[argc++] = wp = rp;
938                 for (;;) {
939                         if (isspace((unsigned char)*rp)) {
940                                 *wp = '\0';
941                                 rp++;
942                                 break;
943                         }
944                         if (rp[0] == '\\' && rp[1] != '\0')
945                                 rp++;
946                         if (wp != rp)
947                                 *wp = *rp;
948                         if (*rp == '\0')
949                                 break;
950                         wp++;
951                         rp++;
952                 }
953         }
954
955         if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
956                 pg_noresult(req, "You entered an invalid query.");
957         else if (0 == ressz)
958                 pg_noresult(req, "No results found.");
959         else
960                 pg_searchres(req, res, ressz);
961
962         free(query);
963         mansearch_free(res, ressz);
964         free(paths.paths[0]);
965         free(paths.paths);
966 }
967
968 int
969 main(void)
970 {
971         struct req       req;
972         struct itimerval itimer;
973         const char      *path;
974         const char      *querystring;
975         int              i;
976
977         /* Poor man's ReDoS mitigation. */
978
979         itimer.it_value.tv_sec = 2;
980         itimer.it_value.tv_usec = 0;
981         itimer.it_interval.tv_sec = 2;
982         itimer.it_interval.tv_usec = 0;
983         if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
984                 warn("setitimer");
985                 pg_error_internal();
986                 return EXIT_FAILURE;
987         }
988
989         /*
990          * First we change directory into the MAN_DIR so that
991          * subsequent scanning for manpath directories is rooted
992          * relative to the same position.
993          */
994
995         if (chdir(MAN_DIR) == -1) {
996                 warn("MAN_DIR: %s", MAN_DIR);
997                 pg_error_internal();
998                 return EXIT_FAILURE;
999         }
1000
1001         memset(&req, 0, sizeof(struct req));
1002         req.q.equal = 1;
1003         parse_manpath_conf(&req);
1004
1005         /* Parse the path info and the query string. */
1006
1007         if ((path = getenv("PATH_INFO")) == NULL)
1008                 path = "";
1009         else if (*path == '/')
1010                 path++;
1011
1012         if (*path != '\0') {
1013                 parse_path_info(&req, path);
1014                 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1015                         path = "";
1016         } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1017                 parse_query_string(&req, querystring);
1018
1019         /* Validate parsed data and add defaults. */
1020
1021         if (req.q.manpath == NULL)
1022                 req.q.manpath = mandoc_strdup(req.p[0]);
1023         else if ( ! validate_manpath(&req, req.q.manpath)) {
1024                 pg_error_badrequest(
1025                     "You specified an invalid manpath.");
1026                 return EXIT_FAILURE;
1027         }
1028
1029         if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1030                 pg_error_badrequest(
1031                     "You specified an invalid architecture.");
1032                 return EXIT_FAILURE;
1033         }
1034
1035         /* Dispatch to the three different pages. */
1036
1037         if ('\0' != *path)
1038                 pg_show(&req, path);
1039         else if (NULL != req.q.query)
1040                 pg_search(&req);
1041         else
1042                 pg_index(&req);
1043
1044         free(req.q.manpath);
1045         free(req.q.arch);
1046         free(req.q.sec);
1047         free(req.q.query);
1048         for (i = 0; i < (int)req.psz; i++)
1049                 free(req.p[i]);
1050         free(req.p);
1051         return EXIT_SUCCESS;
1052 }
1053
1054 /*
1055  * If PATH_INFO is not a file name, translate it to a query.
1056  */
1057 static void
1058 parse_path_info(struct req *req, const char *path)
1059 {
1060         char    *dir[4];
1061         int      i;
1062
1063         req->isquery = 0;
1064         req->q.equal = 1;
1065         req->q.manpath = mandoc_strdup(path);
1066         req->q.arch = NULL;
1067
1068         /* Mandatory manual page name. */
1069         if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1070                 req->q.query = req->q.manpath;
1071                 req->q.manpath = NULL;
1072         } else
1073                 *req->q.query++ = '\0';
1074
1075         /* Optional trailing section. */
1076         if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1077                 if(isdigit((unsigned char)req->q.sec[1])) {
1078                         *req->q.sec++ = '\0';
1079                         req->q.sec = mandoc_strdup(req->q.sec);
1080                 } else
1081                         req->q.sec = NULL;
1082         }
1083
1084         /* Handle the case of name[.section] only. */
1085         if (req->q.manpath == NULL)
1086                 return;
1087         req->q.query = mandoc_strdup(req->q.query);
1088
1089         /* Split directory components. */
1090         dir[i = 0] = req->q.manpath;
1091         while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1092                 if (++i == 3) {
1093                         pg_error_badrequest(
1094                             "You specified too many directory components.");
1095                         exit(EXIT_FAILURE);
1096                 }
1097                 *dir[i]++ = '\0';
1098         }
1099
1100         /* Optional manpath. */
1101         if ((i = validate_manpath(req, req->q.manpath)) == 0)
1102                 req->q.manpath = NULL;
1103         else if (dir[1] == NULL)
1104                 return;
1105
1106         /* Optional section. */
1107         if (strncmp(dir[i], "man", 3) == 0) {
1108                 free(req->q.sec);
1109                 req->q.sec = mandoc_strdup(dir[i++] + 3);
1110         }
1111         if (dir[i] == NULL) {
1112                 if (req->q.manpath == NULL)
1113                         free(dir[0]);
1114                 return;
1115         }
1116         if (dir[i + 1] != NULL) {
1117                 pg_error_badrequest(
1118                     "You specified an invalid directory component.");
1119                 exit(EXIT_FAILURE);
1120         }
1121
1122         /* Optional architecture. */
1123         if (i) {
1124                 req->q.arch = mandoc_strdup(dir[i]);
1125                 if (req->q.manpath == NULL)
1126                         free(dir[0]);
1127         } else
1128                 req->q.arch = dir[0];
1129 }
1130
1131 /*
1132  * Scan for indexable paths.
1133  */
1134 static void
1135 parse_manpath_conf(struct req *req)
1136 {
1137         FILE    *fp;
1138         char    *dp;
1139         size_t   dpsz;
1140         ssize_t  len;
1141
1142         if ((fp = fopen("manpath.conf", "r")) == NULL) {
1143                 warn("%s/manpath.conf", MAN_DIR);
1144                 pg_error_internal();
1145                 exit(EXIT_FAILURE);
1146         }
1147
1148         dp = NULL;
1149         dpsz = 0;
1150
1151         while ((len = getline(&dp, &dpsz, fp)) != -1) {
1152                 if (dp[len - 1] == '\n')
1153                         dp[--len] = '\0';
1154                 req->p = mandoc_realloc(req->p,
1155                     (req->psz + 1) * sizeof(char *));
1156                 if ( ! validate_urifrag(dp)) {
1157                         warnx("%s/manpath.conf contains "
1158                             "unsafe path \"%s\"", MAN_DIR, dp);
1159                         pg_error_internal();
1160                         exit(EXIT_FAILURE);
1161                 }
1162                 if (strchr(dp, '/') != NULL) {
1163                         warnx("%s/manpath.conf contains "
1164                             "path with slash \"%s\"", MAN_DIR, dp);
1165                         pg_error_internal();
1166                         exit(EXIT_FAILURE);
1167                 }
1168                 req->p[req->psz++] = dp;
1169                 dp = NULL;
1170                 dpsz = 0;
1171         }
1172         free(dp);
1173
1174         if (req->p == NULL) {
1175                 warnx("%s/manpath.conf is empty", MAN_DIR);
1176                 pg_error_internal();
1177                 exit(EXIT_FAILURE);
1178         }
1179 }