]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mdocml/cgi.c
Merge bmake-20170510
[FreeBSD/FreeBSD.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.147 2017/02/08 13:34:27 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * Search parameters of one request, as handed to the search function.
 * String members that were not supplied are NULL.
 */
struct query {
	/* desired manual directory */
	char	*manpath;
	/* architecture */
	char	*arch;
	/* manual section */
	char	*sec;
	/* unparsed query expression */
	char	*query;
	/* nonzero: match whole names, not substrings */
	int	 equal;
};
56
57 struct  req {
58         struct query      q;
59         char            **p; /* array of available manpaths */
60         size_t            psz; /* number of available manpaths */
61         int               isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/*
 * Selects whether the query input box of the search form
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE  = 0,	/* nothing is focused */
	FOCUS_QUERY = 1		/* focus the query input box */
};
68
/* Forward declarations of the file-local functions. */
static void	html_print(const char *p);
static void	html_putchar(char c);
static int	http_decode(char *p);
static void	parse_manpath_conf(struct req *req);
static void	parse_path_info(struct req *req, const char *path);
static void	parse_query_string(struct req *req, const char *qs);
static void	pg_error_badrequest(const char *msg);
static void	pg_error_internal(void);
static void	pg_index(const struct req *req);
static void	pg_noresult(const struct req *req, const char *msg);
static void	pg_search(const struct req *req);
static void	pg_searchres(const struct req *req,
		    struct manpage *r, size_t sz);
static void	pg_show(struct req *req, const char *fullpath);
static void	resp_begin_html(int code, const char *msg);
static void	resp_begin_http(int code, const char *msg);
static void	resp_catman(const struct req *req, const char *file);
static void	resp_copy(const char *filename);
static void	resp_end_html(void);
static void	resp_format(const struct req *req, const char *file);
static void	resp_searchform(const struct req *req, enum focus focus);
static void	resp_show(const struct req *req, const char *file);
static void	set_query_attr(char **attr, char **val);
static int	validate_filename(const char *file);
static int	validate_manpath(const struct req *req,
		    const char *manpath);
static int	validate_urifrag(const char *frag);
95
96 static  const char       *scriptname = SCRIPT_NAME;
97
/*
 * Preference rank of the manual sections 1 to 9, indexed by the
 * section digit minus one.  Smaller values are preferred when
 * choosing which of several matching pages to display.
 */
static const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values submitted by the section selector of the search form. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown in the section selector, parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};

/* Number of entries in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
116
/*
 * Architecture names offered by the architecture selector of the
 * search form, in the order in which they are listed.
 */
static const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64",
	"hppa", "i386", "landisk",
	"loongson", "luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc", "sparc64",
	"amiga", "arc", "armish", "arm32",
	"atari", "aviion", "beagle", "cats",
	"hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos",
	"pmax", "powerpc", "solbourne", "sparc",
	"sun3", "vax", "wgrisc", "x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
132
/*
 * Write one character to standard output, replacing the four
 * characters '"', '&', '>' and '<' by their HTML entities.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
159
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
173
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In both cases *val
 * is NULL afterwards, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
190
191 /*
192  * Parse the QUERY_STRING for key-value pairs
193  * and store the values into the query structure.
194  */
195 static void
196 parse_query_string(struct req *req, const char *qs)
197 {
198         char            *key, *val;
199         size_t           keysz, valsz;
200
201         req->isquery    = 1;
202         req->q.manpath  = NULL;
203         req->q.arch     = NULL;
204         req->q.sec      = NULL;
205         req->q.query    = NULL;
206         req->q.equal    = 1;
207
208         key = val = NULL;
209         while (*qs != '\0') {
210
211                 /* Parse one key. */
212
213                 keysz = strcspn(qs, "=;&");
214                 key = mandoc_strndup(qs, keysz);
215                 qs += keysz;
216                 if (*qs != '=')
217                         goto next;
218
219                 /* Parse one value. */
220
221                 valsz = strcspn(++qs, ";&");
222                 val = mandoc_strndup(qs, valsz);
223                 qs += valsz;
224
225                 /* Decode and catch encoding errors. */
226
227                 if ( ! (http_decode(key) && http_decode(val)))
228                         goto next;
229
230                 /* Handle key-value pairs. */
231
232                 if ( ! strcmp(key, "query"))
233                         set_query_attr(&req->q.query, &val);
234
235                 else if ( ! strcmp(key, "apropos"))
236                         req->q.equal = !strcmp(val, "0");
237
238                 else if ( ! strcmp(key, "manpath")) {
239 #ifdef COMPAT_OLDURI
240                         if ( ! strncmp(val, "OpenBSD ", 8)) {
241                                 val[7] = '-';
242                                 if ('C' == val[8])
243                                         val[8] = 'c';
244                         }
245 #endif
246                         set_query_attr(&req->q.manpath, &val);
247                 }
248
249                 else if ( ! (strcmp(key, "sec")
250 #ifdef COMPAT_OLDURI
251                     && strcmp(key, "sektion")
252 #endif
253                     )) {
254                         if ( ! strcmp(val, "0"))
255                                 *val = '\0';
256                         set_query_attr(&req->q.sec, &val);
257                 }
258
259                 else if ( ! strcmp(key, "arch")) {
260                         if ( ! strcmp(val, "default"))
261                                 *val = '\0';
262                         set_query_attr(&req->q.arch, &val);
263                 }
264
265                 /*
266                  * The key must be freed in any case.
267                  * The val may have been handed over to the query
268                  * structure, in which case it is now NULL.
269                  */
270 next:
271                 free(key);
272                 key = NULL;
273                 free(val);
274                 val = NULL;
275
276                 if (*qs != '\0')
277                         qs++;
278         }
279 }
280
/*
 * HTTP-decode a string in place over the allocated buffer:
 * '+' becomes a space and "%XX", where XX are two hexadecimal
 * digits, becomes the byte with that value.  For example,
 * "%4e+foo" is turned into "N foo".
 *
 * Returns 1 on success.  Returns 0 if a '%' is not followed by
 * exactly two hexadecimal digits or if it encodes a NUL byte;
 * the buffer may then have been partially rewritten and should
 * not be used.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	int		 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hexadecimal digits rather than
		 * whatever a numeric conversion is willing to parse
		 * (leading blanks, signs, a lone digit).  The string
		 * terminator is not a hexadecimal digit, so p[2] is
		 * only inspected when p[1] is inside the string.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;

		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the result. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
316
/*
 * Write the HTTP response header block to standard output and
 * flush it.  A "Status:" line built from code and msg is sent
 * only when code is not 200; msg is not used for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
331
/*
 * Copy the content of the named file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char		 buf[4096];
	ssize_t		 nr, nw, off;
	int		 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data below bypasses stdio, so everything buffered
	 * so far has to go out first to keep the output in order.
	 */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may accept fewer bytes than requested;
		 * keep going until the whole chunk is out.
		 */

		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;

			/* The output is gone; there is no point in going on. */

			close(fd);
			return;
		}
	}
	close(fd);
}
346
347 static void
348 resp_begin_html(int code, const char *msg)
349 {
350
351         resp_begin_http(code, msg);
352
353         printf("<!DOCTYPE html>\n"
354                "<html>\n"
355                "<head>\n"
356                "  <meta charset=\"UTF-8\"/>\n"
357                "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
358                " type=\"text/css\" media=\"all\">\n"
359                "  <title>%s</title>\n"
360                "</head>\n"
361                "<body>\n",
362                CSS_DIR, CUSTOMIZE_TITLE);
363
364         resp_copy(MAN_DIR "/header.html");
365 }
366
367 static void
368 resp_end_html(void)
369 {
370
371         resp_copy(MAN_DIR "/footer.html");
372
373         puts("</body>\n"
374              "</html>");
375 }
376
377 static void
378 resp_searchform(const struct req *req, enum focus focus)
379 {
380         int              i;
381
382         printf("<form action=\"/%s\" method=\"get\">\n"
383                "  <fieldset>\n"
384                "    <legend>Manual Page Search Parameters</legend>\n",
385                scriptname);
386
387         /* Write query input box. */
388
389         printf("    <input type=\"text\" name=\"query\" value=\"");
390         if (req->q.query != NULL)
391                 html_print(req->q.query);
392         printf( "\" size=\"40\"");
393         if (focus == FOCUS_QUERY)
394                 printf(" autofocus");
395         puts(">");
396
397         /* Write submission buttons. */
398
399         printf( "    <button type=\"submit\" name=\"apropos\" value=\"0\">"
400                 "man</button>\n"
401                 "    <button type=\"submit\" name=\"apropos\" value=\"1\">"
402                 "apropos</button>\n"
403                 "    <br/>\n");
404
405         /* Write section selector. */
406
407         puts("    <select name=\"sec\">");
408         for (i = 0; i < sec_MAX; i++) {
409                 printf("      <option value=\"%s\"", sec_numbers[i]);
410                 if (NULL != req->q.sec &&
411                     0 == strcmp(sec_numbers[i], req->q.sec))
412                         printf(" selected=\"selected\"");
413                 printf(">%s</option>\n", sec_names[i]);
414         }
415         puts("    </select>");
416
417         /* Write architecture selector. */
418
419         printf( "    <select name=\"arch\">\n"
420                 "      <option value=\"default\"");
421         if (NULL == req->q.arch)
422                 printf(" selected=\"selected\"");
423         puts(">All Architectures</option>");
424         for (i = 0; i < arch_MAX; i++) {
425                 printf("      <option value=\"%s\"", arch_names[i]);
426                 if (NULL != req->q.arch &&
427                     0 == strcmp(arch_names[i], req->q.arch))
428                         printf(" selected=\"selected\"");
429                 printf(">%s</option>\n", arch_names[i]);
430         }
431         puts("    </select>");
432
433         /* Write manpath selector. */
434
435         if (req->psz > 1) {
436                 puts("    <select name=\"manpath\">");
437                 for (i = 0; i < (int)req->psz; i++) {
438                         printf("      <option ");
439                         if (strcmp(req->q.manpath, req->p[i]) == 0)
440                                 printf("selected=\"selected\" ");
441                         printf("value=\"");
442                         html_print(req->p[i]);
443                         printf("\">");
444                         html_print(req->p[i]);
445                         puts("</option>");
446                 }
447                 puts("    </select>");
448         }
449
450         puts("  </fieldset>\n"
451              "</form>");
452 }
453
/*
 * Check that a URI fragment consists only of alphanumeric
 * characters and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (an empty string qualifies), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
467
468 static int
469 validate_manpath(const struct req *req, const char* manpath)
470 {
471         size_t   i;
472
473         for (i = 0; i < req->psz; i++)
474                 if ( ! strcmp(manpath, req->p[i]))
475                         return 1;
476
477         return 0;
478 }
479
/*
 * Check that a file name is acceptable as a manual page path
 * relative to the manpath root.  After an optional leading "./",
 * it must start with "man" or "cat" and must contain neither
 * "../" nor "/..".  Returns 1 if acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
490
491 static void
492 pg_index(const struct req *req)
493 {
494
495         resp_begin_html(200, NULL);
496         resp_searchform(req, FOCUS_QUERY);
497         printf("<p>\n"
498                "This web interface is documented in the\n"
499                "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
500                "manual, and the\n"
501                "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
502                "manual explains the query syntax.\n"
503                "</p>\n",
504                scriptname, *scriptname == '\0' ? "" : "/",
505                scriptname, *scriptname == '\0' ? "" : "/");
506         resp_end_html();
507 }
508
509 static void
510 pg_noresult(const struct req *req, const char *msg)
511 {
512         resp_begin_html(200, NULL);
513         resp_searchform(req, FOCUS_QUERY);
514         puts("<p>");
515         puts(msg);
516         puts("</p>");
517         resp_end_html();
518 }
519
520 static void
521 pg_error_badrequest(const char *msg)
522 {
523
524         resp_begin_html(400, "Bad Request");
525         puts("<h1>Bad Request</h1>\n"
526              "<p>\n");
527         puts(msg);
528         printf("Try again from the\n"
529                "<a href=\"/%s\">main page</a>.\n"
530                "</p>", scriptname);
531         resp_end_html();
532 }
533
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error");
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
541
542 static void
543 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
544 {
545         char            *arch, *archend;
546         const char      *sec;
547         size_t           i, iuse;
548         int              archprio, archpriouse;
549         int              prio, priouse;
550
551         for (i = 0; i < sz; i++) {
552                 if (validate_filename(r[i].file))
553                         continue;
554                 warnx("invalid filename %s in %s database",
555                     r[i].file, req->q.manpath);
556                 pg_error_internal();
557                 return;
558         }
559
560         if (req->isquery && sz == 1) {
561                 /*
562                  * If we have just one result, then jump there now
563                  * without any delay.
564                  */
565                 printf("Status: 303 See Other\r\n");
566                 printf("Location: http://%s/%s%s%s/%s",
567                     HTTP_HOST, scriptname,
568                     *scriptname == '\0' ? "" : "/",
569                     req->q.manpath, r[0].file);
570                 printf("\r\n"
571                      "Content-Type: text/html; charset=utf-8\r\n"
572                      "\r\n");
573                 return;
574         }
575
576         resp_begin_html(200, NULL);
577         resp_searchform(req,
578             req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579
580         if (sz > 1) {
581                 puts("<table class=\"results\">");
582                 for (i = 0; i < sz; i++) {
583                         printf("  <tr>\n"
584                                "    <td>"
585                                "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
586                             scriptname, *scriptname == '\0' ? "" : "/",
587                             req->q.manpath, r[i].file);
588                         html_print(r[i].names);
589                         printf("</a></td>\n"
590                                "    <td><span class=\"Nd\">");
591                         html_print(r[i].output);
592                         puts("</span></td>\n"
593                              "  </tr>");
594                 }
595                 puts("</table>");
596         }
597
598         /*
599          * In man(1) mode, show one of the pages
600          * even if more than one is found.
601          */
602
603         if (req->q.equal || sz == 1) {
604                 puts("<hr>");
605                 iuse = 0;
606                 priouse = 20;
607                 archpriouse = 3;
608                 for (i = 0; i < sz; i++) {
609                         sec = r[i].file;
610                         sec += strcspn(sec, "123456789");
611                         if (sec[0] == '\0')
612                                 continue;
613                         prio = sec_prios[sec[0] - '1'];
614                         if (sec[1] != '/')
615                                 prio += 10;
616                         if (req->q.arch == NULL) {
617                                 archprio =
618                                     ((arch = strchr(sec + 1, '/'))
619                                         == NULL) ? 3 :
620                                     ((archend = strchr(arch + 1, '/'))
621                                         == NULL) ? 0 :
622                                     strncmp(arch, "amd64/",
623                                         archend - arch) ? 2 : 1;
624                                 if (archprio < archpriouse) {
625                                         archpriouse = archprio;
626                                         priouse = prio;
627                                         iuse = i;
628                                         continue;
629                                 }
630                                 if (archprio > archpriouse)
631                                         continue;
632                         }
633                         if (prio >= priouse)
634                                 continue;
635                         priouse = prio;
636                         iuse = i;
637                 }
638                 resp_show(req, r[iuse].file);
639         }
640
641         resp_end_html();
642 }
643
/*
 * Copy a preformatted manual page to standard output inside a
 * <pre> element, HTML-escaping the text and translating backspace
 * overstrike sequences: an underscore overstruck by a character
 * prints that character in italics, certain pairs of punctuation
 * characters print as '*' or '+', and any other overstruck pair
 * prints its second character in bold.
 * If the file cannot be opened, an error paragraph is printed
 * instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair have to be
			 * compared, that is, p[i] and p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
780
781 static void
782 resp_format(const struct req *req, const char *file)
783 {
784         struct manoutput conf;
785         struct mparse   *mp;
786         struct roff_man *man;
787         void            *vp;
788         int              fd;
789         int              usepath;
790
791         if (-1 == (fd = open(file, O_RDONLY, 0))) {
792                 puts("<p>You specified an invalid manual file.</p>");
793                 return;
794         }
795
796         mchars_alloc();
797         mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
798             MANDOCLEVEL_BADARG, NULL, req->q.manpath);
799         mparse_readfd(mp, fd, file);
800         close(fd);
801
802         memset(&conf, 0, sizeof(conf));
803         conf.fragment = 1;
804         conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
805         usepath = strcmp(req->q.manpath, req->p[0]);
806         mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
807             usepath ? req->q.manpath : "", usepath ? "/" : "");
808
809         mparse_result(mp, &man, NULL);
810         if (man == NULL) {
811                 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
812                 pg_error_internal();
813                 mparse_free(mp);
814                 mchars_free();
815                 return;
816         }
817
818         vp = html_alloc(&conf);
819
820         if (man->macroset == MACROSET_MDOC) {
821                 mdoc_validate(man);
822                 html_mdoc(vp, man);
823         } else {
824                 man_validate(man);
825                 html_man(vp, man);
826         }
827
828         html_free(vp);
829         mparse_free(mp);
830         mchars_free();
831         free(conf.man);
832         free(conf.style);
833 }
834
/*
 * Display one manual page.  After skipping an optional leading
 * "./", a file name starting with 'c' is treated as a preformatted
 * page and anything else as manual page source to be formatted.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = (file[0] == '.' && file[1] == '/') ? file + 2 : file;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
847
848 static void
849 pg_show(struct req *req, const char *fullpath)
850 {
851         char            *manpath;
852         const char      *file;
853
854         if ((file = strchr(fullpath, '/')) == NULL) {
855                 pg_error_badrequest(
856                     "You did not specify a page to show.");
857                 return;
858         }
859         manpath = mandoc_strndup(fullpath, file - fullpath);
860         file++;
861
862         if ( ! validate_manpath(req, manpath)) {
863                 pg_error_badrequest(
864                     "You specified an invalid manpath.");
865                 free(manpath);
866                 return;
867         }
868
869         /*
870          * Begin by chdir()ing into the manpath.
871          * This way we can pick up the database files, which are
872          * relative to the manpath root.
873          */
874
875         if (chdir(manpath) == -1) {
876                 warn("chdir %s", manpath);
877                 pg_error_internal();
878                 free(manpath);
879                 return;
880         }
881         free(manpath);
882
883         if ( ! validate_filename(file)) {
884                 pg_error_badrequest(
885                     "You specified an invalid manual file.");
886                 return;
887         }
888
889         resp_begin_html(200, NULL);
890         resp_searchform(req, FOCUS_NONE);
891         resp_show(req, file);
892         resp_end_html();
893 }
894
895 static void
896 pg_search(const struct req *req)
897 {
898         struct mansearch          search;
899         struct manpaths           paths;
900         struct manpage           *res;
901         char                    **argv;
902         char                     *query, *rp, *wp;
903         size_t                    ressz;
904         int                       argc;
905
906         /*
907          * Begin by chdir()ing into the root of the manpath.
908          * This way we can pick up the database files, which are
909          * relative to the manpath root.
910          */
911
912         if (chdir(req->q.manpath) == -1) {
913                 warn("chdir %s", req->q.manpath);
914                 pg_error_internal();
915                 return;
916         }
917
918         search.arch = req->q.arch;
919         search.sec = req->q.sec;
920         search.outkey = "Nd";
921         search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
922         search.firstmatch = 1;
923
924         paths.sz = 1;
925         paths.paths = mandoc_malloc(sizeof(char *));
926         paths.paths[0] = mandoc_strdup(".");
927
928         /*
929          * Break apart at spaces with backslash-escaping.
930          */
931
932         argc = 0;
933         argv = NULL;
934         rp = query = mandoc_strdup(req->q.query);
935         for (;;) {
936                 while (isspace((unsigned char)*rp))
937                         rp++;
938                 if (*rp == '\0')
939                         break;
940                 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
941                 argv[argc++] = wp = rp;
942                 for (;;) {
943                         if (isspace((unsigned char)*rp)) {
944                                 *wp = '\0';
945                                 rp++;
946                                 break;
947                         }
948                         if (rp[0] == '\\' && rp[1] != '\0')
949                                 rp++;
950                         if (wp != rp)
951                                 *wp = *rp;
952                         if (*rp == '\0')
953                                 break;
954                         wp++;
955                         rp++;
956                 }
957         }
958
959         if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
960                 pg_noresult(req, "You entered an invalid query.");
961         else if (0 == ressz)
962                 pg_noresult(req, "No results found.");
963         else
964                 pg_searchres(req, res, ressz);
965
966         free(query);
967         mansearch_free(res, ressz);
968         free(paths.paths[0]);
969         free(paths.paths);
970 }
971
972 int
973 main(void)
974 {
975         struct req       req;
976         struct itimerval itimer;
977         const char      *path;
978         const char      *querystring;
979         int              i;
980
981         /* Poor man's ReDoS mitigation. */
982
983         itimer.it_value.tv_sec = 2;
984         itimer.it_value.tv_usec = 0;
985         itimer.it_interval.tv_sec = 2;
986         itimer.it_interval.tv_usec = 0;
987         if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
988                 warn("setitimer");
989                 pg_error_internal();
990                 return EXIT_FAILURE;
991         }
992
993         /*
994          * First we change directory into the MAN_DIR so that
995          * subsequent scanning for manpath directories is rooted
996          * relative to the same position.
997          */
998
999         if (chdir(MAN_DIR) == -1) {
1000                 warn("MAN_DIR: %s", MAN_DIR);
1001                 pg_error_internal();
1002                 return EXIT_FAILURE;
1003         }
1004
1005         memset(&req, 0, sizeof(struct req));
1006         req.q.equal = 1;
1007         parse_manpath_conf(&req);
1008
1009         /* Parse the path info and the query string. */
1010
1011         if ((path = getenv("PATH_INFO")) == NULL)
1012                 path = "";
1013         else if (*path == '/')
1014                 path++;
1015
1016         if (*path != '\0') {
1017                 parse_path_info(&req, path);
1018                 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1019                         path = "";
1020         } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1021                 parse_query_string(&req, querystring);
1022
1023         /* Validate parsed data and add defaults. */
1024
1025         if (req.q.manpath == NULL)
1026                 req.q.manpath = mandoc_strdup(req.p[0]);
1027         else if ( ! validate_manpath(&req, req.q.manpath)) {
1028                 pg_error_badrequest(
1029                     "You specified an invalid manpath.");
1030                 return EXIT_FAILURE;
1031         }
1032
1033         if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1034                 pg_error_badrequest(
1035                     "You specified an invalid architecture.");
1036                 return EXIT_FAILURE;
1037         }
1038
1039         /* Dispatch to the three different pages. */
1040
1041         if ('\0' != *path)
1042                 pg_show(&req, path);
1043         else if (NULL != req.q.query)
1044                 pg_search(&req);
1045         else
1046                 pg_index(&req);
1047
1048         free(req.q.manpath);
1049         free(req.q.arch);
1050         free(req.q.sec);
1051         free(req.q.query);
1052         for (i = 0; i < (int)req.psz; i++)
1053                 free(req.p[i]);
1054         free(req.p);
1055         return EXIT_SUCCESS;
1056 }
1057
1058 /*
1059  * If PATH_INFO is not a file name, translate it to a query.
1060  */
1061 static void
1062 parse_path_info(struct req *req, const char *path)
1063 {
1064         char    *dir[4];
1065         int      i;
1066
1067         req->isquery = 0;
1068         req->q.equal = 1;
1069         req->q.manpath = mandoc_strdup(path);
1070         req->q.arch = NULL;
1071
1072         /* Mandatory manual page name. */
1073         if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1074                 req->q.query = req->q.manpath;
1075                 req->q.manpath = NULL;
1076         } else
1077                 *req->q.query++ = '\0';
1078
1079         /* Optional trailing section. */
1080         if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1081                 if(isdigit((unsigned char)req->q.sec[1])) {
1082                         *req->q.sec++ = '\0';
1083                         req->q.sec = mandoc_strdup(req->q.sec);
1084                 } else
1085                         req->q.sec = NULL;
1086         }
1087
1088         /* Handle the case of name[.section] only. */
1089         if (req->q.manpath == NULL)
1090                 return;
1091         req->q.query = mandoc_strdup(req->q.query);
1092
1093         /* Split directory components. */
1094         dir[i = 0] = req->q.manpath;
1095         while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1096                 if (++i == 3) {
1097                         pg_error_badrequest(
1098                             "You specified too many directory components.");
1099                         exit(EXIT_FAILURE);
1100                 }
1101                 *dir[i]++ = '\0';
1102         }
1103
1104         /* Optional manpath. */
1105         if ((i = validate_manpath(req, req->q.manpath)) == 0)
1106                 req->q.manpath = NULL;
1107         else if (dir[1] == NULL)
1108                 return;
1109
1110         /* Optional section. */
1111         if (strncmp(dir[i], "man", 3) == 0) {
1112                 free(req->q.sec);
1113                 req->q.sec = mandoc_strdup(dir[i++] + 3);
1114         }
1115         if (dir[i] == NULL) {
1116                 if (req->q.manpath == NULL)
1117                         free(dir[0]);
1118                 return;
1119         }
1120         if (dir[i + 1] != NULL) {
1121                 pg_error_badrequest(
1122                     "You specified an invalid directory component.");
1123                 exit(EXIT_FAILURE);
1124         }
1125
1126         /* Optional architecture. */
1127         if (i) {
1128                 req->q.arch = mandoc_strdup(dir[i]);
1129                 if (req->q.manpath == NULL)
1130                         free(dir[0]);
1131         } else
1132                 req->q.arch = dir[0];
1133 }
1134
1135 /*
1136  * Scan for indexable paths.
1137  */
1138 static void
1139 parse_manpath_conf(struct req *req)
1140 {
1141         FILE    *fp;
1142         char    *dp;
1143         size_t   dpsz;
1144         ssize_t  len;
1145
1146         if ((fp = fopen("manpath.conf", "r")) == NULL) {
1147                 warn("%s/manpath.conf", MAN_DIR);
1148                 pg_error_internal();
1149                 exit(EXIT_FAILURE);
1150         }
1151
1152         dp = NULL;
1153         dpsz = 0;
1154
1155         while ((len = getline(&dp, &dpsz, fp)) != -1) {
1156                 if (dp[len - 1] == '\n')
1157                         dp[--len] = '\0';
1158                 req->p = mandoc_realloc(req->p,
1159                     (req->psz + 1) * sizeof(char *));
1160                 if ( ! validate_urifrag(dp)) {
1161                         warnx("%s/manpath.conf contains "
1162                             "unsafe path \"%s\"", MAN_DIR, dp);
1163                         pg_error_internal();
1164                         exit(EXIT_FAILURE);
1165                 }
1166                 if (strchr(dp, '/') != NULL) {
1167                         warnx("%s/manpath.conf contains "
1168                             "path with slash \"%s\"", MAN_DIR, dp);
1169                         pg_error_internal();
1170                         exit(EXIT_FAILURE);
1171                 }
1172                 req->p[req->psz++] = dp;
1173                 dp = NULL;
1174                 dpsz = 0;
1175         }
1176         free(dp);
1177
1178         if (req->p == NULL) {
1179                 warnx("%s/manpath.conf is empty", MAN_DIR);
1180                 pg_error_internal();
1181                 exit(EXIT_FAILURE);
1182         }
1183 }