]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mdocml/cgi.c
Fix ipfw invalid mbuf handling.
[FreeBSD/FreeBSD.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.156 2017/06/24 14:38:32 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45
/*
 * The search parameters of one request,
 * in the form in which they are passed to the search function.
 */
struct query {
	/* desired manual directory */
	char	*manpath;
	/* architecture */
	char	*arch;
	/* manual section */
	char	*sec;
	/* unparsed query expression */
	char	*query;
	/* match whole names, not substrings */
	int	 equal;
};
56
57 struct  req {
58         struct query      q;
59         char            **p; /* array of available manpaths */
60         size_t            psz; /* number of available manpaths */
61         int               isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE  = 0,	/* no autofocus */
	FOCUS_QUERY = 1		/* autofocus on the query input box */
};
68
69 static  void             html_print(const char *);
70 static  void             html_putchar(char);
71 static  int              http_decode(char *);
72 static  void             parse_manpath_conf(struct req *);
73 static  void             parse_path_info(struct req *req, const char *path);
74 static  void             parse_query_string(struct req *, const char *);
75 static  void             pg_error_badrequest(const char *);
76 static  void             pg_error_internal(void);
77 static  void             pg_index(const struct req *);
78 static  void             pg_noresult(const struct req *, const char *);
79 static  void             pg_redirect(const struct req *, const char *);
80 static  void             pg_search(const struct req *);
81 static  void             pg_searchres(const struct req *,
82                                 struct manpage *, size_t);
83 static  void             pg_show(struct req *, const char *);
84 static  void             resp_begin_html(int, const char *, const char *);
85 static  void             resp_begin_http(int, const char *);
86 static  void             resp_catman(const struct req *, const char *);
87 static  void             resp_copy(const char *);
88 static  void             resp_end_html(void);
89 static  void             resp_format(const struct req *, const char *);
90 static  void             resp_searchform(const struct req *, enum focus);
91 static  void             resp_show(const struct req *, const char *);
92 static  void             set_query_attr(char **, char **);
93 static  int              validate_filename(const char *);
94 static  int              validate_manpath(const struct req *, const char *);
95 static  int              validate_urifrag(const char *);
96
97 static  const char       *scriptname = SCRIPT_NAME;
98
/*
 * Preference value for each leading section digit '1' to '9',
 * indexed by (digit - '1').  pg_searchres() prefers the page
 * with the smallest value.
 */
static const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/* Values of the "sec" selector in the search form. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown for the entries of sec_numbers[], in the same order. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in sec_names[]. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
117
/* Architectures offered by the "arch" selector in the search form. */
static const char *const arch_names[] = {
	"amd64", "alpha", "armv7", "arm64",
	"hppa", "i386", "landisk",
	"loongson", "luna88k", "macppc", "mips64",
	"octeon", "sgi", "socppc", "sparc64",
	"amiga", "arc", "armish", "arm32",
	"atari", "aviion", "beagle", "cats",
	"hppa64", "hp300",
	"ia64", "mac68k", "mvme68k", "mvme88k",
	"mvmeppc", "palm", "pc532", "pegasos",
	"pmax", "powerpc", "solbourne", "sparc",
	"sun3", "vax", "wgrisc", "x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133
/*
 * Write one character to standard output, replacing the HTML
 * metacharacters '"', '&', '>' and '<' by entity references.
 * Every other byte, non-ASCII included, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
160
/*
 * Write a string to standard output through html_putchar(),
 * one character at a time.  A NULL pointer prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
174
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is freed
 * as well and stored as NULL.  In both cases *val is NULL on return,
 * so the caller must not free it again.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;

	free(*attr);
	if (*incoming == '\0') {
		free(incoming);
		incoming = NULL;
	}
	*attr = incoming;
	*val = NULL;
}
191
192 /*
193  * Parse the QUERY_STRING for key-value pairs
194  * and store the values into the query structure.
195  */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199         char            *key, *val;
200         size_t           keysz, valsz;
201
202         req->isquery    = 1;
203         req->q.manpath  = NULL;
204         req->q.arch     = NULL;
205         req->q.sec      = NULL;
206         req->q.query    = NULL;
207         req->q.equal    = 1;
208
209         key = val = NULL;
210         while (*qs != '\0') {
211
212                 /* Parse one key. */
213
214                 keysz = strcspn(qs, "=;&");
215                 key = mandoc_strndup(qs, keysz);
216                 qs += keysz;
217                 if (*qs != '=')
218                         goto next;
219
220                 /* Parse one value. */
221
222                 valsz = strcspn(++qs, ";&");
223                 val = mandoc_strndup(qs, valsz);
224                 qs += valsz;
225
226                 /* Decode and catch encoding errors. */
227
228                 if ( ! (http_decode(key) && http_decode(val)))
229                         goto next;
230
231                 /* Handle key-value pairs. */
232
233                 if ( ! strcmp(key, "query"))
234                         set_query_attr(&req->q.query, &val);
235
236                 else if ( ! strcmp(key, "apropos"))
237                         req->q.equal = !strcmp(val, "0");
238
239                 else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241                         if ( ! strncmp(val, "OpenBSD ", 8)) {
242                                 val[7] = '-';
243                                 if ('C' == val[8])
244                                         val[8] = 'c';
245                         }
246 #endif
247                         set_query_attr(&req->q.manpath, &val);
248                 }
249
250                 else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252                     && strcmp(key, "sektion")
253 #endif
254                     )) {
255                         if ( ! strcmp(val, "0"))
256                                 *val = '\0';
257                         set_query_attr(&req->q.sec, &val);
258                 }
259
260                 else if ( ! strcmp(key, "arch")) {
261                         if ( ! strcmp(val, "default"))
262                                 *val = '\0';
263                         set_query_attr(&req->q.arch, &val);
264                 }
265
266                 /*
267                  * The key must be freed in any case.
268                  * The val may have been handed over to the query
269                  * structure, in which case it is now NULL.
270                  */
271 next:
272                 free(key);
273                 key = NULL;
274                 free(val);
275                 val = NULL;
276
277                 if (*qs != '\0')
278                         qs++;
279         }
280 }
281
282 /*
283  * HTTP-decode a string.  The standard explanation is that this turns
284  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
285  * over the allocated string.
286  */
287 static int
288 http_decode(char *p)
289 {
290         char             hex[3];
291         char            *q;
292         int              c;
293
294         hex[2] = '\0';
295
296         q = p;
297         for ( ; '\0' != *p; p++, q++) {
298                 if ('%' == *p) {
299                         if ('\0' == (hex[0] = *(p + 1)))
300                                 return 0;
301                         if ('\0' == (hex[1] = *(p + 2)))
302                                 return 0;
303                         if (1 != sscanf(hex, "%x", &c))
304                                 return 0;
305                         if ('\0' == c)
306                                 return 0;
307
308                         *q = (char)c;
309                         p += 2;
310                 } else
311                         *q = '+' == *p ? ' ' : *p;
312         }
313
314         *q = '\0';
315         return 1;
316 }
317
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line is only written for codes other than 200,
 * so msg is only used in that case.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
332
/*
 * Copy the content of the file to standard output.
 * Nothing is written if the file cannot be opened.
 * Copying stops silently at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	char	*out;
	ssize_t	 nread, nwritten;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data goes straight to the file descriptor, so what is
	 * still buffered in the stdio stream has to go out first.
	 */
	fflush(stdout);

	while ((nread = read(fd, buf, sizeof(buf))) > 0) {
		/*
		 * write(2) may transfer less than requested;
		 * keep going until the whole chunk is out.
		 */
		out = buf;
		while (nread > 0) {
			nwritten = write(STDOUT_FILENO, out, (size_t)nread);
			if (nwritten == -1) {
				if (errno == EINTR)
					continue;
				goto done;
			}
			out += nwritten;
			nread -= nwritten;
		}
	}
done:
	close(fd);
}
347
348 static void
349 resp_begin_html(int code, const char *msg, const char *file)
350 {
351         char    *cp;
352
353         resp_begin_http(code, msg);
354
355         printf("<!DOCTYPE html>\n"
356                "<html>\n"
357                "<head>\n"
358                "  <meta charset=\"UTF-8\"/>\n"
359                "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
360                " type=\"text/css\" media=\"all\">\n"
361                "  <title>",
362                CSS_DIR);
363         if (file != NULL) {
364                 if ((cp = strrchr(file, '/')) != NULL)
365                         file = cp + 1;
366                 if ((cp = strrchr(file, '.')) != NULL) {
367                         printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
368                 } else
369                         printf("%s - ", file);
370         }
371         printf("%s</title>\n"
372                "</head>\n"
373                "<body>\n",
374                CUSTOMIZE_TITLE);
375
376         resp_copy(MAN_DIR "/header.html");
377 }
378
379 static void
380 resp_end_html(void)
381 {
382
383         resp_copy(MAN_DIR "/footer.html");
384
385         puts("</body>\n"
386              "</html>");
387 }
388
389 static void
390 resp_searchform(const struct req *req, enum focus focus)
391 {
392         int              i;
393
394         printf("<form action=\"/%s\" method=\"get\">\n"
395                "  <fieldset>\n"
396                "    <legend>Manual Page Search Parameters</legend>\n",
397                scriptname);
398
399         /* Write query input box. */
400
401         printf("    <input type=\"text\" name=\"query\" value=\"");
402         if (req->q.query != NULL)
403                 html_print(req->q.query);
404         printf( "\" size=\"40\"");
405         if (focus == FOCUS_QUERY)
406                 printf(" autofocus");
407         puts(">");
408
409         /* Write submission buttons. */
410
411         printf( "    <button type=\"submit\" name=\"apropos\" value=\"0\">"
412                 "man</button>\n"
413                 "    <button type=\"submit\" name=\"apropos\" value=\"1\">"
414                 "apropos</button>\n"
415                 "    <br/>\n");
416
417         /* Write section selector. */
418
419         puts("    <select name=\"sec\">");
420         for (i = 0; i < sec_MAX; i++) {
421                 printf("      <option value=\"%s\"", sec_numbers[i]);
422                 if (NULL != req->q.sec &&
423                     0 == strcmp(sec_numbers[i], req->q.sec))
424                         printf(" selected=\"selected\"");
425                 printf(">%s</option>\n", sec_names[i]);
426         }
427         puts("    </select>");
428
429         /* Write architecture selector. */
430
431         printf( "    <select name=\"arch\">\n"
432                 "      <option value=\"default\"");
433         if (NULL == req->q.arch)
434                 printf(" selected=\"selected\"");
435         puts(">All Architectures</option>");
436         for (i = 0; i < arch_MAX; i++) {
437                 printf("      <option value=\"%s\"", arch_names[i]);
438                 if (NULL != req->q.arch &&
439                     0 == strcmp(arch_names[i], req->q.arch))
440                         printf(" selected=\"selected\"");
441                 printf(">%s</option>\n", arch_names[i]);
442         }
443         puts("    </select>");
444
445         /* Write manpath selector. */
446
447         if (req->psz > 1) {
448                 puts("    <select name=\"manpath\">");
449                 for (i = 0; i < (int)req->psz; i++) {
450                         printf("      <option ");
451                         if (strcmp(req->q.manpath, req->p[i]) == 0)
452                                 printf("selected=\"selected\" ");
453                         printf("value=\"");
454                         html_print(req->p[i]);
455                         printf("\">");
456                         html_print(req->p[i]);
457                         puts("</option>");
458                 }
459                 puts("    </select>");
460         }
461
462         puts("  </fieldset>\n"
463              "</form>");
464 }
465
/*
 * Check that a string consists of nothing but alphanumeric
 * characters, '-', '.', '/' and '_'.  The empty string is accepted.
 * Returns 1 if the string is acceptable, 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
479
480 static int
481 validate_manpath(const struct req *req, const char* manpath)
482 {
483         size_t   i;
484
485         for (i = 0; i < req->psz; i++)
486                 if ( ! strcmp(manpath, req->p[i]))
487                         return 1;
488
489         return 0;
490 }
491
/*
 * Check a file name relative to a manpath.  After skipping
 * an optional leading "./", the name must start with "man" or "cat"
 * and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that might climb out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;
	return 0;
}
502
503 static void
504 pg_index(const struct req *req)
505 {
506
507         resp_begin_html(200, NULL, NULL);
508         resp_searchform(req, FOCUS_QUERY);
509         printf("<p>\n"
510                "This web interface is documented in the\n"
511                "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
512                "manual, and the\n"
513                "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
514                "manual explains the query syntax.\n"
515                "</p>\n",
516                scriptname, *scriptname == '\0' ? "" : "/",
517                scriptname, *scriptname == '\0' ? "" : "/");
518         resp_end_html();
519 }
520
521 static void
522 pg_noresult(const struct req *req, const char *msg)
523 {
524         resp_begin_html(200, NULL, NULL);
525         resp_searchform(req, FOCUS_QUERY);
526         puts("<p>");
527         puts(msg);
528         puts("</p>");
529         resp_end_html();
530 }
531
532 static void
533 pg_error_badrequest(const char *msg)
534 {
535
536         resp_begin_html(400, "Bad Request", NULL);
537         puts("<h1>Bad Request</h1>\n"
538              "<p>\n");
539         puts(msg);
540         printf("Try again from the\n"
541                "<a href=\"/%s\">main page</a>.\n"
542                "</p>", scriptname);
543         resp_end_html();
544 }
545
/*
 * Write a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
553
554 static void
555 pg_redirect(const struct req *req, const char *name)
556 {
557         printf("Status: 303 See Other\r\n"
558             "Location: /");
559         if (*scriptname != '\0')
560                 printf("%s/", scriptname);
561         if (strcmp(req->q.manpath, req->p[0]))
562                 printf("%s/", req->q.manpath);
563         if (req->q.arch != NULL)
564                 printf("%s/", req->q.arch);
565         printf("%s", name);
566         if (req->q.sec != NULL)
567                 printf(".%s", req->q.sec);
568         printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
569 }
570
571 static void
572 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
573 {
574         char            *arch, *archend;
575         const char      *sec;
576         size_t           i, iuse;
577         int              archprio, archpriouse;
578         int              prio, priouse;
579
580         for (i = 0; i < sz; i++) {
581                 if (validate_filename(r[i].file))
582                         continue;
583                 warnx("invalid filename %s in %s database",
584                     r[i].file, req->q.manpath);
585                 pg_error_internal();
586                 return;
587         }
588
589         if (req->isquery && sz == 1) {
590                 /*
591                  * If we have just one result, then jump there now
592                  * without any delay.
593                  */
594                 printf("Status: 303 See Other\r\n"
595                     "Location: /");
596                 if (*scriptname != '\0')
597                         printf("%s/", scriptname);
598                 if (strcmp(req->q.manpath, req->p[0]))
599                         printf("%s/", req->q.manpath);
600                 printf("%s\r\n"
601                     "Content-Type: text/html; charset=utf-8\r\n\r\n",
602                     r[0].file);
603                 return;
604         }
605
606         /*
607          * In man(1) mode, show one of the pages
608          * even if more than one is found.
609          */
610
611         iuse = 0;
612         if (req->q.equal || sz == 1) {
613                 priouse = 20;
614                 archpriouse = 3;
615                 for (i = 0; i < sz; i++) {
616                         sec = r[i].file;
617                         sec += strcspn(sec, "123456789");
618                         if (sec[0] == '\0')
619                                 continue;
620                         prio = sec_prios[sec[0] - '1'];
621                         if (sec[1] != '/')
622                                 prio += 10;
623                         if (req->q.arch == NULL) {
624                                 archprio =
625                                     ((arch = strchr(sec + 1, '/'))
626                                         == NULL) ? 3 :
627                                     ((archend = strchr(arch + 1, '/'))
628                                         == NULL) ? 0 :
629                                     strncmp(arch, "amd64/",
630                                         archend - arch) ? 2 : 1;
631                                 if (archprio < archpriouse) {
632                                         archpriouse = archprio;
633                                         priouse = prio;
634                                         iuse = i;
635                                         continue;
636                                 }
637                                 if (archprio > archpriouse)
638                                         continue;
639                         }
640                         if (prio >= priouse)
641                                 continue;
642                         priouse = prio;
643                         iuse = i;
644                 }
645                 resp_begin_html(200, NULL, r[iuse].file);
646         } else
647                 resp_begin_html(200, NULL, NULL);
648
649         resp_searchform(req,
650             req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
651
652         if (sz > 1) {
653                 puts("<table class=\"results\">");
654                 for (i = 0; i < sz; i++) {
655                         printf("  <tr>\n"
656                                "    <td>"
657                                "<a class=\"Xr\" href=\"/");
658                         if (*scriptname != '\0')
659                                 printf("%s/", scriptname);
660                         if (strcmp(req->q.manpath, req->p[0]))
661                                 printf("%s/", req->q.manpath);
662                         printf("%s\">", r[i].file);
663                         html_print(r[i].names);
664                         printf("</a></td>\n"
665                                "    <td><span class=\"Nd\">");
666                         html_print(r[i].output);
667                         puts("</span></td>\n"
668                              "  </tr>");
669                 }
670                 puts("</table>");
671         }
672
673         if (req->q.equal || sz == 1) {
674                 puts("<hr>");
675                 resp_show(req, r[iuse].file);
676         }
677
678         resp_end_html();
679 }
680
/*
 * Write a preformatted manual page (cat page) as HTML.
 *
 * The file is read line by line.  A sequence "x\by" is a character
 * overstruck by another one: an underscore followed by a character
 * is written in italics, a few pairs are written as '*' or '+',
 * and all other pairs are written in bold.  All characters are
 * HTML-escaped.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 first, second;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is inside the line: the pair being
			 * overstruck is p[i] and p[i + 2].
			 */

			first = p[i];
			second = p[i + 2];

			/* Italic mode. */

			if ('_' == first) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair must be compared;
			 * p[i + 1] is always the backspace here.
			 */

			if (('+' == first && 'o' == second) ||
			    ('o' == first && '+' == second) ||
			    ('|' == first && '=' == second) ||
			    ('=' == first && '|' == second) ||
			    ('*' == first && '=' == second) ||
			    ('=' == first && '*' == second) ||
			    ('*' == first && '|' == second) ||
			    ('|' == first && '*' == second)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == first && '-' == second) ||
			    ('-' == first && '|' == second) ||
			    ('+' == first && '-' == second) ||
			    ('-' == first && '+' == second) ||
			    ('+' == first && '|' == second) ||
			    ('|' == first && '+' == second)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
817
818 static void
819 resp_format(const struct req *req, const char *file)
820 {
821         struct manoutput conf;
822         struct mparse   *mp;
823         struct roff_man *man;
824         void            *vp;
825         int              fd;
826         int              usepath;
827
828         if (-1 == (fd = open(file, O_RDONLY, 0))) {
829                 puts("<p>You specified an invalid manual file.</p>");
830                 return;
831         }
832
833         mchars_alloc();
834         mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
835             MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
836         mparse_readfd(mp, fd, file);
837         close(fd);
838
839         memset(&conf, 0, sizeof(conf));
840         conf.fragment = 1;
841         conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
842         usepath = strcmp(req->q.manpath, req->p[0]);
843         mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
844             scriptname, *scriptname == '\0' ? "" : "/",
845             usepath ? req->q.manpath : "", usepath ? "/" : "");
846
847         mparse_result(mp, &man, NULL);
848         if (man == NULL) {
849                 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
850                 pg_error_internal();
851                 mparse_free(mp);
852                 mchars_free();
853                 return;
854         }
855
856         vp = html_alloc(&conf);
857
858         if (man->macroset == MACROSET_MDOC) {
859                 mdoc_validate(man);
860                 html_mdoc(vp, man);
861         } else {
862                 man_validate(man);
863                 html_man(vp, man);
864         }
865
866         html_free(vp);
867         mparse_free(mp);
868         mchars_free();
869         free(conf.man);
870         free(conf.style);
871 }
872
/*
 * Write the content of a manual page file.  After skipping an
 * optional leading "./", file names starting with 'c' are handled
 * by resp_catman() and all others by resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c')
		resp_format(req, rel);
	else
		resp_catman(req, rel);
}
885
886 static void
887 pg_show(struct req *req, const char *fullpath)
888 {
889         char            *manpath;
890         const char      *file;
891
892         if ((file = strchr(fullpath, '/')) == NULL) {
893                 pg_error_badrequest(
894                     "You did not specify a page to show.");
895                 return;
896         }
897         manpath = mandoc_strndup(fullpath, file - fullpath);
898         file++;
899
900         if ( ! validate_manpath(req, manpath)) {
901                 pg_error_badrequest(
902                     "You specified an invalid manpath.");
903                 free(manpath);
904                 return;
905         }
906
907         /*
908          * Begin by chdir()ing into the manpath.
909          * This way we can pick up the database files, which are
910          * relative to the manpath root.
911          */
912
913         if (chdir(manpath) == -1) {
914                 warn("chdir %s", manpath);
915                 pg_error_internal();
916                 free(manpath);
917                 return;
918         }
919         free(manpath);
920
921         if ( ! validate_filename(file)) {
922                 pg_error_badrequest(
923                     "You specified an invalid manual file.");
924                 return;
925         }
926
927         resp_begin_html(200, NULL, file);
928         resp_searchform(req, FOCUS_NONE);
929         resp_show(req, file);
930         resp_end_html();
931 }
932
933 static void
934 pg_search(const struct req *req)
935 {
936         struct mansearch          search;
937         struct manpaths           paths;
938         struct manpage           *res;
939         char                    **argv;
940         char                     *query, *rp, *wp;
941         size_t                    ressz;
942         int                       argc;
943
944         /*
945          * Begin by chdir()ing into the root of the manpath.
946          * This way we can pick up the database files, which are
947          * relative to the manpath root.
948          */
949
950         if (chdir(req->q.manpath) == -1) {
951                 warn("chdir %s", req->q.manpath);
952                 pg_error_internal();
953                 return;
954         }
955
956         search.arch = req->q.arch;
957         search.sec = req->q.sec;
958         search.outkey = "Nd";
959         search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
960         search.firstmatch = 1;
961
962         paths.sz = 1;
963         paths.paths = mandoc_malloc(sizeof(char *));
964         paths.paths[0] = mandoc_strdup(".");
965
966         /*
967          * Break apart at spaces with backslash-escaping.
968          */
969
970         argc = 0;
971         argv = NULL;
972         rp = query = mandoc_strdup(req->q.query);
973         for (;;) {
974                 while (isspace((unsigned char)*rp))
975                         rp++;
976                 if (*rp == '\0')
977                         break;
978                 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
979                 argv[argc++] = wp = rp;
980                 for (;;) {
981                         if (isspace((unsigned char)*rp)) {
982                                 *wp = '\0';
983                                 rp++;
984                                 break;
985                         }
986                         if (rp[0] == '\\' && rp[1] != '\0')
987                                 rp++;
988                         if (wp != rp)
989                                 *wp = *rp;
990                         if (*rp == '\0')
991                                 break;
992                         wp++;
993                         rp++;
994                 }
995         }
996
997         res = NULL;
998         ressz = 0;
999         if (req->isquery && req->q.equal && argc == 1)
1000                 pg_redirect(req, argv[0]);
1001         else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1002                 pg_noresult(req, "You entered an invalid query.");
1003         else if (ressz == 0)
1004                 pg_noresult(req, "No results found.");
1005         else
1006                 pg_searchres(req, res, ressz);
1007
1008         free(query);
1009         mansearch_free(res, ressz);
1010         free(paths.paths[0]);
1011         free(paths.paths);
1012 }
1013
1014 int
1015 main(void)
1016 {
1017         struct req       req;
1018         struct itimerval itimer;
1019         const char      *path;
1020         const char      *querystring;
1021         int              i;
1022
1023 #if HAVE_PLEDGE
1024         /*
1025          * The "rpath" pledge could be revoked after mparse_readfd()
1026          * if the file desciptor to "/footer.html" would be opened
1027          * up front, but it's probably not worth the complication
1028          * of the code it would cause: it would require scattering
1029          * pledge() calls in multiple low-level resp_*() functions.
1030          */
1031
1032         if (pledge("stdio rpath", NULL) == -1) {
1033                 warn("pledge");
1034                 pg_error_internal();
1035                 return EXIT_FAILURE;
1036         }
1037 #endif
1038
1039         /* Poor man's ReDoS mitigation. */
1040
1041         itimer.it_value.tv_sec = 2;
1042         itimer.it_value.tv_usec = 0;
1043         itimer.it_interval.tv_sec = 2;
1044         itimer.it_interval.tv_usec = 0;
1045         if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1046                 warn("setitimer");
1047                 pg_error_internal();
1048                 return EXIT_FAILURE;
1049         }
1050
1051         /*
1052          * First we change directory into the MAN_DIR so that
1053          * subsequent scanning for manpath directories is rooted
1054          * relative to the same position.
1055          */
1056
1057         if (chdir(MAN_DIR) == -1) {
1058                 warn("MAN_DIR: %s", MAN_DIR);
1059                 pg_error_internal();
1060                 return EXIT_FAILURE;
1061         }
1062
1063         memset(&req, 0, sizeof(struct req));
1064         req.q.equal = 1;
1065         parse_manpath_conf(&req);
1066
1067         /* Parse the path info and the query string. */
1068
1069         if ((path = getenv("PATH_INFO")) == NULL)
1070                 path = "";
1071         else if (*path == '/')
1072                 path++;
1073
1074         if (*path != '\0') {
1075                 parse_path_info(&req, path);
1076                 if (req.q.manpath == NULL || req.q.sec == NULL ||
1077                     *req.q.query == '\0' || access(path, F_OK) == -1)
1078                         path = "";
1079         } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1080                 parse_query_string(&req, querystring);
1081
1082         /* Validate parsed data and add defaults. */
1083
1084         if (req.q.manpath == NULL)
1085                 req.q.manpath = mandoc_strdup(req.p[0]);
1086         else if ( ! validate_manpath(&req, req.q.manpath)) {
1087                 pg_error_badrequest(
1088                     "You specified an invalid manpath.");
1089                 return EXIT_FAILURE;
1090         }
1091
1092         if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1093                 pg_error_badrequest(
1094                     "You specified an invalid architecture.");
1095                 return EXIT_FAILURE;
1096         }
1097
1098         /* Dispatch to the three different pages. */
1099
1100         if ('\0' != *path)
1101                 pg_show(&req, path);
1102         else if (NULL != req.q.query)
1103                 pg_search(&req);
1104         else
1105                 pg_index(&req);
1106
1107         free(req.q.manpath);
1108         free(req.q.arch);
1109         free(req.q.sec);
1110         free(req.q.query);
1111         for (i = 0; i < (int)req.psz; i++)
1112                 free(req.p[i]);
1113         free(req.p);
1114         return EXIT_SUCCESS;
1115 }
1116
1117 /*
1118  * If PATH_INFO is not a file name, translate it to a query.
1119  */
1120 static void
1121 parse_path_info(struct req *req, const char *path)
1122 {
1123         char    *dir[4];
1124         int      i;
1125
1126         req->isquery = 0;
1127         req->q.equal = 1;
1128         req->q.manpath = mandoc_strdup(path);
1129         req->q.arch = NULL;
1130
1131         /* Mandatory manual page name. */
1132         if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1133                 req->q.query = req->q.manpath;
1134                 req->q.manpath = NULL;
1135         } else
1136                 *req->q.query++ = '\0';
1137
1138         /* Optional trailing section. */
1139         if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1140                 if(isdigit((unsigned char)req->q.sec[1])) {
1141                         *req->q.sec++ = '\0';
1142                         req->q.sec = mandoc_strdup(req->q.sec);
1143                 } else
1144                         req->q.sec = NULL;
1145         }
1146
1147         /* Handle the case of name[.section] only. */
1148         if (req->q.manpath == NULL)
1149                 return;
1150         req->q.query = mandoc_strdup(req->q.query);
1151
1152         /* Split directory components. */
1153         dir[i = 0] = req->q.manpath;
1154         while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1155                 if (++i == 3) {
1156                         pg_error_badrequest(
1157                             "You specified too many directory components.");
1158                         exit(EXIT_FAILURE);
1159                 }
1160                 *dir[i]++ = '\0';
1161         }
1162
1163         /* Optional manpath. */
1164         if ((i = validate_manpath(req, req->q.manpath)) == 0)
1165                 req->q.manpath = NULL;
1166         else if (dir[1] == NULL)
1167                 return;
1168
1169         /* Optional section. */
1170         if (strncmp(dir[i], "man", 3) == 0) {
1171                 free(req->q.sec);
1172                 req->q.sec = mandoc_strdup(dir[i++] + 3);
1173         }
1174         if (dir[i] == NULL) {
1175                 if (req->q.manpath == NULL)
1176                         free(dir[0]);
1177                 return;
1178         }
1179         if (dir[i + 1] != NULL) {
1180                 pg_error_badrequest(
1181                     "You specified an invalid directory component.");
1182                 exit(EXIT_FAILURE);
1183         }
1184
1185         /* Optional architecture. */
1186         if (i) {
1187                 req->q.arch = mandoc_strdup(dir[i]);
1188                 if (req->q.manpath == NULL)
1189                         free(dir[0]);
1190         } else
1191                 req->q.arch = dir[0];
1192 }
1193
1194 /*
1195  * Scan for indexable paths.
1196  */
1197 static void
1198 parse_manpath_conf(struct req *req)
1199 {
1200         FILE    *fp;
1201         char    *dp;
1202         size_t   dpsz;
1203         ssize_t  len;
1204
1205         if ((fp = fopen("manpath.conf", "r")) == NULL) {
1206                 warn("%s/manpath.conf", MAN_DIR);
1207                 pg_error_internal();
1208                 exit(EXIT_FAILURE);
1209         }
1210
1211         dp = NULL;
1212         dpsz = 0;
1213
1214         while ((len = getline(&dp, &dpsz, fp)) != -1) {
1215                 if (dp[len - 1] == '\n')
1216                         dp[--len] = '\0';
1217                 req->p = mandoc_realloc(req->p,
1218                     (req->psz + 1) * sizeof(char *));
1219                 if ( ! validate_urifrag(dp)) {
1220                         warnx("%s/manpath.conf contains "
1221                             "unsafe path \"%s\"", MAN_DIR, dp);
1222                         pg_error_internal();
1223                         exit(EXIT_FAILURE);
1224                 }
1225                 if (strchr(dp, '/') != NULL) {
1226                         warnx("%s/manpath.conf contains "
1227                             "path with slash \"%s\"", MAN_DIR, dp);
1228                         pg_error_internal();
1229                         exit(EXIT_FAILURE);
1230                 }
1231                 req->p[req->psz++] = dp;
1232                 dp = NULL;
1233                 dpsz = 0;
1234         }
1235         free(dp);
1236
1237         if (req->p == NULL) {
1238                 warnx("%s/manpath.conf is empty", MAN_DIR);
1239                 pg_error_internal();
1240                 exit(EXIT_FAILURE);
1241         }
1242 }