]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/mdocml/cgi.c
Update mandoc to 20160116
[FreeBSD/FreeBSD.git] / contrib / mdocml / cgi.c
1 /*      $Id: cgi.c,v 1.116 2016/01/04 12:36:26 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19
20 #include <sys/types.h>
21 #include <sys/time.h>
22
23 #include <ctype.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <limits.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "mandoc_aux.h"
34 #include "mandoc.h"
35 #include "roff.h"
36 #include "mdoc.h"
37 #include "man.h"
38 #include "main.h"
39 #include "manconf.h"
40 #include "mansearch.h"
41 #include "cgi.h"
42
/*
 * Search parameters taken from one request,
 * in the form handed on to the search function.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
53
54 struct  req {
55         struct query      q;
56         char            **p; /* array of available manpaths */
57         size_t            psz; /* number of available manpaths */
58 };
59
/* Forward declarations of the file-local functions. */
static	void		 catman(const struct req *req, const char *file);
static	void		 format(const struct req *req, const char *file);
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	int		 http_decode(char *p);
static	void		 http_parse(struct req *req, const char *qs);
static	void		 pathgen(struct req *req);
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req, const char *msg);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);
static	void		 resp_begin_html(int code, const char *msg);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_copy(const char *filename);
static	void		 resp_end_html(void);
static	void		 resp_searchform(const struct req *req);
static	void		 resp_show(const struct req *req, const char *file);
static	void		 set_query_attr(char **attr, char **val);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);
85
/* CGI script name; interpolated into the links this file prints. */
static	const char	*scriptname;

/*
 * Display priority of sections 1 through 9, indexed by the section
 * digit minus '1'.  A smaller value is preferred when several pages
 * match in man(1) mode.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values submitted by the section selector; parallel to sec_names. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels shown by the section selector; parallel to sec_numbers. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager's Manual",
	"9 - Kernel Developer's Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
106
/*
 * Architecture names offered by the architecture selector,
 * in the order in which they are listed.
 */
static	const char *const arch_names[] = {
	"amd64", "alpha", "armish", "armv7", "aviion", "hppa",
	"hppa64", "i386", "ia64", "landisk", "loongson", "luna88k",
	"macppc", "mips64", "octeon", "sgi", "socppc", "solbourne",
	"sparc", "sparc64", "vax", "zaurus",

	"amiga", "arc", "arm32", "atari", "beagle", "cats",
	"hp300", "mac68k", "mvme68k", "mvme88k", "mvmeppc", "palm",
	"pc532", "pegasos", "pmax", "powerpc", "sun3", "wgrisc",
	"x68k"
};

/* Number of entries in arch_names. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
121
/*
 * Print a character to standard output, escaping the characters
 * that are special in HTML text and attribute values:
 * '"', '&', '<' and '>' are written as named character references.
 * Every other byte, including non-ASCII, is passed straight to
 * the output: be warned!
 */
static void
html_putchar(char c)
{

	switch (c) {
	case '"':
		/* The entity is "quot"; "quote" is not defined in HTML. */
		printf("&quot;");
		break;
	case '&':
		printf("&amp;");
		break;
	case '>':
		printf("&gt;");
		break;
	case '<':
		printf("&lt;");
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
148
/*
 * Print a whole string through html_putchar(),
 * one character at a time.
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
162
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * Whatever *attr held before is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL.  In either case,
 * *val is NULL afterwards, so the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);

	incoming = *val;
	*val = NULL;

	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
179
180 /*
181  * Parse the QUERY_STRING for key-value pairs
182  * and store the values into the query structure.
183  */
184 static void
185 http_parse(struct req *req, const char *qs)
186 {
187         char            *key, *val;
188         size_t           keysz, valsz;
189
190         req->q.manpath  = NULL;
191         req->q.arch     = NULL;
192         req->q.sec      = NULL;
193         req->q.query    = NULL;
194         req->q.equal    = 1;
195
196         key = val = NULL;
197         while (*qs != '\0') {
198
199                 /* Parse one key. */
200
201                 keysz = strcspn(qs, "=;&");
202                 key = mandoc_strndup(qs, keysz);
203                 qs += keysz;
204                 if (*qs != '=')
205                         goto next;
206
207                 /* Parse one value. */
208
209                 valsz = strcspn(++qs, ";&");
210                 val = mandoc_strndup(qs, valsz);
211                 qs += valsz;
212
213                 /* Decode and catch encoding errors. */
214
215                 if ( ! (http_decode(key) && http_decode(val)))
216                         goto next;
217
218                 /* Handle key-value pairs. */
219
220                 if ( ! strcmp(key, "query"))
221                         set_query_attr(&req->q.query, &val);
222
223                 else if ( ! strcmp(key, "apropos"))
224                         req->q.equal = !strcmp(val, "0");
225
226                 else if ( ! strcmp(key, "manpath")) {
227 #ifdef COMPAT_OLDURI
228                         if ( ! strncmp(val, "OpenBSD ", 8)) {
229                                 val[7] = '-';
230                                 if ('C' == val[8])
231                                         val[8] = 'c';
232                         }
233 #endif
234                         set_query_attr(&req->q.manpath, &val);
235                 }
236
237                 else if ( ! (strcmp(key, "sec")
238 #ifdef COMPAT_OLDURI
239                     && strcmp(key, "sektion")
240 #endif
241                     )) {
242                         if ( ! strcmp(val, "0"))
243                                 *val = '\0';
244                         set_query_attr(&req->q.sec, &val);
245                 }
246
247                 else if ( ! strcmp(key, "arch")) {
248                         if ( ! strcmp(val, "default"))
249                                 *val = '\0';
250                         set_query_attr(&req->q.arch, &val);
251                 }
252
253                 /*
254                  * The key must be freed in any case.
255                  * The val may have been handed over to the query
256                  * structure, in which case it is now NULL.
257                  */
258 next:
259                 free(key);
260                 key = NULL;
261                 free(val);
262                 val = NULL;
263
264                 if (*qs != '\0')
265                         qs++;
266         }
267 }
268
/*
 * HTTP-decode a string.  The standard explanation is that this turns
 * "%4e+foo" into "N foo" in the regular way.  This is done in-place
 * over the allocated string; the result is never longer than the input.
 *
 * Returns 1 on success.  Returns 0 on an encoding error, that is,
 * if a '%' is not followed by exactly two hexadecimal digits or
 * if the escape encodes a NUL byte.  After a failure, the content
 * of the string is unspecified.
 */
static int
http_decode(char *p)
{
	char		 hex[3];
	char		*q;
	int		 c;

	hex[2] = '\0';

	q = p;
	for ( ; '\0' != *p; p++, q++) {
		if ('%' == *p) {
			/*
			 * Insist on two real hex digits.  The NUL
			 * terminator is not a hex digit, so this also
			 * catches a truncated escape without reading
			 * beyond the end of the string.
			 */
			if ( ! isxdigit((unsigned char)p[1]) ||
			    ! isxdigit((unsigned char)p[2]))
				return 0;
			hex[0] = p[1];
			hex[1] = p[2];
			c = (int)strtol(hex, NULL, 16);
			if (0 == c)
				return 0;

			*q = (char)c;
			p += 2;
		} else
			*q = '+' == *p ? ' ' : *p;
	}

	*q = '\0';
	return 1;
}
304
/*
 * Print the HTTP response header block and flush standard output.
 * A "Status:" line built from code and msg is printed first unless
 * the code is 200; msg is not used for code 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	const int	 ok = 200;

	if (code != ok)
		printf("Status: %d %s\r\n", code, msg);

	printf("Content-Type: text/html; charset=utf-8\r\n");
	printf("Cache-Control: no-cache\r\n");
	printf("Pragma: no-cache\r\n");
	printf("\r\n");

	fflush(stdout);
}
319
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Buffered stdio output is flushed first because the copy
 * writes to the descriptor directly.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may be short; push out the whole chunk. */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw <= 0)
				goto done;
		}
	}
done:
	/* Release the descriptor on every path. */
	close(fd);
}
333
334 static void
335 resp_begin_html(int code, const char *msg)
336 {
337
338         resp_begin_http(code, msg);
339
340         printf("<!DOCTYPE html>\n"
341                "<HTML>\n"
342                "<HEAD>\n"
343                "<META CHARSET=\"UTF-8\" />\n"
344                "<LINK REL=\"stylesheet\" HREF=\"%s/mandoc.css\""
345                " TYPE=\"text/css\" media=\"all\">\n"
346                "<TITLE>%s</TITLE>\n"
347                "</HEAD>\n"
348                "<BODY>\n"
349                "<!-- Begin page content. //-->\n",
350                CSS_DIR, CUSTOMIZE_TITLE);
351
352         resp_copy(MAN_DIR "/header.html");
353 }
354
355 static void
356 resp_end_html(void)
357 {
358
359         resp_copy(MAN_DIR "/footer.html");
360
361         puts("</BODY>\n"
362              "</HTML>");
363 }
364
365 static void
366 resp_searchform(const struct req *req)
367 {
368         int              i;
369
370         puts("<!-- Begin search form. //-->");
371         printf("<DIV ID=\"mancgi\">\n"
372                "<FORM ACTION=\"%s\" METHOD=\"get\">\n"
373                "<FIELDSET>\n"
374                "<LEGEND>Manual Page Search Parameters</LEGEND>\n",
375                scriptname);
376
377         /* Write query input box. */
378
379         printf( "<TABLE><TR><TD>\n"
380                 "<INPUT TYPE=\"text\" NAME=\"query\" VALUE=\"");
381         if (NULL != req->q.query)
382                 html_print(req->q.query);
383         puts("\" SIZE=\"40\">");
384
385         /* Write submission and reset buttons. */
386
387         printf( "<INPUT TYPE=\"submit\" VALUE=\"Submit\">\n"
388                 "<INPUT TYPE=\"reset\" VALUE=\"Reset\">\n");
389
390         /* Write show radio button */
391
392         printf( "</TD><TD>\n"
393                 "<INPUT TYPE=\"radio\" ");
394         if (req->q.equal)
395                 printf("CHECKED=\"checked\" ");
396         printf( "NAME=\"apropos\" ID=\"show\" VALUE=\"0\">\n"
397                 "<LABEL FOR=\"show\">Show named manual page</LABEL>\n");
398
399         /* Write section selector. */
400
401         puts(   "</TD></TR><TR><TD>\n"
402                 "<SELECT NAME=\"sec\">");
403         for (i = 0; i < sec_MAX; i++) {
404                 printf("<OPTION VALUE=\"%s\"", sec_numbers[i]);
405                 if (NULL != req->q.sec &&
406                     0 == strcmp(sec_numbers[i], req->q.sec))
407                         printf(" SELECTED=\"selected\"");
408                 printf(">%s</OPTION>\n", sec_names[i]);
409         }
410         puts("</SELECT>");
411
412         /* Write architecture selector. */
413
414         printf( "<SELECT NAME=\"arch\">\n"
415                 "<OPTION VALUE=\"default\"");
416         if (NULL == req->q.arch)
417                 printf(" SELECTED=\"selected\"");
418         puts(">All Architectures</OPTION>");
419         for (i = 0; i < arch_MAX; i++) {
420                 printf("<OPTION VALUE=\"%s\"", arch_names[i]);
421                 if (NULL != req->q.arch &&
422                     0 == strcmp(arch_names[i], req->q.arch))
423                         printf(" SELECTED=\"selected\"");
424                 printf(">%s</OPTION>\n", arch_names[i]);
425         }
426         puts("</SELECT>");
427
428         /* Write manpath selector. */
429
430         if (req->psz > 1) {
431                 puts("<SELECT NAME=\"manpath\">");
432                 for (i = 0; i < (int)req->psz; i++) {
433                         printf("<OPTION ");
434                         if (strcmp(req->q.manpath, req->p[i]) == 0)
435                                 printf("SELECTED=\"selected\" ");
436                         printf("VALUE=\"");
437                         html_print(req->p[i]);
438                         printf("\">");
439                         html_print(req->p[i]);
440                         puts("</OPTION>");
441                 }
442                 puts("</SELECT>");
443         }
444
445         /* Write search radio button */
446
447         printf( "</TD><TD>\n"
448                 "<INPUT TYPE=\"radio\" ");
449         if (0 == req->q.equal)
450                 printf("CHECKED=\"checked\" ");
451         printf( "NAME=\"apropos\" ID=\"search\" VALUE=\"1\">\n"
452                 "<LABEL FOR=\"search\">Search with apropos query</LABEL>\n");
453
454         puts("</TD></TR></TABLE>\n"
455              "</FIELDSET>\n"
456              "</FORM>\n"
457              "</DIV>");
458         puts("<!-- End search form. //-->");
459 }
460
/*
 * Check that a URI fragment consists only of alphanumeric
 * characters and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
474
475 static int
476 validate_manpath(const struct req *req, const char* manpath)
477 {
478         size_t   i;
479
480         if ( ! strcmp(manpath, "mandoc"))
481                 return 1;
482
483         for (i = 0; i < req->psz; i++)
484                 if ( ! strcmp(manpath, req->p[i]))
485                         return 1;
486
487         return 0;
488 }
489
/*
 * Check that a file name, after skipping one optional leading "./",
 * starts with "man" or "cat" and contains neither "../" nor "/..".
 * Returns 1 for an acceptable name, 0 otherwise.
 */
static int
validate_filename(const char *file)
{

	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that could climb out of the directory. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	if (strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
500
501 static void
502 pg_index(const struct req *req)
503 {
504
505         resp_begin_html(200, NULL);
506         resp_searchform(req);
507         printf("<P>\n"
508                "This web interface is documented in the\n"
509                "<A HREF=\"%s/mandoc/man8/man.cgi.8\">man.cgi</A>\n"
510                "manual, and the\n"
511                "<A HREF=\"%s/mandoc/man1/apropos.1\">apropos</A>\n"
512                "manual explains the query syntax.\n"
513                "</P>\n",
514                scriptname, scriptname);
515         resp_end_html();
516 }
517
/*
 * Print a page consisting of the search form and a message
 * paragraph.  The message is printed as given, without escaping.
 */
static void
pg_noresult(const struct req *req, const char *msg)
{

	resp_begin_html(200, NULL);
	resp_searchform(req);
	printf("<P>\n%s\n</P>\n", msg);
	resp_end_html();
}
528
529 static void
530 pg_error_badrequest(const char *msg)
531 {
532
533         resp_begin_html(400, "Bad Request");
534         puts("<H1>Bad Request</H1>\n"
535              "<P>\n");
536         puts(msg);
537         printf("Try again from the\n"
538                "<A HREF=\"%s\">main page</A>.\n"
539                "</P>", scriptname);
540         resp_end_html();
541 }
542
/*
 * Print a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{

	resp_begin_html(500, "Internal Server Error");
	printf("<P>Internal Server Error</P>\n");
	resp_end_html();
}
550
551 static void
552 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
553 {
554         char            *arch, *archend;
555         size_t           i, iuse, isec;
556         int              archprio, archpriouse;
557         int              prio, priouse;
558         char             sec;
559
560         for (i = 0; i < sz; i++) {
561                 if (validate_filename(r[i].file))
562                         continue;
563                 fprintf(stderr, "invalid filename %s in %s database\n",
564                     r[i].file, req->q.manpath);
565                 pg_error_internal();
566                 return;
567         }
568
569         if (1 == sz) {
570                 /*
571                  * If we have just one result, then jump there now
572                  * without any delay.
573                  */
574                 printf("Status: 303 See Other\r\n");
575                 printf("Location: http://%s%s/%s/%s",
576                     HTTP_HOST, scriptname, req->q.manpath, r[0].file);
577                 printf("\r\n"
578                      "Content-Type: text/html; charset=utf-8\r\n"
579                      "\r\n");
580                 return;
581         }
582
583         resp_begin_html(200, NULL);
584         resp_searchform(req);
585         puts("<DIV CLASS=\"results\">");
586         puts("<TABLE>");
587
588         for (i = 0; i < sz; i++) {
589                 printf("<TR>\n"
590                        "<TD CLASS=\"title\">\n"
591                        "<A HREF=\"%s/%s/%s",
592                     scriptname, req->q.manpath, r[i].file);
593                 printf("\">");
594                 html_print(r[i].names);
595                 printf("</A>\n"
596                        "</TD>\n"
597                        "<TD CLASS=\"desc\">");
598                 html_print(r[i].output);
599                 puts("</TD>\n"
600                      "</TR>");
601         }
602
603         puts("</TABLE>\n"
604              "</DIV>");
605
606         /*
607          * In man(1) mode, show one of the pages
608          * even if more than one is found.
609          */
610
611         if (req->q.equal) {
612                 puts("<HR>");
613                 iuse = 0;
614                 priouse = 10;
615                 archpriouse = 3;
616                 for (i = 0; i < sz; i++) {
617                         isec = strcspn(r[i].file, "123456789");
618                         sec = r[i].file[isec];
619                         if ('\0' == sec)
620                                 continue;
621                         prio = sec_prios[sec - '1'];
622                         if (NULL == req->q.arch) {
623                                 archprio =
624                                     (NULL == (arch = strchr(
625                                         r[i].file + isec, '/'))) ? 3 :
626                                     (NULL == (archend = strchr(
627                                         arch + 1, '/'))) ? 0 :
628                                     strncmp(arch, "amd64/",
629                                         archend - arch) ? 2 : 1;
630                                 if (archprio < archpriouse) {
631                                         archpriouse = archprio;
632                                         priouse = prio;
633                                         iuse = i;
634                                         continue;
635                                 }
636                                 if (archprio > archpriouse)
637                                         continue;
638                         }
639                         if (prio >= priouse)
640                                 continue;
641                         priouse = prio;
642                         iuse = i;
643                 }
644                 resp_show(req, r[iuse].file);
645         }
646
647         resp_end_html();
648 }
649
/*
 * Print a preformatted manual page as HTML inside a PRE element.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: an underscore overstruck with a character opens
 * italic, a few special pairs are printed as '*' or '+', and any
 * other overstruck pair opens bold.  All text goes through
 * html_putchar().  If the file cannot be opened, an error paragraph
 * is printed instead.  The req argument is not used.
 */
static void
catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<P>You specified an invalid manual file.</P>");
		return;
	}

	puts("<DIV CLASS=\"catman\">\n"
	     "<PRE>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character overstriking p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</B>");
				if ( ! italic)
					printf("<I>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * compared around the backspace, that is,
			 * at p[i] and at p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</I>");
				if (bold)
					printf("</B>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</I>");
			if ( ! bold)
				printf("<B>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</I>");
		if (bold)
			printf("</B>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</PRE>\n"
	     "</DIV>");

	fclose(f);
}
786
787 static void
788 format(const struct req *req, const char *file)
789 {
790         struct manoutput conf;
791         struct mparse   *mp;
792         struct roff_man *man;
793         void            *vp;
794         int              fd;
795         int              usepath;
796
797         if (-1 == (fd = open(file, O_RDONLY, 0))) {
798                 puts("<P>You specified an invalid manual file.</P>");
799                 return;
800         }
801
802         mchars_alloc();
803         mp = mparse_alloc(MPARSE_SO, MANDOCLEVEL_BADARG, NULL, req->q.manpath);
804         mparse_readfd(mp, fd, file);
805         close(fd);
806
807         memset(&conf, 0, sizeof(conf));
808         conf.fragment = 1;
809         usepath = strcmp(req->q.manpath, req->p[0]);
810         mandoc_asprintf(&conf.man, "%s?query=%%N&sec=%%S%s%s%s%s",
811             scriptname,
812             req->q.arch ? "&arch="       : "",
813             req->q.arch ? req->q.arch    : "",
814             usepath     ? "&manpath="    : "",
815             usepath     ? req->q.manpath : "");
816
817         mparse_result(mp, &man, NULL);
818         if (man == NULL) {
819                 fprintf(stderr, "fatal mandoc error: %s/%s\n",
820                     req->q.manpath, file);
821                 pg_error_internal();
822                 mparse_free(mp);
823                 mchars_free();
824                 return;
825         }
826
827         vp = html_alloc(&conf);
828
829         if (man->macroset == MACROSET_MDOC) {
830                 mdoc_validate(man);
831                 html_mdoc(vp, man);
832         } else {
833                 man_validate(man);
834                 html_man(vp, man);
835         }
836
837         html_free(vp);
838         mparse_free(mp);
839         mchars_free();
840         free(conf.man);
841 }
842
/*
 * Print one manual page.  One optional leading "./" is skipped;
 * a name then starting with 'c' is handed to catman(),
 * any other name to format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*name;

	name = file;
	if (name[0] == '.' && name[1] == '/')
		name += 2;

	if (*name != 'c') {
		format(req, name);
		return;
	}
	catman(req, name);
}
855
856 static void
857 pg_show(struct req *req, const char *fullpath)
858 {
859         char            *manpath;
860         const char      *file;
861
862         if ((file = strchr(fullpath, '/')) == NULL) {
863                 pg_error_badrequest(
864                     "You did not specify a page to show.");
865                 return;
866         }
867         manpath = mandoc_strndup(fullpath, file - fullpath);
868         file++;
869
870         if ( ! validate_manpath(req, manpath)) {
871                 pg_error_badrequest(
872                     "You specified an invalid manpath.");
873                 free(manpath);
874                 return;
875         }
876
877         /*
878          * Begin by chdir()ing into the manpath.
879          * This way we can pick up the database files, which are
880          * relative to the manpath root.
881          */
882
883         if (chdir(manpath) == -1) {
884                 fprintf(stderr, "chdir %s: %s\n",
885                     manpath, strerror(errno));
886                 pg_error_internal();
887                 free(manpath);
888                 return;
889         }
890
891         if (strcmp(manpath, "mandoc")) {
892                 free(req->q.manpath);
893                 req->q.manpath = manpath;
894         } else
895                 free(manpath);
896
897         if ( ! validate_filename(file)) {
898                 pg_error_badrequest(
899                     "You specified an invalid manual file.");
900                 return;
901         }
902
903         resp_begin_html(200, NULL);
904         resp_searchform(req);
905         resp_show(req, file);
906         resp_end_html();
907 }
908
909 static void
910 pg_search(const struct req *req)
911 {
912         struct mansearch          search;
913         struct manpaths           paths;
914         struct manpage           *res;
915         char                    **argv;
916         char                     *query, *rp, *wp;
917         size_t                    ressz;
918         int                       argc;
919
920         /*
921          * Begin by chdir()ing into the root of the manpath.
922          * This way we can pick up the database files, which are
923          * relative to the manpath root.
924          */
925
926         if (-1 == (chdir(req->q.manpath))) {
927                 fprintf(stderr, "chdir %s: %s\n",
928                     req->q.manpath, strerror(errno));
929                 pg_error_internal();
930                 return;
931         }
932
933         search.arch = req->q.arch;
934         search.sec = req->q.sec;
935         search.outkey = "Nd";
936         search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
937         search.firstmatch = 1;
938
939         paths.sz = 1;
940         paths.paths = mandoc_malloc(sizeof(char *));
941         paths.paths[0] = mandoc_strdup(".");
942
943         /*
944          * Break apart at spaces with backslash-escaping.
945          */
946
947         argc = 0;
948         argv = NULL;
949         rp = query = mandoc_strdup(req->q.query);
950         for (;;) {
951                 while (isspace((unsigned char)*rp))
952                         rp++;
953                 if (*rp == '\0')
954                         break;
955                 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
956                 argv[argc++] = wp = rp;
957                 for (;;) {
958                         if (isspace((unsigned char)*rp)) {
959                                 *wp = '\0';
960                                 rp++;
961                                 break;
962                         }
963                         if (rp[0] == '\\' && rp[1] != '\0')
964                                 rp++;
965                         if (wp != rp)
966                                 *wp = *rp;
967                         if (*rp == '\0')
968                                 break;
969                         wp++;
970                         rp++;
971                 }
972         }
973
974         if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
975                 pg_noresult(req, "You entered an invalid query.");
976         else if (0 == ressz)
977                 pg_noresult(req, "No results found.");
978         else
979                 pg_searchres(req, res, ressz);
980
981         free(query);
982         mansearch_free(res, ressz);
983         free(paths.paths[0]);
984         free(paths.paths);
985 }
986
987 int
988 main(void)
989 {
990         struct req       req;
991         struct itimerval itimer;
992         const char      *path;
993         const char      *querystring;
994         int              i;
995
996         /* Poor man's ReDoS mitigation. */
997
998         itimer.it_value.tv_sec = 2;
999         itimer.it_value.tv_usec = 0;
1000         itimer.it_interval.tv_sec = 2;
1001         itimer.it_interval.tv_usec = 0;
1002         if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1003                 fprintf(stderr, "setitimer: %s\n", strerror(errno));
1004                 pg_error_internal();
1005                 return EXIT_FAILURE;
1006         }
1007
1008         /* Scan our run-time environment. */
1009
1010         if (NULL == (scriptname = getenv("SCRIPT_NAME")))
1011                 scriptname = "";
1012
1013         if ( ! validate_urifrag(scriptname)) {
1014                 fprintf(stderr, "unsafe SCRIPT_NAME \"%s\"\n",
1015                     scriptname);
1016                 pg_error_internal();
1017                 return EXIT_FAILURE;
1018         }
1019
1020         /*
1021          * First we change directory into the MAN_DIR so that
1022          * subsequent scanning for manpath directories is rooted
1023          * relative to the same position.
1024          */
1025
1026         if (-1 == chdir(MAN_DIR)) {
1027                 fprintf(stderr, "MAN_DIR: %s: %s\n",
1028                     MAN_DIR, strerror(errno));
1029                 pg_error_internal();
1030                 return EXIT_FAILURE;
1031         }
1032
1033         memset(&req, 0, sizeof(struct req));
1034         pathgen(&req);
1035
1036         /* Next parse out the query string. */
1037
1038         if (NULL != (querystring = getenv("QUERY_STRING")))
1039                 http_parse(&req, querystring);
1040
1041         if (req.q.manpath == NULL)
1042                 req.q.manpath = mandoc_strdup(req.p[0]);
1043         else if ( ! validate_manpath(&req, req.q.manpath)) {
1044                 pg_error_badrequest(
1045                     "You specified an invalid manpath.");
1046                 return EXIT_FAILURE;
1047         }
1048
1049         if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1050                 pg_error_badrequest(
1051                     "You specified an invalid architecture.");
1052                 return EXIT_FAILURE;
1053         }
1054
1055         /* Dispatch to the three different pages. */
1056
1057         path = getenv("PATH_INFO");
1058         if (NULL == path)
1059                 path = "";
1060         else if ('/' == *path)
1061                 path++;
1062
1063         if ('\0' != *path)
1064                 pg_show(&req, path);
1065         else if (NULL != req.q.query)
1066                 pg_search(&req);
1067         else
1068                 pg_index(&req);
1069
1070         free(req.q.manpath);
1071         free(req.q.arch);
1072         free(req.q.sec);
1073         free(req.q.query);
1074         for (i = 0; i < (int)req.psz; i++)
1075                 free(req.p[i]);
1076         free(req.p);
1077         return EXIT_SUCCESS;
1078 }
1079
1080 /*
1081  * Scan for indexable paths.
1082  */
1083 static void
1084 pathgen(struct req *req)
1085 {
1086         FILE    *fp;
1087         char    *dp;
1088         size_t   dpsz;
1089         ssize_t  len;
1090
1091         if (NULL == (fp = fopen("manpath.conf", "r"))) {
1092                 fprintf(stderr, "%s/manpath.conf: %s\n",
1093                         MAN_DIR, strerror(errno));
1094                 pg_error_internal();
1095                 exit(EXIT_FAILURE);
1096         }
1097
1098         dp = NULL;
1099         dpsz = 0;
1100
1101         while ((len = getline(&dp, &dpsz, fp)) != -1) {
1102                 if (dp[len - 1] == '\n')
1103                         dp[--len] = '\0';
1104                 req->p = mandoc_realloc(req->p,
1105                     (req->psz + 1) * sizeof(char *));
1106                 if ( ! validate_urifrag(dp)) {
1107                         fprintf(stderr, "%s/manpath.conf contains "
1108                             "unsafe path \"%s\"\n", MAN_DIR, dp);
1109                         pg_error_internal();
1110                         exit(EXIT_FAILURE);
1111                 }
1112                 if (NULL != strchr(dp, '/')) {
1113                         fprintf(stderr, "%s/manpath.conf contains "
1114                             "path with slash \"%s\"\n", MAN_DIR, dp);
1115                         pg_error_internal();
1116                         exit(EXIT_FAILURE);
1117                 }
1118                 req->p[req->psz++] = dp;
1119                 dp = NULL;
1120                 dpsz = 0;
1121         }
1122         free(dp);
1123
1124         if ( req->p == NULL ) {
1125                 fprintf(stderr, "%s/manpath.conf is empty\n", MAN_DIR);
1126                 pg_error_internal();
1127                 exit(EXIT_FAILURE);
1128         }
1129 }