1 /* $Id: cgi.c,v 1.144 2017/01/21 01:20:31 schwarze Exp $ */
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
20 #include <sys/types.h>
34 #include "mandoc_aux.h"
41 #include "mansearch.h"
45 * A query as passed to the search function.
48 char *manpath; /* desired manual directory */
49 char *arch; /* architecture */
50 char *sec; /* manual section */
51 char *query; /* unparsed query expression */
52 int equal; /* match whole names, not substrings */
57 char **p; /* array of available manpaths */
58 size_t psz; /* number of available manpaths */
59 int isquery; /* QUERY_STRING used, not PATH_INFO */
/*
 * Forward declarations for the file-local (static) functions of this
 * CGI program, so that they can be defined below in any order.
 */
67 static void html_print(const char *);
68 static void html_putchar(char);
69 static int http_decode(char *);
70 static void parse_manpath_conf(struct req *);
71 static void parse_path_info(struct req *req, const char *path);
72 static void parse_query_string(struct req *, const char *);
73 static void pg_error_badrequest(const char *);
74 static void pg_error_internal(void);
75 static void pg_index(const struct req *);
76 static void pg_noresult(const struct req *, const char *);
77 static void pg_search(const struct req *);
78 static void pg_searchres(const struct req *,
79 struct manpage *, size_t);
80 static void pg_show(struct req *, const char *);
81 static void resp_begin_html(int, const char *);
82 static void resp_begin_http(int, const char *);
83 static void resp_catman(const struct req *, const char *);
84 static void resp_copy(const char *);
85 static void resp_end_html(void);
86 static void resp_format(const struct req *, const char *);
87 static void resp_searchform(const struct req *, enum focus);
88 static void resp_show(const struct req *, const char *);
89 static void set_query_attr(char **, char **);
90 static int validate_filename(const char *);
91 static int validate_manpath(const struct req *, const char *);
92 static int validate_urifrag(const char *);
/*
 * URI path of this CGI program, initialized from the compile-time
 * SCRIPT_NAME.  Generated links emit it directly after a leading '/';
 * it may be the empty string, in which case the code that builds links
 * omits the '/' separator that would otherwise follow it.
 */
94 static const char *scriptname = SCRIPT_NAME;
/*
 * Priority value for each manual section, indexed by the section's
 * leading digit minus '1' (so the nine entries cover digits 1-9).
 * pg_searchres() consults it while picking which of several results
 * to show.
 * NOTE(review): presumably a lower value marks a more preferred
 * section - confirm against the comparison in pg_searchres().
 */
96 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
97 static const char *const sec_numbers[] = {
98 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
100 static const char *const sec_names[] = {
102 "1 - General Commands",
104 "3 - Library Functions",
106 "4 - Device Drivers",
109 "7 - Miscellaneous Information",
110 "8 - System Manager\'s Manual",
111 "9 - Kernel Developer\'s Manual"
/*
 * Number of entries in sec_names[].  resp_searchform() uses it as the
 * bound of a loop that indexes both sec_numbers[] and sec_names[], so
 * those two tables need the same number of entries.
 */
113 static const int sec_MAX = sizeof(sec_names) / sizeof(char *);
115 static const char *const arch_names[] = {
116 "amd64", "alpha", "armv7",
117 "hppa", "i386", "landisk",
118 "loongson", "luna88k", "macppc", "mips64",
119 "octeon", "sgi", "socppc", "sparc64",
120 "amiga", "arc", "armish", "arm32",
121 "atari", "aviion", "beagle", "cats",
123 "ia64", "mac68k", "mvme68k", "mvme88k",
124 "mvmeppc", "palm", "pc532", "pegasos",
125 "pmax", "powerpc", "solbourne", "sparc",
126 "sun3", "vax", "wgrisc", "x68k",
/*
 * Number of entries in arch_names[]; bounds the loop in
 * resp_searchform() that writes one <option> per architecture.
 */
129 static const int arch_MAX = sizeof(arch_names) / sizeof(char *);
132 * Print a character, escaping HTML along the way.
133 * This will pass non-ASCII straight to output: be warned!
153 putchar((unsigned char)c);
159 * Call through to html_putchar().
160 * Accepts NULL strings.
163 html_print(const char *p)
173 * Transfer the responsibility for the allocated string *val
174 * to the query structure.
177 set_query_attr(char **attr, char **val)
190 * Parse the QUERY_STRING for key-value pairs
191 * and store the values into the query structure.
194 parse_query_string(struct req *req, const char *qs)
200 req->q.manpath = NULL;
207 while (*qs != '\0') {
211 keysz = strcspn(qs, "=;&");
212 key = mandoc_strndup(qs, keysz);
217 /* Parse one value. */
219 valsz = strcspn(++qs, ";&");
220 val = mandoc_strndup(qs, valsz);
223 /* Decode and catch encoding errors. */
225 if ( ! (http_decode(key) && http_decode(val)))
228 /* Handle key-value pairs. */
230 if ( ! strcmp(key, "query"))
231 set_query_attr(&req->q.query, &val);
233 else if ( ! strcmp(key, "apropos"))
234 req->q.equal = !strcmp(val, "0");
236 else if ( ! strcmp(key, "manpath")) {
238 if ( ! strncmp(val, "OpenBSD ", 8)) {
244 set_query_attr(&req->q.manpath, &val);
247 else if ( ! (strcmp(key, "sec")
249 && strcmp(key, "sektion")
252 if ( ! strcmp(val, "0"))
254 set_query_attr(&req->q.sec, &val);
257 else if ( ! strcmp(key, "arch")) {
258 if ( ! strcmp(val, "default"))
260 set_query_attr(&req->q.arch, &val);
264 * The key must be freed in any case.
265 * The val may have been handed over to the query
266 * structure, in which case it is now NULL.
280 * HTTP-decode a string. The standard explanation is that this turns
281 * "%4e+foo" into "N foo" in the regular way. This is done in-place
282 * over the allocated string.
294 for ( ; '\0' != *p; p++, q++) {
296 if ('\0' == (hex[0] = *(p + 1)))
298 if ('\0' == (hex[1] = *(p + 2)))
300 if (1 != sscanf(hex, "%x", &c))
308 *q = '+' == *p ? ' ' : *p;
316 resp_begin_http(int code, const char *msg)
320 printf("Status: %d %s\r\n", code, msg);
322 printf("Content-Type: text/html; charset=utf-8\r\n"
323 "Cache-Control: no-cache\r\n"
324 "Pragma: no-cache\r\n"
331 resp_copy(const char *filename)
337 if ((fd = open(filename, O_RDONLY)) != -1) {
339 while ((sz = read(fd, buf, sizeof(buf))) > 0)
340 write(STDOUT_FILENO, buf, sz);
346 resp_begin_html(int code, const char *msg)
349 resp_begin_http(code, msg);
351 printf("<!DOCTYPE html>\n"
354 " <meta charset=\"UTF-8\"/>\n"
355 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
356 " type=\"text/css\" media=\"all\">\n"
357 " <title>%s</title>\n"
360 CSS_DIR, CUSTOMIZE_TITLE);
362 resp_copy(MAN_DIR "/header.html");
369 resp_copy(MAN_DIR "/footer.html");
376 resp_searchform(const struct req *req, enum focus focus)
380 printf("<form action=\"/%s\" method=\"get\">\n"
382 " <legend>Manual Page Search Parameters</legend>\n",
385 /* Write query input box. */
387 printf(" <input type=\"text\" name=\"query\" value=\"");
388 if (req->q.query != NULL)
389 html_print(req->q.query);
390 printf( "\" size=\"40\"");
391 if (focus == FOCUS_QUERY)
392 printf(" autofocus");
395 /* Write submission buttons. */
397 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
399 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
403 /* Write section selector. */
405 puts(" <select name=\"sec\">");
406 for (i = 0; i < sec_MAX; i++) {
407 printf(" <option value=\"%s\"", sec_numbers[i]);
408 if (NULL != req->q.sec &&
409 0 == strcmp(sec_numbers[i], req->q.sec))
410 printf(" selected=\"selected\"");
411 printf(">%s</option>\n", sec_names[i]);
415 /* Write architecture selector. */
417 printf( " <select name=\"arch\">\n"
418 " <option value=\"default\"");
419 if (NULL == req->q.arch)
420 printf(" selected=\"selected\"");
421 puts(">All Architectures</option>");
422 for (i = 0; i < arch_MAX; i++) {
423 printf(" <option value=\"%s\"", arch_names[i]);
424 if (NULL != req->q.arch &&
425 0 == strcmp(arch_names[i], req->q.arch))
426 printf(" selected=\"selected\"");
427 printf(">%s</option>\n", arch_names[i]);
431 /* Write manpath selector. */
434 puts(" <select name=\"manpath\">");
435 for (i = 0; i < (int)req->psz; i++) {
437 if (strcmp(req->q.manpath, req->p[i]) == 0)
438 printf("selected=\"selected\" ");
440 html_print(req->p[i]);
442 html_print(req->p[i]);
448 puts(" </fieldset>\n"
453 validate_urifrag(const char *frag)
456 while ('\0' != *frag) {
457 if ( ! (isalnum((unsigned char)*frag) ||
458 '-' == *frag || '.' == *frag ||
459 '/' == *frag || '_' == *frag))
467 validate_manpath(const struct req *req, const char* manpath)
471 for (i = 0; i < req->psz; i++)
472 if ( ! strcmp(manpath, req->p[i]))
479 validate_filename(const char *file)
482 if ('.' == file[0] && '/' == file[1])
485 return ! (strstr(file, "../") || strstr(file, "/..") ||
486 (strncmp(file, "man", 3) && strncmp(file, "cat", 3)));
490 pg_index(const struct req *req)
493 resp_begin_html(200, NULL);
494 resp_searchform(req, FOCUS_QUERY);
496 "This web interface is documented in the\n"
497 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
499 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
500 "manual explains the query syntax.\n"
502 scriptname, *scriptname == '\0' ? "" : "/",
503 scriptname, *scriptname == '\0' ? "" : "/");
508 pg_noresult(const struct req *req, const char *msg)
510 resp_begin_html(200, NULL);
511 resp_searchform(req, FOCUS_QUERY);
519 pg_error_badrequest(const char *msg)
522 resp_begin_html(400, "Bad Request");
523 puts("<h1>Bad Request</h1>\n"
526 printf("Try again from the\n"
527 "<a href=\"/%s\">main page</a>.\n"
533 pg_error_internal(void)
535 resp_begin_html(500, "Internal Server Error");
536 puts("<p>Internal Server Error</p>");
541 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
543 char *arch, *archend;
546 int archprio, archpriouse;
549 for (i = 0; i < sz; i++) {
550 if (validate_filename(r[i].file))
552 warnx("invalid filename %s in %s database",
553 r[i].file, req->q.manpath);
558 if (req->isquery && sz == 1) {
560 * If we have just one result, then jump there now
563 printf("Status: 303 See Other\r\n");
564 printf("Location: http://%s/%s%s%s/%s",
565 HTTP_HOST, scriptname,
566 *scriptname == '\0' ? "" : "/",
567 req->q.manpath, r[0].file);
569 "Content-Type: text/html; charset=utf-8\r\n"
574 resp_begin_html(200, NULL);
576 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
579 puts("<table class=\"results\">");
580 for (i = 0; i < sz; i++) {
583 "<a class=\"Xr\" href=\"/%s%s%s/%s\">",
584 scriptname, *scriptname == '\0' ? "" : "/",
585 req->q.manpath, r[i].file);
586 html_print(r[i].names);
588 " <td><span class=\"Nd\">");
589 html_print(r[i].output);
590 puts("</span></td>\n"
597 * In man(1) mode, show one of the pages
598 * even if more than one is found.
601 if (req->q.equal || sz == 1) {
606 for (i = 0; i < sz; i++) {
608 sec += strcspn(sec, "123456789");
611 prio = sec_prios[sec[0] - '1'];
614 if (req->q.arch == NULL) {
616 ((arch = strchr(sec + 1, '/'))
618 ((archend = strchr(arch + 1, '/'))
620 strncmp(arch, "amd64/",
621 archend - arch) ? 2 : 1;
622 if (archprio < archpriouse) {
623 archpriouse = archprio;
628 if (archprio > archpriouse)
636 resp_show(req, r[iuse].file);
643 resp_catman(const struct req *req, const char *file)
652 if ((f = fopen(file, "r")) == NULL) {
653 puts("<p>You specified an invalid manual file.</p>");
657 puts("<div class=\"catman\">\n"
663 while ((len = getline(&p, &sz, f)) != -1) {
665 for (i = 0; i < len - 1; i++) {
667 * This means that the catpage is out of state.
668 * Ignore it and keep going (although the
672 if ('\b' == p[i] || '\n' == p[i])
676 * Print a regular character.
677 * Close out any bold/italic scopes.
678 * If we're in back-space mode, make sure we'll
679 * have something to enter when we backspace.
682 if ('\b' != p[i + 1]) {
690 } else if (i + 2 >= len)
708 * Handle funny behaviour troff-isms.
709 * These were grok'd from the original man2html.c.
712 if (('+' == p[i] && 'o' == p[i + 2]) ||
713 ('o' == p[i] && '+' == p[i + 2]) ||
714 ('|' == p[i] && '=' == p[i + 2]) ||
715 ('=' == p[i] && '|' == p[i + 2]) ||
716 ('*' == p[i] && '=' == p[i + 2]) ||
717 ('=' == p[i] && '*' == p[i + 2]) ||
718 ('*' == p[i] && '|' == p[i + 2]) ||
719 ('|' == p[i] && '*' == p[i + 2])) {
728 } else if (('|' == p[i] && '-' == p[i + 2]) ||
729 ('-' == p[i] && '|' == p[i + 1]) ||
730 ('+' == p[i] && '-' == p[i + 1]) ||
731 ('-' == p[i] && '+' == p[i + 1]) ||
732 ('+' == p[i] && '|' == p[i + 1]) ||
733 ('|' == p[i] && '+' == p[i + 1])) {
757 * Clean up the last character.
758 * We can get to a newline; don't print that.
766 if (i == len - 1 && p[i] != '\n')
780 resp_format(const struct req *req, const char *file)
782 struct manoutput conf;
784 struct roff_man *man;
789 if (-1 == (fd = open(file, O_RDONLY, 0))) {
790 puts("<p>You specified an invalid manual file.</p>");
795 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
796 MANDOCLEVEL_BADARG, NULL, req->q.manpath);
797 mparse_readfd(mp, fd, file);
800 memset(&conf, 0, sizeof(conf));
802 usepath = strcmp(req->q.manpath, req->p[0]);
803 mandoc_asprintf(&conf.man, "/%s%s%%N.%%S",
804 usepath ? req->q.manpath : "", usepath ? "/" : "");
806 mparse_result(mp, &man, NULL);
808 warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
815 vp = html_alloc(&conf);
817 if (man->macroset == MACROSET_MDOC) {
832 resp_show(const struct req *req, const char *file)
835 if ('.' == file[0] && '/' == file[1])
839 resp_catman(req, file);
841 resp_format(req, file);
845 pg_show(struct req *req, const char *fullpath)
850 if ((file = strchr(fullpath, '/')) == NULL) {
852 "You did not specify a page to show.");
855 manpath = mandoc_strndup(fullpath, file - fullpath);
858 if ( ! validate_manpath(req, manpath)) {
860 "You specified an invalid manpath.");
866 * Begin by chdir()ing into the manpath.
867 * This way we can pick up the database files, which are
868 * relative to the manpath root.
871 if (chdir(manpath) == -1) {
872 warn("chdir %s", manpath);
879 if ( ! validate_filename(file)) {
881 "You specified an invalid manual file.");
885 resp_begin_html(200, NULL);
886 resp_searchform(req, FOCUS_NONE);
887 resp_show(req, file);
892 pg_search(const struct req *req)
894 struct mansearch search;
895 struct manpaths paths;
898 char *query, *rp, *wp;
903 * Begin by chdir()ing into the root of the manpath.
904 * This way we can pick up the database files, which are
905 * relative to the manpath root.
908 if (chdir(req->q.manpath) == -1) {
909 warn("chdir %s", req->q.manpath);
914 search.arch = req->q.arch;
915 search.sec = req->q.sec;
916 search.outkey = "Nd";
917 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
918 search.firstmatch = 1;
921 paths.paths = mandoc_malloc(sizeof(char *));
922 paths.paths[0] = mandoc_strdup(".");
925 * Break apart at spaces with backslash-escaping.
930 rp = query = mandoc_strdup(req->q.query);
932 while (isspace((unsigned char)*rp))
936 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
937 argv[argc++] = wp = rp;
939 if (isspace((unsigned char)*rp)) {
944 if (rp[0] == '\\' && rp[1] != '\0')
955 if (0 == mansearch(&search, &paths, argc, argv, &res, &ressz))
956 pg_noresult(req, "You entered an invalid query.");
958 pg_noresult(req, "No results found.");
960 pg_searchres(req, res, ressz);
963 mansearch_free(res, ressz);
964 free(paths.paths[0]);
972 struct itimerval itimer;
974 const char *querystring;
977 /* Poor man's ReDoS mitigation. */
979 itimer.it_value.tv_sec = 2;
980 itimer.it_value.tv_usec = 0;
981 itimer.it_interval.tv_sec = 2;
982 itimer.it_interval.tv_usec = 0;
983 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
990 * First we change directory into the MAN_DIR so that
991 * subsequent scanning for manpath directories is rooted
992 * relative to the same position.
995 if (chdir(MAN_DIR) == -1) {
996 warn("MAN_DIR: %s", MAN_DIR);
1001 memset(&req, 0, sizeof(struct req));
1003 parse_manpath_conf(&req);
1005 /* Parse the path info and the query string. */
1007 if ((path = getenv("PATH_INFO")) == NULL)
1009 else if (*path == '/')
1012 if (*path != '\0') {
1013 parse_path_info(&req, path);
1014 if (req.q.manpath == NULL || access(path, F_OK) == -1)
1016 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1017 parse_query_string(&req, querystring);
1019 /* Validate parsed data and add defaults. */
1021 if (req.q.manpath == NULL)
1022 req.q.manpath = mandoc_strdup(req.p[0]);
1023 else if ( ! validate_manpath(&req, req.q.manpath)) {
1024 pg_error_badrequest(
1025 "You specified an invalid manpath.");
1026 return EXIT_FAILURE;
1029 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1030 pg_error_badrequest(
1031 "You specified an invalid architecture.");
1032 return EXIT_FAILURE;
1035 /* Dispatch to the three different pages. */
1038 pg_show(&req, path);
1039 else if (NULL != req.q.query)
1044 free(req.q.manpath);
1048 for (i = 0; i < (int)req.psz; i++)
1051 return EXIT_SUCCESS;
1055 * If PATH_INFO is not a file name, translate it to a query.
1058 parse_path_info(struct req *req, const char *path)
1065 req->q.manpath = mandoc_strdup(path);
1068 /* Mandatory manual page name. */
1069 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1070 req->q.query = req->q.manpath;
1071 req->q.manpath = NULL;
1073 *req->q.query++ = '\0';
1075 /* Optional trailing section. */
1076 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1077 if(isdigit((unsigned char)req->q.sec[1])) {
1078 *req->q.sec++ = '\0';
1079 req->q.sec = mandoc_strdup(req->q.sec);
1084 /* Handle the case of name[.section] only. */
1085 if (req->q.manpath == NULL)
1087 req->q.query = mandoc_strdup(req->q.query);
1089 /* Split directory components. */
1090 dir[i = 0] = req->q.manpath;
1091 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1093 pg_error_badrequest(
1094 "You specified too many directory components.");
1100 /* Optional manpath. */
1101 if ((i = validate_manpath(req, req->q.manpath)) == 0)
1102 req->q.manpath = NULL;
1103 else if (dir[1] == NULL)
1106 /* Optional section. */
1107 if (strncmp(dir[i], "man", 3) == 0) {
1109 req->q.sec = mandoc_strdup(dir[i++] + 3);
1111 if (dir[i] == NULL) {
1112 if (req->q.manpath == NULL)
1116 if (dir[i + 1] != NULL) {
1117 pg_error_badrequest(
1118 "You specified an invalid directory component.");
1122 /* Optional architecture. */
1124 req->q.arch = mandoc_strdup(dir[i]);
1125 if (req->q.manpath == NULL)
1128 req->q.arch = dir[0];
1132 * Scan for indexable paths.
1135 parse_manpath_conf(struct req *req)
1142 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1143 warn("%s/manpath.conf", MAN_DIR);
1144 pg_error_internal();
1151 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1152 if (dp[len - 1] == '\n')
1154 req->p = mandoc_realloc(req->p,
1155 (req->psz + 1) * sizeof(char *));
1156 if ( ! validate_urifrag(dp)) {
1157 warnx("%s/manpath.conf contains "
1158 "unsafe path \"%s\"", MAN_DIR, dp);
1159 pg_error_internal();
1162 if (strchr(dp, '/') != NULL) {
1163 warnx("%s/manpath.conf contains "
1164 "path with slash \"%s\"", MAN_DIR, dp);
1165 pg_error_internal();
1168 req->p[req->psz++] = dp;
1174 if (req->p == NULL) {
1175 warnx("%s/manpath.conf is empty", MAN_DIR);
1176 pg_error_internal();