2 * Copyright (c) 2002 John Rochester
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/queue.h>
43 #include <stringlist.h>
47 #define DEFAULT_MANPATH "/usr/share/man"
48 #define LINE_ALLOC 4096
50 static char blank[] = "";
53 * Information collected about each man page in a section.
64 * An entry kept for each visited directory.
69 SLIST_ENTRY(visited_dir) next;
76 char * content; /* the start of the buffer */
77 char * end; /* just past the end of the content */
78 char * last; /* the last allocated character */
82 * Removes the last amount characters from the sbuf.
/*
 * Moves the end pointer back by amount characters.  Each macro argument is
 * evaluated once.  No check is made that amount is within the current
 * content length.
 */
84 #define sbuf_retract(sbuf, amount) \
85 ((sbuf)->end -= (amount))
87 * Returns the length of the sbuf content.
/*
 * Pointer difference between end and content.  The sbuf argument is
 * evaluated twice, so it should be free of side effects.
 */
89 #define sbuf_length(sbuf) \
90 ((sbuf)->end - (sbuf)->content)
92 typedef char *edited_copy(char *from, char *to, int length);
94 static int append; /* -a flag: append to existing whatis */
95 static int verbose; /* -v flag: be verbose with warnings */
96 static int indent = 24; /* -i option: description indentation */
97 static const char *whatis_name="whatis";/* -n option: the name */
98 static char *common_output; /* -o option: the single output file */
99 static char *locale; /* user's locale if -L is used */
100 static char *lang_locale; /* short form of locale */
101 static const char *machine;
103 static int exit_code; /* exit code to use when finished */
104 static SLIST_HEAD(, visited_dir) visited_dirs =
105 SLIST_HEAD_INITIALIZER(visited_dirs);
108 * While the whatis line is being formed, it is stored in whatis_proto.
109 * When finished, it is reformatted into whatis_final and then appended to whatis_lines.
112 static struct sbuf *whatis_proto;
113 static struct sbuf *whatis_final;
114 static StringList *whatis_lines; /* collected output lines */
116 static char tmp_file[MAXPATHLEN]; /* path of temporary file, if any */
118 /* A set of possible names for the NAME man page section */
119 static const char *name_section_titles[] = {
120 "NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce",
121 "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL
124 /* A subset of the mdoc(7) commands to ignore */
125 static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
128 * Frees a struct page_info and its content.
/*
 * Releases a page_info record.
 * NOTE(review): only the release of the filename string is visible here; the
 * stated contract (the record and its content) implies the other members and
 * the record itself are released as well - confirm.
 */
131 free_page_info(struct page_info *info)
133 free(info->filename);
140 * Allocates and fills in a new struct page_info given the
141 * name of the man section directory and the dirent of the file.
142 * If the file is not a man page, returns NULL.
/*
 * Builds a page_info record for one directory entry.
 *
 *   dir     path of the man section directory
 *   dirent  entry read from that directory; d_name is examined in place
 *
 * The visible failure paths are: an invalid page name, a stat() failure and
 * an entry that is not a regular file.  Each releases what was allocated.
 */
144 static struct page_info *
145 new_page_info(char *dir, struct dirent *dirent)
147 struct page_info *info;
152 info = (struct page_info *) malloc(sizeof(struct page_info));
155 basename_length = strlen(dirent->d_name);
/* suffix starts at the terminating NUL of the entry name */
156 suffix = &dirent->d_name[basename_length];
/* NOTE(review): the asprintf() result is not checked on this line */
157 asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
/* gzipped is set when the name has at least 4 characters and ends in .gz */
158 if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
/* step backwards; the test fires at the start of the name or at the first non-alphanumeric character */
163 if (--suffix == dirent->d_name || !isalnum(*suffix)) {
167 warnx("%s: invalid man page name", info->filename);
168 free(info->filename);
/* NOTE(review): neither strdup() result is checked on these lines */
174 info->name = strdup(dirent->d_name);
175 info->suffix = strdup(suffix);
/* a file that cannot be examined is reported and discarded */
176 if (stat(info->filename, &st) < 0) {
177 warn("%s", info->filename);
178 free_page_info(info);
/* anything but a regular file is discarded; directories are discarded silently even when verbose */
181 if (!S_ISREG(st.st_mode)) {
182 if (verbose && !S_ISDIR(st.st_mode))
183 warnx("%s: not a regular file", info->filename);
184 free_page_info(info);
/* the inode number is recorded; pagesort and process_section compare it */
187 info->inode = st.st_ino;
192 * Reset an sbuf's length to 0.
195 sbuf_clear(struct sbuf *sbuf)
197 sbuf->end = sbuf->content;
201 * Allocate a new sbuf.
/*
 * NOTE(review): both malloc() results are used on the very next line
 * without a NULL check.
 */
206 struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
207 sbuf->content = (char *) malloc(LINE_ALLOC);
/* last addresses the final byte of the LINE_ALLOC-byte buffer */
208 sbuf->last = sbuf->content + LINE_ALLOC - 1;
214 * Ensure that there is enough room in the sbuf for nchars more characters.
/*
 * Grows the buffer until nchars more characters fit before last.
 * The content is moved into a newly allocated buffer and all three
 * pointers (content, end, last) are re-based onto it.
 */
217 sbuf_need(struct sbuf *sbuf, int nchars)
220 size_t size, cntsize;
222 /* double the size of the allocation until the buffer is big enough */
223 while (sbuf->end + nchars > sbuf->last) {
/* current capacity in bytes (last is the final allocated character) */
224 size = sbuf->last + 1 - sbuf->content;
/* number of bytes currently in use */
226 cntsize = sbuf->end - sbuf->content;
/* NOTE(review): the malloc() result is passed to memcpy() unchecked */
228 new_content = (char *)malloc(size);
229 memcpy(new_content, sbuf->content, cntsize);
231 sbuf->content = new_content;
232 sbuf->end = new_content + cntsize;
233 sbuf->last = new_content + size - 1;
238 * Appends a string of a given length to the sbuf.
/*
 * Appends length bytes of text to the sbuf: room is reserved first, then
 * the bytes are copied to the current end of the content.
 * NOTE(review): the update of the end pointer is not among the lines
 * shown here - confirm.
 */
241 sbuf_append(struct sbuf *sbuf, const char *text, int length)
244 sbuf_need(sbuf, length);
245 memcpy(sbuf->end, text, length);
/*
 * Appends a NUL-terminated string to the sbuf.  The length is measured
 * with strlen() and the copy itself is delegated to sbuf_append().
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	const size_t text_len = strlen(text);

	sbuf_append(sbuf, text, text_len);
}
260 * Appends an edited null-terminated string to the sbuf.
/*
 * Appends text through an editing callback.
 *
 *   text  NUL-terminated source string
 *   copy  callback that receives the source, the current end of the sbuf
 *         and the source length; its return value becomes the new end
 */
263 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
265 int length = strlen(text);
/* room is reserved for the unedited length; assumes the callback never writes more than that - TODO confirm */
267 sbuf_need(sbuf, length);
268 sbuf->end = copy(text, sbuf->end, length);
273 * Strips any of a set of chars from the end of the sbuf.
276 sbuf_strip(struct sbuf *sbuf, const char *set)
278 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
283 * Returns the null-terminated string built by the sbuf.
/*
 * Returns the start of the buffer.
 * NOTE(review): the contract promises a NUL-terminated string; the
 * termination step is not among the lines shown here - confirm.
 */
286 sbuf_content(struct sbuf *sbuf)
289 return sbuf->content;
293 * Returns true if no man page exists in the directory with
294 * any of the names in the StringList.
/*
 * Probes dir for a page file for each entry of names, using suffix as the
 * section suffix.  Both the gzipped and the plain file name are tried.
 */
297 no_page_exists(char *dir, StringList *names, char *suffix)
299 char path[MAXPATHLEN];
302 for (i = 0; i < names->sl_cur; i++) {
/* the gzipped name is tried first; NOTE(review): truncation by snprintf is not detected */
303 snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
304 if (access(path, F_OK) < 0) {
/* cut the three-character .gz ending and try the plain name */
305 path[strlen(path) - 3] = '\0';
306 if (access(path, F_OK) < 0)
/*
 * Signal handler; main installs it for SIGINT, SIGHUP, SIGQUIT and SIGTERM.
 * The signal number is unused.  The visible test acts only when a temporary
 * file path has been recorded in tmp_file.
 */
315 trap_signal(int sig __unused)
317 if (tmp_file[0] != '\0')
323 * Attempts to open an output file. Returns NULL if unsuccessful.
/*
 * Prepares the output file called name and resets the collected line list.
 */
326 open_output(char *name)
/* every call starts with a fresh, empty list of output lines */
330 whatis_lines = sl_init();
332 char line[LINE_ALLOC];
/* the existing file is read back so that its lines are kept */
334 output = fopen(name, "r");
335 if (output == NULL) {
340 while (fgets(line, sizeof line, output) != NULL) {
/*
 * Drops the last character of what was read, presumably the newline.
 * NOTE(review): a final line without a newline, or a line longer than the
 * buffer, loses a real character here.
 */
341 line[strlen(line) - 1] = '\0';
342 sl_add(whatis_lines, strdup(line));
/* without a common output file, the path name.tmp is recorded in tmp_file */
345 if (common_output == NULL) {
346 snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
349 output = fopen(name, "w");
350 if (output == NULL) {
/*
 * qsort() comparison callback for an array of string pointers: each
 * argument points at one array slot, and the two strings are ordered
 * with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
365 * Writes the unique sorted lines to the output file.
/*
 * Emits the collected lines to output in sorted order, then releases the
 * list.  name is the final path of the output file.
 */
368 finish_output(FILE *output, char *name)
/* sorting places identical lines next to each other */
373 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
374 for (i = 0; i < whatis_lines->sl_cur; i++) {
375 char *line = whatis_lines->sl_str[i];
/* a line equal to the previous one is a duplicate */
376 if (i > 0 && strcmp(line, prev) == 0)
/* the second argument asks sl_free to release the strings as well - see stringlist(3) */
383 sl_free(whatis_lines, 1);
/* NOTE(review): the rename() result is not checked on this line */
384 if (common_output == NULL) {
385 rename(tmp_file, name);
391 open_whatis(char *mandir)
393 char filename[MAXPATHLEN];
395 snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
396 return open_output(filename);
400 finish_whatis(FILE *output, char *mandir)
402 char filename[MAXPATHLEN];
404 snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
405 finish_output(output, filename);
409 * Tests to see if the given directory has already been visited.
/*
 * Checks dir against the list of directories seen so far and records it
 * when it is new.  A directory is identified by its inode and device
 * numbers, not by its path.
 */
412 already_visited(char *dir)
415 struct visited_dir *visit;
417 if (stat(dir, &st) < 0) {
/* linear search of the visited list for the same inode and device pair */
422 SLIST_FOREACH(visit, &visited_dirs, next) {
423 if (visit->inode == st.st_ino &&
424 visit->device == st.st_dev) {
425 warnx("already visited %s", dir);
/* NOTE(review): the malloc() result is dereferenced on the next line without a NULL check */
429 visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
430 visit->device = st.st_dev;
431 visit->inode = st.st_ino;
432 SLIST_INSERT_HEAD(&visited_dirs, visit, next);
437 * Removes trailing spaces from a string, returning a pointer to just
438 * beyond the new last character.
/* rhs starts at the terminating NUL and walks backwards over white space */
443 char *rhs = &str[strlen(str)];
/*
 * NOTE(review): the pre-decrement happens before the comparison, so for an
 * empty string rhs is moved to one position before str.  The character at
 * str itself is never tested by this loop.
 */
444 while (--rhs > str && isspace(*rhs))
451 * Returns a pointer to the next non-space character in the string.
/* the loop condition holds while s points at white space and the end of the string is not reached */
456 while (*s != '\0' && isspace(*s))
/*
 * Returns whether the string consists of one or more decimal digits and
 * nothing else.
 *
 *   line  NUL-terminated string; it is only read
 *
 * Returns 1 when every character is a digit and there is at least one,
 * 0 otherwise (including for the empty string).
 */
static int
only_digits(char *line)
{
	/*
	 * The <ctype.h> classifiers are only defined for EOF and values
	 * representable as unsigned char, so the bytes are read as unsigned
	 * char rather than as plain (possibly signed) char.
	 */
	const unsigned char *p = (const unsigned char *)line;

	if (!isdigit(*p))
		return 0;
	do {
		p++;
	} while (isdigit(*p));
	return *p == '\0';
}
475 * Returns whether the line is of one of the forms:
479 * assuming that section_start is ".Sh".
/*
 * Tests whether line is a section header introducing the NAME section.
 *
 *   line           input line; trim_rhs is applied to it
 *   section_start  three-character section macro, .SH or .Sh at the call sites
 */
482 name_section_line(char *line, const char *section_start)
/* only the first three characters are compared with the section macro */
487 if (strncmp(line, section_start, 3) != 0)
/* isolate the title: skip the macro and leading blanks, then trim the right-hand side */
489 line = skip_spaces(line + 3);
490 rhs = trim_rhs(line);
/* exact match against each known NAME title; the table ends with NULL */
496 for (title = name_section_titles; *title != NULL; title++)
497 if (strcmp(*title, line) == 0)
503 * Copies characters while removing the most common nroff/troff
505 * \(em, \(mi, \s[+-N], \&
506 * \fF, \f(fo, \f[font]
507 * \*s, \*(st, \*[stringvar]
/*
 * Copy callback of type edited_copy: reads fromlen characters starting at
 * from and writes the edited text at to.
 */
510 de_nroff_copy(char *from, char *to, int fromlen)
512 char *from_end = &from[fromlen];
513 while (from < from_end) {
/* the two-letter escapes em and mi are recognised here */
518 if (strncmp(&from[1], "em", 2) == 0 ||
519 strncmp(&from[1], "mi", 2) == 0) {
527 while (isdigit(*from))
534 else if (*from == '[') {
/*
 * Skips to the closing bracket.
 * NOTE(review): the character is read before the bound is tested, so an
 * unterminated bracket reads one position at from_end.
 */
535 while (*++from != ']' && from < from_end);
552 * Appends a string with the nroff formatting removed.
555 add_nroff(char *text)
557 sbuf_append_edited(whatis_proto, text, de_nroff_copy);
561 * Appends "name(suffix), " to whatis_final.
/*
 * Appends one entry made of the name, the suffix in parentheses, a comma
 * and a space to whatis_final.  process_page later retracts the final
 * comma and space.
 */
564 add_whatis_name(char *name, char *suffix)
567 sbuf_append_str(whatis_final, name);
568 sbuf_append(whatis_final, "(", 1);
569 sbuf_append_str(whatis_final, suffix);
/* three characters: closing parenthesis, comma, space */
570 sbuf_append(whatis_final, "), ", 3);
575 * Processes an old-style man(7) line. This ignores commands with only
576 * a single number argument.
/*
 * Handles one line of an old-style NAME section and adds text to
 * whatis_proto.
 */
579 process_man_line(char *line)
/* advances over the alphabetic characters after the first character of the line */
582 while (isalpha(*++line))
584 line = skip_spaces(line);
/* a purely numeric argument is detected here */
585 if (only_digits(line))
588 line = skip_spaces(line);
/* a single space is appended to whatis_proto as a separator */
591 sbuf_append(whatis_proto, " ", 1);
596 * Processes a new-style mdoc(7) line.
/*
 * Handles one line of a new-style NAME section and adds text to
 * whatis_proto.  The line buffer is modified in place (see the memmove
 * below).  Two-letter words shaped like macro names are checked against Ns
 * and the mdoc_commands list.  A trailing space is appended only when the
 * line added something to whatis_proto.
 */
599 process_mdoc_line(char *line)
603 char *line_end = &line[strlen(line)];
/* remembered so that the end of the function can tell whether anything was added */
604 int orig_length = sbuf_length(whatis_proto);
/* a line that does not start with a dot, an upper-case and a lower-case letter is added as plain text */
609 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
610 add_nroff(skip_spaces(line));
611 sbuf_append(whatis_proto, " ", 1);
/* xref records whether the line starts with the .Xr macro */
614 xref = strncmp(line, ".Xr", 3) == 0;
616 while ((line = skip_spaces(line)) < line_end) {
620 next = strchr(next, '"');
623 memmove(next, next + 1, strlen(next));
630 next = strpbrk(line, " \t");
635 if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
636 if (strcmp(line, "Ns") == 0) {
641 if (strstr(mdoc_commands, line) != NULL) {
646 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
648 sbuf_append(whatis_proto, "(", 1);
650 sbuf_append(whatis_proto, ")", 1);
654 sbuf_append(whatis_proto, " ", 1);
660 if (sbuf_length(whatis_proto) > orig_length)
661 sbuf_append(whatis_proto, " ", 1);
665 * Collects a list of comma-separated names from the text.
/*
 * Splits text into names and stores them in the names list.
 *   names  destination StringList
 *   text   source string; the visible line searches it for commas
 */
668 collect_names(StringList *names, char *text)
/* locate the next comma separator */
674 text = strchr(text, ',');
685 enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
688 * Processes a man page source into a single whatis line and adds it
/*
 * Reads one man page source and turns its NAME section into a single
 * whatis line, which is added to whatis_lines.
 *
 *   page         the page to read; filename, name and suffix are used
 *   section_dir  directory of the section, used to probe for sibling pages
 *
 * The text is first gathered in whatis_proto while a small state machine
 * (STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC) tracks the
 * position in the page.  It is then reformatted into whatis_final: each name
 * followed by the suffix, padding up to indent, a dash separator and the
 * description.
 */
692 process_page(struct page_info *page, char *section_dir)
699 int state = STATE_UNKNOWN;
702 sbuf_clear(whatis_proto);
/* the page is opened through zlib */
703 if ((in = gzopen(page->filename, "r")) == NULL) {
704 warn("%s", page->filename);
708 while (gzgets(in, buffer, sizeof buffer) != NULL) {
710 if (strncmp(line, ".\\\"", 3) == 0) /* ignore comments */
714 * haven't reached the NAME section yet.
/* the style of the NAME header selects the old-style or the new-style state */
717 if (name_section_line(line, ".SH"))
718 state = STATE_MANSTYLE;
719 else if (name_section_line(line, ".Sh"))
720 state = STATE_MDOCNAME;
723 * Inside an old-style .SH NAME section.
726 if (strncmp(line, ".SH", 3) == 0)
728 if (strncmp(line, ".SS", 3) == 0)
731 if (strcmp(line, ".") == 0)
733 if (strncmp(line, ".IX", 3) == 0) {
735 line = skip_spaces(line);
737 process_man_line(line);
740 * Inside a new-style .Sh NAME section (the .Nm part).
744 if (strncmp(line, ".Nm", 3) == 0) {
745 process_mdoc_line(line);
748 if (strcmp(line, ".") == 0)
/* the dash separator is inserted when the name part is over */
750 sbuf_append(whatis_proto, "- ", 2);
751 state = STATE_MDOCDESC;
755 * Inside a new-style .Sh NAME section (after the .Nm-s).
758 if (strncmp(line, ".Sh", 3) == 0)
761 if (strcmp(line, ".") == 0)
763 process_mdoc_line(line);
/* trailing blanks, tabs, dots and dashes are removed from the gathered text */
769 sbuf_strip(whatis_proto, " \t.-");
770 line = sbuf_content(whatis_proto);
772 * line now contains the appropriate data, but without
773 * the proper indentation or the section appended to each name.
/* the description starts after the dash separator; a plain space is the fallback */
775 descr = strstr(line, " - ");
777 descr = strchr(line, ' ');
780 fprintf(stderr, " ignoring junk description \"%s\"\n", line);
789 collect_names(names, line);
790 sbuf_clear(whatis_final);
/* the file name is added when it is not among the names and no listed name has a page file */
791 if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
793 * Add the page name since that's the only thing that
796 add_whatis_name(page->name, page->suffix);
798 for (i = 0; i < names->sl_cur; i++)
799 add_whatis_name(names->sl_str[i], page->suffix);
801 sbuf_retract(whatis_final, 2); /* remove last ", " */
802 while (sbuf_length(whatis_final) < indent)
803 sbuf_append(whatis_final, " ", 1);
804 sbuf_append(whatis_final, " - ", 3);
805 sbuf_append_str(whatis_final, skip_spaces(descr));
806 sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
810 * Sorts pages first by inode number, then by name.
813 pagesort(const void *a, const void *b)
815 const struct page_info *p1 = *(struct page_info * const *) a;
816 const struct page_info *p2 = *(struct page_info * const *) b;
817 if (p1->inode == p2->inode)
818 return strcmp(p1->name, p2->name);
819 return p1->inode - p2->inode;
823 * Processes a single man section.
/*
 * Processes one man section directory: the entries are scanned, a page_info
 * is built for each usable entry, the pages are sorted with pagesort and
 * each distinct inode is processed once.
 */
826 process_section(char *section_dir)
828 struct dirent **entries;
830 struct page_info **pages;
/* inode of the previously handled page, used to detect hard-linked duplicates */
833 ino_t prev_inode = 0;
836 fprintf(stderr, " %s\n", section_dir);
839 * scan the man section directory for pages
841 nentries = scandir(section_dir, &entries, NULL, alphasort);
843 warn("%s", section_dir);
848 * collect information about man pages
/* NOTE(review): the calloc() result is not checked on this line */
850 pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
851 for (i = 0; i < nentries; i++) {
852 struct page_info *info = new_page_info(section_dir, entries[i]);
854 pages[npages++] = info;
/* after sorting, pages that share an inode are next to each other */
858 qsort(pages, npages, sizeof(struct page_info *), pagesort);
860 * process each unique page
862 for (i = 0; i < npages; i++) {
863 struct page_info *page = pages[i];
/* only the first page of a run with the same inode is read */
864 if (page->inode != prev_inode) {
865 prev_inode = page->inode;
867 fprintf(stderr, " reading %s\n", page->filename);
868 process_page(page, section_dir);
870 fprintf(stderr, " skipping %s, duplicate\n", page->filename);
871 free_page_info(page);
877 * Returns whether the directory entry is a man page section.
/*
 * Filter passed to scandir() by process_mandir.
 */
880 select_sections(struct dirent *entry)
/* p addresses the part of the name that follows a three-character prefix */
882 char *p = &entry->d_name[3];
/* the name must start with man */
884 if (strncmp(entry->d_name, "man", 3) != 0)
894 * Processes a single top-level man directory by finding all the
895 * sub-directories named man* and processing each one in turn.
/*
 * Processes one top-level man directory: each section directory accepted by
 * select_sections is processed, together with its machine-specific
 * sub-directory when that exists.  Without a common output file, a whatis
 * file is opened and finished per directory.
 */
898 process_mandir(char *dir_name)
900 struct dirent **entries;
/* a directory reached a second time is not processed again */
906 if (already_visited(dir_name))
909 fprintf(stderr, "man directory %s\n", dir_name);
910 nsections = scandir(dir_name, &entries, select_sections, alphasort);
912 warn("%s", dir_name);
916 if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
918 for (i = 0; i < nsections; i++) {
919 char section_dir[MAXPATHLEN];
/* NOTE(review): truncation by snprintf is not detected on these lines */
920 snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
921 process_section(section_dir);
/* the same buffer is reused for the machine-specific sub-directory */
922 snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
923 entries[i]->d_name, machine);
924 if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
925 process_section(section_dir);
929 if (common_output == NULL)
930 finish_whatis(fp, dir_name);
934 * Processes one argument, which may be a colon-separated list of
/*
 * Processes one argument, split at colons into directories.  For each
 * directory the locale and short-locale sub-directories are processed when
 * the corresponding global is set.
 */
938 process_argument(const char *arg)
946 err(1, "out of memory");
/* strsep() cuts the working copy at each colon */
947 while ((dir = strsep(&parg, ":")) != NULL) {
/* NOTE(review): the asprintf() results are not checked on these lines */
948 if (locale != NULL) {
949 asprintf(&mandir, "%s/%s", dir, locale);
950 process_mandir(mandir);
952 if (lang_locale != NULL) {
953 asprintf(&mandir, "%s/%s", dir, lang_locale);
954 process_mandir(mandir);
/*
 * Program entry point: parses the options, installs the signal handlers,
 * creates the two work buffers and processes either the directories given
 * on the command line or the MANPATH (DEFAULT_MANPATH when unset).
 */
966 main(int argc, char **argv)
971 while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
/* NOTE(review): atoi() gives no indication of a malformed number */
977 indent = atoi(optarg);
980 whatis_name = optarg;
983 common_output = optarg;
/* the locale is taken from the first of LC_ALL, LC_CTYPE and LANG consulted here */
989 locale = getenv("LC_ALL");
991 locale = getenv("LC_CTYPE");
993 locale = getenv("LANG");
994 if (locale != NULL) {
995 char *sep = strchr(locale, '_');
996 if (sep != NULL && isupper(sep[1]) &&
/*
 * The short locale is the text before the underscore followed by the text
 * that starts three characters after it.
 * NOTE(review): the precision argument is a pointer difference while the
 * format expects an int - confirm whether a cast is needed.
 */
998 asprintf(&lang_locale, "%.*s%s", sep - locale, locale, &sep[3]);
1003 fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
/* the same handler serves all four signals */
1008 signal(SIGINT, trap_signal);
1009 signal(SIGHUP, trap_signal);
1010 signal(SIGQUIT, trap_signal);
1011 signal(SIGTERM, trap_signal);
1012 SLIST_INIT(&visited_dirs);
1013 whatis_proto = new_sbuf();
1014 whatis_final = new_sbuf();
1016 if ((machine = getenv("MACHINE")) == NULL)
/* with a common output file, it is opened once here and finished once at the end */
1019 if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1020 err(1, "%s", common_output);
/* no directory arguments: fall back to MANPATH, then to the default path */
1021 if (optind == argc) {
1022 const char *manpath = getenv("MANPATH");
1023 if (manpath == NULL)
1024 manpath = DEFAULT_MANPATH;
1025 process_argument(manpath);
1027 while (optind < argc)
1028 process_argument(argv[optind++]);
1030 if (common_output != NULL)
1031 finish_output(fp, common_output);