2 * Copyright (c) 2002 John Rochester
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/queue.h>
36 #include <sys/utsname.h>
45 #include <stringlist.h>
/* Man directory that main() falls back to when MANPATH is not set. */
49 #define DEFAULT_MANPATH "/usr/share/man"
/* Size of the initial sbuf allocation and of the line buffer in open_output(). */
50 #define LINE_ALLOC 4096
/* Empty string constant; no use of it is visible in this excerpt. */
52 static char blank[] = "";
55 * Information collected about each man page in a section.
66 * An entry kept for each visited directory.
/*
 * Link field for the visited_dirs singly-linked list.  already_visited()
 * also reads and writes device and inode members of this struct; their
 * declarations are not shown in this excerpt.
 */
71 SLIST_ENTRY(visited_dir) next;
/*
 * Fields of the growable string buffer.  new_sbuf() and sbuf_need() both
 * set last to content + size - 1, i.e. last addresses the final allocated
 * byte, and sbuf_need() grows the buffer until end + nchars <= last.
 */
78 char * content; /* the start of the buffer */
79 char * end; /* just past the end of the content */
80 char * last; /* the last allocated character */
84 * Removes the last amount characters from the sbuf.
/*
 * Moves end backwards by amount bytes and yields the new end pointer.
 * NOTE(review): there is no bounds check, so the caller must guarantee
 * amount <= sbuf_length(sbuf); otherwise end moves before content.
 */
86 #define sbuf_retract(sbuf, amount) \
87 ((sbuf)->end -= (amount))
89 * Returns the length of the sbuf content.
/*
 * The value is the pointer difference end - content.  Callers in this
 * file store or compare it as an int (see process_mdoc_line and
 * process_page).
 */
91 #define sbuf_length(sbuf) \
92 ((sbuf)->end - (sbuf)->content)
/*
 * Signature of the copy routine handed to sbuf_append_edited(): it reads
 * length bytes starting at from, writes into to, and returns the new end
 * of the destination (the result is assigned to sbuf->end).
 */
94 typedef char *edited_copy(char *from, char *to, int length);
/* Option state; main() fills in the values it parses with getopt. */
96 static int append; /* -a flag: append to existing whatis */
97 static int verbose; /* -v flag: be verbose with warnings */
98 static int indent = 24; /* -i option: description indentation */
99 static const char *whatis_name="whatis";/* -n option: the name */
100 static char *common_output; /* -o option: the single output file */
101 static char *locale; /* user's locale if -L is used */
102 static char *lang_locale; /* short form of locale */
/*
 * Names of the machine-specific subdirectories tried by process_mandir().
 * main() takes them from the MACHINE and MACHINE_ARCH environment
 * variables, falling back to uname() and the MACHINE_ARCH macro.
 */
103 static const char *machine, *machine_arch;
105 static int exit_code; /* exit code to use when finished */
/* Directories seen so far, keyed by device and inode in already_visited(). */
106 static SLIST_HEAD(, visited_dir) visited_dirs =
107 SLIST_HEAD_INITIALIZER(visited_dirs);
110 * While the whatis line is being formed, it is stored in whatis_proto.
111 * When finished, it is reformatted into whatis_final and then appended
/* Both buffers are allocated once in main() via new_sbuf() and reused. */
114 static struct sbuf *whatis_proto;
115 static struct sbuf *whatis_final;
116 static StringList *whatis_lines; /* collected output lines */
/* An empty first byte means no temporary file is active (see trap_signal). */
118 static char tmp_file[MAXPATHLEN]; /* path of temporary file, if any */
120 /* A set of possible names for the NAME man page section */
/*
 * NULL-terminated; name_section_line() walks it and compares each entry
 * with strcmp.  The last two entries are raw non-ASCII byte strings --
 * presumably localized titles in legacy 8-bit or multibyte encodings;
 * TODO confirm which encodings they are meant for.
 */
121 static const char *name_section_titles[] = {
122 "NAME", "Name", "NAMN", "BEZEICHNUNG", "\xcc\xbe\xbe\xce",
123 "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5", NULL
126 /* A subset of the mdoc(7) commands to ignore */
/*
 * Two-letter macro names packed back to back and searched with strstr in
 * process_mdoc_line().  That search is only reached for tokens of the
 * form upper-case letter followed by lower-case letter, so a match
 * cannot straddle two adjacent entries (those pairs are lower then upper).
 */
127 static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
130 * Frees a struct page_info and its content.
/*
 * Only the release of info->filename is visible in this excerpt.
 * NOTE(review): new_page_info() also allocates name and suffix with
 * strdup; confirm that they and the struct itself are freed in the
 * lines not shown.
 */
133 free_page_info(struct page_info *info)
135 free(info->filename);
142 * Allocates and fills in a new struct page_info given the
143 * name of the man section directory and the dirent of the file.
144 * If the file is not a man page, returns NULL.
/*
 * Visible steps: allocate the record; build filename as dir/d_name; set
 * gzipped when the name is at least 4 bytes long and ends in .gz; step
 * suffix back from the end of the name, testing for the start of the
 * name or a non-alphanumeric byte (an invalid-name warning follows);
 * duplicate name and suffix; stat the file and reject anything that is
 * not a regular file, warning about non-directories only when verbose.
 * The inode is recorded; process_section() uses it to skip duplicates.
 * NOTE(review): the results of malloc, asprintf and strdup are not
 * checked in the visible lines, and isalnum receives a plain char, which
 * is undefined for negative values; confirm against the missing lines.
 */
146 static struct page_info *
147 new_page_info(char *dir, struct dirent *dirent)
149 struct page_info *info;
154 info = (struct page_info *) malloc(sizeof(struct page_info));
157 basename_length = strlen(dirent->d_name);
158 suffix = &dirent->d_name[basename_length];
159 asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
160 if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
165 if (--suffix == dirent->d_name || !isalnum(*suffix)) {
169 warnx("%s: invalid man page name", info->filename);
170 free(info->filename);
176 info->name = strdup(dirent->d_name);
177 info->suffix = strdup(suffix);
178 if (stat(info->filename, &st) < 0) {
179 warn("%s", info->filename);
180 free_page_info(info);
183 if (!S_ISREG(st.st_mode)) {
184 if (verbose && !S_ISDIR(st.st_mode))
185 warnx("%s: not a regular file", info->filename);
186 free_page_info(info);
189 info->inode = st.st_ino;
194 * Reset an sbuf's length to 0.
/* Rewinds end to content; the allocation itself is kept for reuse. */
197 sbuf_clear(struct sbuf *sbuf)
199 sbuf->end = sbuf->content;
203 * Allocate a new sbuf.
/*
 * Allocates the header and a LINE_ALLOC byte content buffer, and points
 * last at the final byte of that buffer.
 * NOTE(review): neither malloc result is checked in the visible lines;
 * the initialisation of end and the return are not shown here.
 */
208 struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
209 sbuf->content = (char *) malloc(LINE_ALLOC);
210 sbuf->last = sbuf->content + LINE_ALLOC - 1;
216 * Ensure that there is enough room in the sbuf for nchars more characters.
/*
 * Loops while end + nchars would pass last.  Each pass derives the
 * current allocation size and content length, allocates a new buffer,
 * copies the content across and repoints content, end and last.
 * NOTE(review): the statement that enlarges size and the release of the
 * old buffer are not visible in this excerpt, and the malloc result is
 * used without a NULL check; confirm all three in the full source.
 */
219 sbuf_need(struct sbuf *sbuf, int nchars)
222 size_t size, cntsize;
224 /* double the size of the allocation until the buffer is big enough */
225 while (sbuf->end + nchars > sbuf->last) {
226 size = sbuf->last + 1 - sbuf->content;
228 cntsize = sbuf->end - sbuf->content;
230 new_content = (char *)malloc(size);
231 memcpy(new_content, sbuf->content, cntsize);
233 sbuf->content = new_content;
234 sbuf->end = new_content + cntsize;
235 sbuf->last = new_content + size - 1;
240 * Appends a string of a given length to the sbuf.
/*
 * Reserves room with sbuf_need() and copies length bytes of text to the
 * current end.  The statement advancing end is not shown in this
 * excerpt.  text does not need to be NUL-terminated for this path.
 */
243 sbuf_append(struct sbuf *sbuf, const char *text, int length)
246 sbuf_need(sbuf, length);
247 memcpy(sbuf->end, text, length);
253 * Appends a null-terminated string to the sbuf.
/* Thin wrapper: measures text with strlen and forwards to sbuf_append(). */
256 sbuf_append_str(struct sbuf *sbuf, char *text)
258 sbuf_append(sbuf, text, strlen(text));
262 * Appends an edited null-terminated string to the sbuf.
/*
 * Reserves strlen(text) bytes, then lets copy write the edited text at
 * the current end; the pointer copy returns becomes the new end.  The
 * reservation is only sufficient if copy never emits more bytes than it
 * consumes -- NOTE(review): confirm for every edited_copy in use.
 */
265 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
267 int length = strlen(text);
269 sbuf_need(sbuf, length);
270 sbuf->end = copy(text, sbuf->end, length);
275 * Strips any of a set of chars from the end of the sbuf.
/*
 * The loop runs while the buffer is non-empty and its final byte occurs
 * in set; the loop body is not shown in this excerpt.
 */
278 sbuf_strip(struct sbuf *sbuf, const char *set)
280 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
285 * Returns the null-terminated string built by the sbuf.
/*
 * Returns the internal buffer, not a copy, so the pointer is only valid
 * until the sbuf is next grown or cleared.  The statement that writes
 * the terminator is not visible in this excerpt.
 */
288 sbuf_content(struct sbuf *sbuf)
291 return sbuf->content;
295 * Returns true if no man page exists in the directory with
296 * any of the names in the StringList.
/*
 * For each name the path dir/name.suffix.gz is probed with access(); if
 * that is missing, the trailing three bytes (.gz) are cut off and the
 * uncompressed path is probed as well.
 * NOTE(review): the snprintf result is not checked, so an over-long path
 * is silently truncated before the three-byte cut.
 */
299 no_page_exists(char *dir, StringList *names, char *suffix)
301 char path[MAXPATHLEN];
304 for (i = 0; i < names->sl_cur; i++) {
305 snprintf(path, sizeof path, "%s/%s.%s.gz", dir, names->sl_str[i], suffix);
306 if (access(path, F_OK) < 0) {
307 path[strlen(path) - 3] = '\0';
308 if (access(path, F_OK) < 0)
/*
 * Handler installed by main() for SIGINT, SIGHUP, SIGQUIT and SIGTERM.
 * It acts only when tmp_file is non-empty; the action itself is not
 * shown in this excerpt (presumably removal of the temporary file --
 * TODO confirm).
 */
317 trap_signal(int sig __unused)
319 if (tmp_file[0] != '\0')
325 * Attempts to open an output file. Returns NULL if unsuccessful.
/*
 * Visible behaviour: a fresh whatis_lines list is created; one path
 * opens name for reading and loads every line, minus its final byte,
 * into the list; when no common output file is in use the temporary
 * path name.tmp is written to tmp_file; finally a file is opened for
 * writing.  The conditions guarding these steps are not all shown.
 * NOTE(review): line[strlen(line) - 1] assumes each fgets result ends
 * in a newline.  A final line without one, or a line longer than
 * LINE_ALLOC - 1 bytes, loses a real character.  The strdup result is
 * also added to the list unchecked.
 */
328 open_output(char *name)
332 whatis_lines = sl_init();
334 char line[LINE_ALLOC];
336 output = fopen(name, "r");
337 if (output == NULL) {
342 while (fgets(line, sizeof line, output) != NULL) {
343 line[strlen(line) - 1] = '\0';
344 sl_add(whatis_lines, strdup(line));
347 if (common_output == NULL) {
348 snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
351 output = fopen(name, "w");
352 if (output == NULL) {
/*
 * qsort comparator for an array of char pointers: each argument is the
 * address of an element, so it is dereferenced once before strcmp.
 */
361 linesort(const void *a, const void *b)
363 return strcmp((*(const char * const *)a), (*(const char * const *)b));
367 * Writes the unique sorted lines to the output file.
/*
 * Sorts whatis_lines in place, walks them while comparing each line with
 * the previous one to detect duplicates, then frees the list together
 * with its strings (sl_free with a non-zero second argument).  When no
 * common output file is in use, tmp_file is renamed to name.
 * NOTE(review): the rename result is ignored in the visible lines, so a
 * failed rename would go unreported.
 */
370 finish_output(FILE *output, char *name)
375 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
376 for (i = 0; i < whatis_lines->sl_cur; i++) {
377 char *line = whatis_lines->sl_str[i];
378 if (i > 0 && strcmp(line, prev) == 0)
385 sl_free(whatis_lines, 1);
386 if (common_output == NULL) {
387 rename(tmp_file, name);
/*
 * Opens the per-directory database: builds mandir/whatis_name in a
 * MAXPATHLEN buffer and passes it to open_output(), returning its result.
 * The snprintf result is not checked for truncation.
 */
393 open_whatis(char *mandir)
395 char filename[MAXPATHLEN];
397 snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
398 return open_output(filename);
/*
 * Counterpart of open_whatis(): rebuilds the same mandir/whatis_name
 * path and hands it, with the open stream, to finish_output().
 */
402 finish_whatis(FILE *output, char *mandir)
404 char filename[MAXPATHLEN];
406 snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
407 finish_output(output, filename);
411 * Tests to see if the given directory has already been visited.
/*
 * Identity is the (device, inode) pair from stat, so the same directory
 * reached through different paths is recognised.  A directory not found
 * in visited_dirs is recorded at the head of the list.
 * NOTE(review): the malloc result is dereferenced without a NULL check
 * in the visible lines.
 */
414 already_visited(char *dir)
417 struct visited_dir *visit;
419 if (stat(dir, &st) < 0) {
424 SLIST_FOREACH(visit, &visited_dirs, next) {
425 if (visit->inode == st.st_ino &&
426 visit->device == st.st_dev) {
427 warnx("already visited %s", dir);
431 visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
432 visit->device = st.st_dev;
433 visit->inode = st.st_ino;
434 SLIST_INSERT_HEAD(&visited_dirs, visit, next);
439 * Removes trailing spaces from a string, returning a pointer to just
440 * beyond the new last character.
/*
 * Scans backwards from the terminator while the byte is white space.
 * The comparison is rhs > str, so the first byte is never examined.
 * NOTE(review): for an empty string the pre-decrement forms a pointer
 * one before str, which is outside the array; isspace also receives a
 * plain char.  Both are technically undefined and worth tightening.
 */
445 char *rhs = &str[strlen(str)];
446 while (--rhs > str && isspace(*rhs))
453 * Returns a pointer to the next non-space character in the string.
/*
 * Stops at the terminator, so the result may point at the end of the
 * string.  The loop body is not shown in this excerpt.
 */
458 while (*s != '\0' && isspace(*s))
464 * Returns whether the string contains only digits.
/*
 * Requires at least one digit: the first byte is tested separately, so
 * an empty string does not qualify.  After the digit run the final
 * result is whether the terminator has been reached.
 */
467 only_digits(char *line)
469 if (!isdigit(*line++))
471 while (isdigit(*line))
473 return *line == '\0';
477 * Returns whether the line is of one of the forms:
481 * assuming that section_start is ".Sh".
/*
 * Only the first three bytes of section_start are compared.  The text
 * after the macro has leading white space skipped and is right-trimmed
 * via trim_rhs() before being matched exactly against each entry of
 * name_section_titles.  Steps between the trim and the lookup are not
 * shown in this excerpt.
 */
484 name_section_line(char *line, const char *section_start)
489 if (strncmp(line, section_start, 3) != 0)
491 line = skip_spaces(line + 3);
492 rhs = trim_rhs(line);
498 for (title = name_section_titles; *title != NULL; title++)
499 if (strcmp(*title, line) == 0)
505 * Copies characters while removing the most common nroff/troff
507 * \(em, \(mi, \s[+-N], \&
508 * \fF, \f(fo, \f[font]
509 * \*s, \*(st, \*[stringvar]
/*
 * Matches the edited_copy signature and is used through add_nroff().
 * Visible handling: the two-letter names em and mi are recognised after
 * the escape introducer, a run of digits is skipped, and a bracketed
 * argument is skipped up to the closing bracket.
 * NOTE(review): the bracket loop dereferences the advanced pointer
 * before testing it against from_end, so the byte at from_end is read.
 * That is the terminator when called via sbuf_append_edited(), but the
 * test order should be swapped to be safe for other callers.
 */
512 de_nroff_copy(char *from, char *to, int fromlen)
514 char *from_end = &from[fromlen];
515 while (from < from_end) {
520 if (strncmp(&from[1], "em", 2) == 0 ||
521 strncmp(&from[1], "mi", 2) == 0) {
529 while (isdigit(*from))
536 else if (*from == '[') {
537 while (*++from != ']' && from < from_end);
554 * Appends a string with the nroff formatting removed.
/* Target is always whatis_proto; the filtering is done by de_nroff_copy. */
557 add_nroff(char *text)
559 sbuf_append_edited(whatis_proto, text, de_nroff_copy);
563 * Appends "name(suffix), " to whatis_final.
/*
 * Every call leaves a trailing comma and space; process_page() removes
 * the last such pair with sbuf_retract(whatis_final, 2) once all names
 * have been added.
 */
566 add_whatis_name(char *name, char *suffix)
569 sbuf_append_str(whatis_final, name);
570 sbuf_append(whatis_final, "(", 1);
571 sbuf_append_str(whatis_final, suffix);
572 sbuf_append(whatis_final, "), ", 3);
577 * Processes an old-style man(7) line. This ignores commands with only
578 * a single number argument.
/*
 * Visible steps: letters following the first byte are skipped (the
 * command name), white space is skipped, and the remainder is tested
 * with only_digits(); later a single space is appended to whatis_proto.
 * The branch structure around these steps is not shown in this excerpt.
 */
581 process_man_line(char *line)
584 while (isalpha(*++line))
586 line = skip_spaces(line);
587 if (only_digits(line))
590 line = skip_spaces(line);
593 sbuf_append(whatis_proto, " ", 1);
598 * Processes a new-style mdoc(7) line.
/*
 * A line that does not start with a dot, an upper-case letter and a
 * lower-case letter is treated as plain text: it is de-nroffed into
 * whatis_proto and followed by a space.  Otherwise the line is split
 * into arguments in place (quoted arguments have a quote removed with
 * memmove, unquoted ones end at a space or tab).  Two-letter macro
 * names get special treatment: Ns is tested explicitly and names found
 * in mdoc_commands are the ones to ignore.  For .Xr lines, an argument
 * after the first that does not start with closing punctuation is
 * wrapped in parentheses.  A trailing space is added only when the line
 * contributed something (length compared with orig_length).
 * The input line is modified, so callers must pass writable storage.
 */
601 process_mdoc_line(char *line)
605 char *line_end = &line[strlen(line)];
606 int orig_length = sbuf_length(whatis_proto);
611 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
612 add_nroff(skip_spaces(line));
613 sbuf_append(whatis_proto, " ", 1);
616 xref = strncmp(line, ".Xr", 3) == 0;
618 while ((line = skip_spaces(line)) < line_end) {
622 next = strchr(next, '"');
625 memmove(next, next + 1, strlen(next));
632 next = strpbrk(line, " \t");
637 if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
638 if (strcmp(line, "Ns") == 0) {
643 if (strstr(mdoc_commands, line) != NULL) {
648 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
650 sbuf_append(whatis_proto, "(", 1);
652 sbuf_append(whatis_proto, ")", 1);
656 sbuf_append(whatis_proto, " ", 1);
662 if (sbuf_length(whatis_proto) > orig_length)
663 sbuf_append(whatis_proto, " ", 1);
667 * Collects a list of comma-separated names from the text.
/*
 * Only the search for the next comma is visible in this excerpt; how
 * each name is terminated and added to names is not shown.
 */
670 collect_names(StringList *names, char *text)
676 text = strchr(text, ',');
/*
 * Parser states used by process_page(): before the NAME section, inside
 * a man(7) style NAME section, and the two halves of an mdoc(7) NAME
 * section (the .Nm names, then the description).
 */
687 enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };
/*
 * Overview of the visible flow.  whatis_proto is cleared and the page is
 * opened with gzopen and read line by line with gzgets; roff comment
 * lines are skipped.  A small state machine starts in STATE_UNKNOWN,
 * switches to STATE_MANSTYLE or STATE_MDOCNAME when name_section_line()
 * matches .SH or .Sh, and moves from STATE_MDOCNAME to STATE_MDOCDESC
 * after appending a dash separator.  Lines are fed to process_man_line()
 * or process_mdoc_line() according to the state.
 * Afterwards trailing blanks, tabs, dots and dashes are stripped, the
 * text is split at the first space-dash-space (or the first space), and
 * the names are collected.  If the page's own name is not among them and
 * no page exists for any collected name, the page name is added; the
 * collected names are added with the page suffix.  The final ", " is
 * retracted, the name column is padded to indent, and the finished line
 * is duplicated into whatis_lines.
 * NOTE(review): the strdup result passed to sl_add is unchecked, and no
 * gzclose is visible in this excerpt -- confirm in the full source.
 */
690 * Processes a man page source into a single whatis line and adds it
694 process_page(struct page_info *page, char *section_dir)
701 int state = STATE_UNKNOWN;
704 sbuf_clear(whatis_proto);
705 if ((in = gzopen(page->filename, "r")) == NULL) {
706 warn("%s", page->filename);
710 while (gzgets(in, buffer, sizeof buffer) != NULL) {
712 if (strncmp(line, ".\\\"", 3) == 0) /* ignore comments */
716 * haven't reached the NAME section yet.
719 if (name_section_line(line, ".SH"))
720 state = STATE_MANSTYLE;
721 else if (name_section_line(line, ".Sh"))
722 state = STATE_MDOCNAME;
725 * Inside an old-style .SH NAME section.
728 if (strncmp(line, ".SH", 3) == 0)
730 if (strncmp(line, ".SS", 3) == 0)
733 if (strcmp(line, ".") == 0)
735 if (strncmp(line, ".IX", 3) == 0) {
737 line = skip_spaces(line);
739 process_man_line(line);
742 * Inside a new-style .Sh NAME section (the .Nm part).
746 if (strncmp(line, ".Nm", 3) == 0) {
747 process_mdoc_line(line);
750 if (strcmp(line, ".") == 0)
752 sbuf_append(whatis_proto, "- ", 2);
753 state = STATE_MDOCDESC;
757 * Inside a new-style .Sh NAME section (after the .Nm-s).
760 if (strncmp(line, ".Sh", 3) == 0)
763 if (strcmp(line, ".") == 0)
765 process_mdoc_line(line);
771 sbuf_strip(whatis_proto, " \t.-");
772 line = sbuf_content(whatis_proto);
774 * line now contains the appropriate data, but without
775 * the proper indentation or the section appended to each name.
777 descr = strstr(line, " - ");
779 descr = strchr(line, ' ');
782 fprintf(stderr, " ignoring junk description \"%s\"\n", line);
791 collect_names(names, line);
792 sbuf_clear(whatis_final);
793 if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
795 * Add the page name since that's the only thing that
798 add_whatis_name(page->name, page->suffix);
800 for (i = 0; i < names->sl_cur; i++)
801 add_whatis_name(names->sl_str[i], page->suffix);
803 sbuf_retract(whatis_final, 2); /* remove last ", " */
804 while (sbuf_length(whatis_final) < indent)
805 sbuf_append(whatis_final, " ", 1);
806 sbuf_append(whatis_final, " - ", 3);
807 sbuf_append_str(whatis_final, skip_spaces(descr));
808 sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
812 * Sorts pages first by inode number, then by name.
/*
 * qsort comparator over an array of struct page_info pointers.  Pages
 * sharing an inode end up adjacent, which process_section() relies on
 * to detect duplicates.
 * NOTE(review): the last line returns the raw inode difference.  The
 * return type line is not shown, but qsort comparators return int, so a
 * difference that does not fit in int would be truncated and could flip
 * sign, giving an inconsistent order.  Explicit less/greater comparisons
 * would avoid this -- confirm against the full source.
 */
815 pagesort(const void *a, const void *b)
817 const struct page_info *p1 = *(struct page_info * const *) a;
818 const struct page_info *p2 = *(struct page_info * const *) b;
819 if (p1->inode == p2->inode)
820 return strcmp(p1->name, p2->name);
821 return p1->inode - p2->inode;
825 * Processes a single man section.
/*
 * Visible flow: list the directory with scandir, build a page_info for
 * each entry via new_page_info(), sort the collected pages with
 * pagesort, then process each page whose inode differs from the
 * previous one and report the rest as duplicates.  Each page_info is
 * released with free_page_info().
 * NOTE(review): the calloc result is not checked in the visible lines,
 * and prev_inode starts at 0, so a page whose inode number is 0 would be
 * treated as a duplicate if it sorts first.  No release of entries or
 * pages is visible in this excerpt; confirm in the full source.
 */
828 process_section(char *section_dir)
830 struct dirent **entries;
832 struct page_info **pages;
835 ino_t prev_inode = 0;
838 fprintf(stderr, " %s\n", section_dir);
841 * scan the man section directory for pages
843 nentries = scandir(section_dir, &entries, NULL, alphasort);
845 warn("%s", section_dir);
850 * collect information about man pages
852 pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
853 for (i = 0; i < nentries; i++) {
854 struct page_info *info = new_page_info(section_dir, entries[i]);
856 pages[npages++] = info;
860 qsort(pages, npages, sizeof(struct page_info *), pagesort);
862 * process each unique page
864 for (i = 0; i < npages; i++) {
865 struct page_info *page = pages[i];
866 if (page->inode != prev_inode) {
867 prev_inode = page->inode;
869 fprintf(stderr, " reading %s\n", page->filename);
870 process_page(page, section_dir);
872 fprintf(stderr, " skipping %s, duplicate\n", page->filename);
873 free_page_info(page);
879 * Returns whether the directory entry is a man page section.
/*
 * scandir filter used by process_mandir().  The name must start with
 * man; p addresses the text after that prefix, and how it is validated
 * is not shown in this excerpt.  Taking the address before the prefix
 * test is harmless because nothing is read through p at that point.
 */
882 select_sections(const struct dirent *entry)
884 const char *p = &entry->d_name[3];
886 if (strncmp(entry->d_name, "man", 3) != 0)
896 * Processes a single top-level man directory by finding all the
897 * sub-directories named man* and processing each one in turn.
/*
 * Directories already seen (per already_visited) are tested first.  When
 * no common output file is configured, a per-directory whatis file is
 * opened before the sections are processed and finished afterwards.
 * For every section the plain section directory is processed, then the
 * machine subdirectory if it exists and is a directory, then the
 * machine_arch subdirectory when that name differs from machine.
 * The snprintf results are not checked for truncation.
 */
900 process_mandir(char *dir_name)
902 struct dirent **entries;
908 if (already_visited(dir_name))
911 fprintf(stderr, "man directory %s\n", dir_name);
912 nsections = scandir(dir_name, &entries, select_sections, alphasort);
914 warn("%s", dir_name);
918 if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
920 for (i = 0; i < nsections; i++) {
921 char section_dir[MAXPATHLEN];
922 snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
923 process_section(section_dir);
924 snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
925 entries[i]->d_name, machine);
926 if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
927 process_section(section_dir);
928 if (strcmp(machine_arch, machine) != 0) {
929 snprintf(section_dir, sizeof section_dir, "%s/%s/%s",
930 dir_name, entries[i]->d_name, machine_arch);
931 if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
932 process_section(section_dir);
937 if (common_output == NULL)
938 finish_whatis(fp, dir_name);
942 * Processes one argument, which may be a colon-separated list of
/*
 * The argument is split on colons with strsep, working on parg (the
 * out-of-memory exit suggests parg is a private copy; its creation is
 * not shown).  For each directory the locale and lang_locale
 * subdirectories are processed when those globals are set.
 * NOTE(review): the asprintf results are not checked and no free of
 * mandir is visible in this excerpt; confirm in the full source.
 */
946 process_argument(const char *arg)
954 err(1, "out of memory");
955 while ((dir = strsep(&parg, ":")) != NULL) {
956 if (locale != NULL) {
957 asprintf(&mandir, "%s/%s", dir, locale);
958 process_mandir(mandir);
960 if (lang_locale != NULL) {
961 asprintf(&mandir, "%s/%s", dir, lang_locale);
962 process_mandir(mandir);
/*
 * Entry point.  Options are parsed with getopt (a, i, n, o, v, L).  For
 * the locale, LC_ALL, LC_CTYPE and LANG are read in that order; the
 * conditions between the reads are not shown (presumably each is a
 * fallback for the previous one -- TODO confirm).  lang_locale is built
 * from the part of locale before the underscore plus the text starting
 * three bytes after it.  Signal handlers and the two work buffers are
 * set up, machine and machine_arch are resolved from the environment
 * with uname() and the MACHINE_ARCH macro as fallbacks, and then either
 * MANPATH (or DEFAULT_MANPATH) or each remaining argument is processed.
 * With a common output file, it is opened before and finished after all
 * directories.
 * NOTE(review): indent comes from atoi, which cannot report malformed or
 * negative input; strtol with validation would be safer.
 */
974 main(int argc, char **argv)
979 while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
985 indent = atoi(optarg);
988 whatis_name = optarg;
991 common_output = optarg;
997 locale = getenv("LC_ALL");
999 locale = getenv("LC_CTYPE");
1001 locale = getenv("LANG");
1002 if (locale != NULL) {
1003 char *sep = strchr(locale, '_');
1004 if (sep != NULL && isupper(sep[1]) &&
1006 asprintf(&lang_locale, "%.*s%s", (int)(ptrdiff_t)(sep - locale), locale, &sep[3]);
1011 fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1016 signal(SIGINT, trap_signal);
1017 signal(SIGHUP, trap_signal);
1018 signal(SIGQUIT, trap_signal);
1019 signal(SIGTERM, trap_signal);
1020 SLIST_INIT(&visited_dirs);
1021 whatis_proto = new_sbuf();
1022 whatis_final = new_sbuf();
1024 if ((machine = getenv("MACHINE")) == NULL) {
1025 static struct utsname utsname;
1027 if (uname(&utsname) == -1)
1029 machine = utsname.machine;
1032 if ((machine_arch = getenv("MACHINE_ARCH")) == NULL)
1033 machine_arch = MACHINE_ARCH;
1035 if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1036 err(1, "%s", common_output);
1037 if (optind == argc) {
1038 const char *manpath = getenv("MANPATH");
1039 if (manpath == NULL)
1040 manpath = DEFAULT_MANPATH;
1041 process_argument(manpath);
1043 while (optind < argc)
1044 process_argument(argv[optind++]);
1046 if (common_output != NULL)
1047 finish_output(fp, common_output);