]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - usr.bin/makewhatis/makewhatis.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / usr.bin / makewhatis / makewhatis.c
1 /*-
2  * Copyright (c) 2002 John Rochester
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/param.h>
35 #include <sys/queue.h>
36 #include <sys/utsname.h>
37
38 #include <ctype.h>
39 #include <dirent.h>
40 #include <err.h>
41 #include <stddef.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <stringlist.h>
46 #include <unistd.h>
47 #include <zlib.h>
48
/* Default manual page hierarchy. */
#define DEFAULT_MANPATH "/usr/share/man"
/* Initial sbuf capacity and size of the line buffer in open_output(). */
#define LINE_ALLOC      4096

/* Writable empty string; process_mdoc_line() substitutes it for a consumed argument. */
static char blank[] = "";
53
/*
 * Record built by new_page_info() for each candidate man page found in a
 * section directory.  All three strings are heap-allocated and released
 * by free_page_info().
 */
struct page_info {
        char *filename;         /* "dir/entry" path of the file */
        char *name;             /* entry name up to the final '.' */
        char *suffix;           /* text after that '.', ".gz" excluded */
        int gzipped;            /* non-zero if the entry name ended in ".gz" */
        ino_t inode;            /* st_ino of the file, used to spot duplicates */
};
64
/*
 * One node of the singly-linked list of directories already handled by
 * already_visited(); a directory is identified by its (device, inode) pair.
 */
struct visited_dir {
        dev_t device;                           /* st_dev of the directory */
        ino_t inode;                            /* st_ino of the directory */
        SLIST_ENTRY(visited_dir) next;          /* list linkage */
};
73
/*
 * A growable character buffer.  The content is not kept NUL-terminated;
 * sbuf_content() writes the terminator on demand, which is why the byte
 * at "last" is always kept free by sbuf_need().
 */
struct sbuf {
        char *content;          /* the start of the buffer */
        char *end;              /* just past the end of the content */
        char *last;             /* the last allocated character */
};
82
/*
 * Drops the last amount characters from the sbuf by moving its end
 * pointer back.  No check is made that the sbuf holds that many.
 */
#define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount))

/*
 * Yields the number of content characters currently in the sbuf.
 */
#define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content)

/*
 * A copy routine that may drop characters on the way: it reads length
 * characters starting at from, writes the result starting at to, and
 * returns the position just past the last character written.
 */
typedef char *edited_copy(char *from, char *to, int length);
95
/* -a flag: append to existing whatis */
static int append;
/* -v flag: be verbose with warnings */
static int verbose;
/* -i option: description indentation */
static int indent = 24;
/* -n option: the name */
static const char *whatis_name = "whatis";
/* -o option: the single output file */
static char *common_output;
/* user's locale if -L is used */
static char *locale;
/* short form of locale */
static char *lang_locale;
/* names of the per-section sub-directories probed by process_mandir() */
static const char *machine;
static const char *machine_arch;

/* exit code to use when finished */
static int exit_code;
/* every directory accepted so far by already_visited() */
static SLIST_HEAD(, visited_dir) visited_dirs =
    SLIST_HEAD_INITIALIZER(visited_dirs);

/*
 * A whatis line is first assembled in whatis_proto.  Once complete it is
 * reformatted into whatis_final, and a copy of that is appended to
 * whatis_lines.
 */
static struct sbuf *whatis_proto;
static struct sbuf *whatis_final;
/* collected output lines */
static StringList *whatis_lines;

/* path of temporary file, if any */
static char tmp_file[MAXPATHLEN];
119
/*
 * Section titles accepted by name_section_line() as the NAME section.
 * The last two entries are raw 8-bit byte sequences and must be kept
 * byte-for-byte.  The list is NULL-terminated.
 */
static const char *name_section_titles[] = {
        "NAME",
        "Name",
        "NAMN",
        "BEZEICHNUNG",
        "\xcc\xbe\xbe\xce",
        "\xee\xe1\xfa\xf7\xe1\xee\xe9\xe5",
        NULL
};

/*
 * Two-letter mdoc(7) command names to ignore, concatenated into a single
 * string.  process_mdoc_line() searches it with strstr() only after it has
 * checked that the word is one upper-case letter followed by one
 * lower-case letter, so a match cannot straddle two adjacent names.
 */
static char mdoc_commands[] = "ArDvErEvFlLiNmPa";
128
129 /*
130  * Frees a struct page_info and its content.
131  */
132 static void
133 free_page_info(struct page_info *info)
134 {
135         free(info->filename);
136         free(info->name);
137         free(info->suffix);
138         free(info);
139 }
140
141 /*
142  * Allocates and fills in a new struct page_info given the
143  * name of the man section directory and the dirent of the file.
144  * If the file is not a man page, returns NULL.
145  */
146 static struct page_info *
147 new_page_info(char *dir, struct dirent *dirent)
148 {
149         struct page_info *info;
150         int basename_length;
151         char *suffix;
152         struct stat st;
153
154         info = (struct page_info *) malloc(sizeof(struct page_info));
155         if (info == NULL)
156                 err(1, "malloc");
157         basename_length = strlen(dirent->d_name);
158         suffix = &dirent->d_name[basename_length];
159         asprintf(&info->filename, "%s/%s", dir, dirent->d_name);
160         if ((info->gzipped = basename_length >= 4 && strcmp(&dirent->d_name[basename_length - 3], ".gz") == 0)) {
161                 suffix -= 3;
162                 *suffix = '\0';
163         }
164         for (;;) {
165                 if (--suffix == dirent->d_name || !isalnum(*suffix)) {
166                         if (*suffix == '.')
167                                 break;
168                         if (verbose)
169                                 warnx("%s: invalid man page name", info->filename);
170                         free(info->filename);
171                         free(info);
172                         return NULL;
173                 }
174         }
175         *suffix++ = '\0';
176         info->name = strdup(dirent->d_name);
177         info->suffix = strdup(suffix);
178         if (stat(info->filename, &st) < 0) {
179                 warn("%s", info->filename);
180                 free_page_info(info);
181                 return NULL;
182         }
183         if (!S_ISREG(st.st_mode)) {
184                 if (verbose && !S_ISDIR(st.st_mode))
185                         warnx("%s: not a regular file", info->filename);
186                 free_page_info(info);
187                 return NULL;
188         }
189         info->inode = st.st_ino;
190         return info;
191 }
192
193 /*
194  * Reset an sbuf's length to 0.
195  */
196 static void
197 sbuf_clear(struct sbuf *sbuf)
198 {
199         sbuf->end = sbuf->content;
200 }
201
202 /*
203  * Allocate a new sbuf.
204  */
205 static struct sbuf *
206 new_sbuf(void)
207 {
208         struct sbuf *sbuf = (struct sbuf *) malloc(sizeof(struct sbuf));
209         sbuf->content = (char *) malloc(LINE_ALLOC);
210         sbuf->last = sbuf->content + LINE_ALLOC - 1;
211         sbuf_clear(sbuf);
212         return sbuf;
213 }
214
215 /*
216  * Ensure that there is enough room in the sbuf for nchars more characters.
217  */
218 static void
219 sbuf_need(struct sbuf *sbuf, int nchars)
220 {
221         char *new_content;
222         size_t size, cntsize;
223
224         /* double the size of the allocation until the buffer is big enough */
225         while (sbuf->end + nchars > sbuf->last) {
226                 size = sbuf->last + 1 - sbuf->content;
227                 size *= 2;
228                 cntsize = sbuf->end - sbuf->content;
229
230                 new_content = (char *)malloc(size);
231                 memcpy(new_content, sbuf->content, cntsize);
232                 free(sbuf->content);
233                 sbuf->content = new_content;
234                 sbuf->end = new_content + cntsize;
235                 sbuf->last = new_content + size - 1;
236         }
237 }
238
239 /*
240  * Appends a string of a given length to the sbuf.
241  */
242 static void
243 sbuf_append(struct sbuf *sbuf, const char *text, int length)
244 {
245         if (length > 0) {
246                 sbuf_need(sbuf, length);
247                 memcpy(sbuf->end, text, length);
248                 sbuf->end += length;
249         }
250 }
251
/*
 * Appends the whole of a NUL-terminated string to the sbuf.  The
 * terminator itself is not copied.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
        int length = strlen(text);

        sbuf_append(sbuf, text, length);
}
260
261 /*
262  * Appends an edited null-terminated string to the sbuf.
263  */
264 static void
265 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
266 {
267         int length = strlen(text);
268         if (length > 0) {
269                 sbuf_need(sbuf, length);
270                 sbuf->end = copy(text, sbuf->end, length);
271         }
272 }
273
274 /*
275  * Strips any of a set of chars from the end of the sbuf.
276  */
277 static void
278 sbuf_strip(struct sbuf *sbuf, const char *set)
279 {
280         while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
281                 sbuf->end--;
282 }
283
284 /*
285  * Returns the null-terminated string built by the sbuf.
286  */
287 static char *
288 sbuf_content(struct sbuf *sbuf)
289 {
290         *sbuf->end = '\0';
291         return sbuf->content;
292 }
293
/*
 * Returns true if, for every name in the StringList, neither
 * "dir/name.suffix.gz" nor "dir/name.suffix" exists.
 */
static int
no_page_exists(char *dir, StringList *names, char *suffix)
{
        char candidate[MAXPATHLEN];
        size_t n;

        for (n = 0; n < names->sl_cur; n++) {
                snprintf(candidate, sizeof candidate, "%s/%s.%s.gz", dir, names->sl_str[n], suffix);
                if (access(candidate, F_OK) >= 0)
                        return 0;
                /* Cut the ".gz" off and try the uncompressed name. */
                candidate[strlen(candidate) - 3] = '\0';
                if (access(candidate, F_OK) >= 0)
                        return 0;
        }
        return 1;
}
315
316 static void
317 trap_signal(int sig __unused)
318 {
319         if (tmp_file[0] != '\0')
320                 unlink(tmp_file);
321         exit(1);
322 }
323
/*
 * Starts a fresh whatis_lines list and opens an output file for writing.
 *
 * With -a, the existing file is first read into whatis_lines, one entry
 * per line with the newline removed, and then closed.  Unless a common
 * output file is in use, the file opened for writing is "name.tmp", whose
 * path is left in tmp_file.
 *
 * Returns NULL, after a warning and setting exit_code, if a file cannot
 * be opened.  Exits through err(3) if a line cannot be duplicated.
 */
static FILE *
open_output(char *name)
{
        FILE *output;

        whatis_lines = sl_init();
        if (append) {
                char line[LINE_ALLOC];
                FILE *existing;
                char *copy;
                size_t length;

                existing = fopen(name, "r");
                if (existing == NULL) {
                        warn("%s", name);
                        exit_code = 1;
                        return NULL;
                }
                while (fgets(line, sizeof line, existing) != NULL) {
                        /*
                         * Remove the newline only if there is one: the
                         * final line may lack it, and an over-long line
                         * arrives in several pieces.
                         */
                        length = strlen(line);
                        if (length > 0 && line[length - 1] == '\n')
                                line[length - 1] = '\0';
                        copy = strdup(line);
                        if (copy == NULL)
                                err(1, "strdup");
                        sl_add(whatis_lines, copy);
                }
                fclose(existing);
        }
        if (common_output == NULL) {
                snprintf(tmp_file, sizeof tmp_file, "%s.tmp", name);
                name = tmp_file;
        }
        output = fopen(name, "w");
        if (output == NULL) {
                warn("%s", name);
                exit_code = 1;
                return NULL;
        }
        return output;
}
359
/*
 * qsort(3) comparison routine for an array of C strings: each argument
 * points at an array element, and the strings are ordered by strcmp(3).
 */
static int
linesort(const void *a, const void *b)
{
        const char *const *lhs = a;
        const char *const *rhs = b;

        return strcmp(*lhs, *rhs);
}
365
/*
 * Sorts whatis_lines, writes each distinct line to the output file,
 * closes the file and frees the list.
 *
 * Without a common output file, the output was written to tmp_file, and
 * that file is renamed to name.  The rename happens only if every write
 * and the close succeeded, so a failed write cannot replace an existing
 * whatis file with a truncated one.  Any failure is reported and
 * recorded in exit_code.
 */
static void
finish_output(FILE *output, char *name)
{
        size_t i;
        char *prev = NULL;
        int failed;

        qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof(char *), linesort);
        for (i = 0; i < whatis_lines->sl_cur; i++) {
                char *line = whatis_lines->sl_str[i];

                /* Equal lines are adjacent once sorted. */
                if (prev != NULL && strcmp(line, prev) == 0)
                        continue;
                prev = line;
                fputs(line, output);
                putc('\n', output);
        }
        failed = ferror(output) != 0;
        if (fclose(output) != 0)
                failed = 1;
        sl_free(whatis_lines, 1);
        if (failed) {
                warnx("%s: write error", name);
                exit_code = 1;
        }
        if (common_output == NULL) {
                if (!failed && rename(tmp_file, name) < 0) {
                        warn("%s", name);
                        exit_code = 1;
                }
                /* Fails harmlessly after a successful rename. */
                unlink(tmp_file);
        }
}
391
392 static FILE *
393 open_whatis(char *mandir)
394 {
395         char filename[MAXPATHLEN];
396
397         snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
398         return open_output(filename);
399 }
400
401 static void
402 finish_whatis(FILE *output, char *mandir)
403 {
404         char filename[MAXPATHLEN];
405
406         snprintf(filename, sizeof filename, "%s/%s", mandir, whatis_name);
407         finish_output(output, filename);
408 }
409
410 /*
411  * Tests to see if the given directory has already been visited.
412  */
413 static int
414 already_visited(char *dir)
415 {
416         struct stat st;
417         struct visited_dir *visit;
418
419         if (stat(dir, &st) < 0) {
420                 warn("%s", dir);
421                 exit_code = 1;
422                 return 1;
423         }
424         SLIST_FOREACH(visit, &visited_dirs, next) {
425                 if (visit->inode == st.st_ino &&
426                     visit->device == st.st_dev) {
427                         warnx("already visited %s", dir);
428                         return 1;
429                 }
430         }
431         visit = (struct visited_dir *) malloc(sizeof(struct visited_dir));
432         visit->device = st.st_dev;
433         visit->inode = st.st_ino;
434         SLIST_INSERT_HEAD(&visited_dirs, visit, next);
435         return 0;
436 }
437
/*
 * Removes trailing white space from a string in place, returning a
 * pointer to just beyond the new last character, i.e. to the terminator.
 *
 * A string made only of white space becomes empty, and an empty string
 * is left as it is; in both cases the return value is str itself.
 */
static char *
trim_rhs(char *str)
{
        char *rhs = &str[strlen(str)];

        /*
         * Look at the character before rhs rather than stepping rhs
         * first, so that the pointer never moves in front of str.
         */
        while (rhs > str && isspace((unsigned char)rhs[-1]))
                rhs--;
        *rhs = '\0';
        return rhs;
}
451
/*
 * Returns a pointer to the first character of the string that is not
 * white space, which is the terminator if there is no such character.
 */
static char *
skip_spaces(char *s)
{
        /* <ctype.h> routines need a value in unsigned char range. */
        while (*s != '\0' && isspace((unsigned char)*s))
                s++;
        return s;
}
462
/*
 * Returns whether the string consists of one or more decimal digits and
 * nothing else.  An empty string does not qualify.
 */
static int
only_digits(char *line)
{
        /* <ctype.h> routines need a value in unsigned char range. */
        if (!isdigit((unsigned char)*line))
                return 0;
        do {
                line++;
        } while (isdigit((unsigned char)*line));
        return *line == '\0';
}
475
476 /*
477  * Returns whether the line is of one of the forms:
478  *      .Sh NAME
479  *      .Sh "NAME"
480  *      etc.
481  * assuming that section_start is ".Sh".
482  */
483 static int
484 name_section_line(char *line, const char *section_start)
485 {
486         char *rhs;
487         const char **title;
488
489         if (strncmp(line, section_start, 3) != 0)
490                 return 0;
491         line = skip_spaces(line + 3);
492         rhs = trim_rhs(line);
493         if (*line == '"') {
494                 line++;
495                 if (*--rhs == '"')
496                         *rhs = '\0';
497         }
498         for (title = name_section_titles; *title != NULL; title++)
499                 if (strcmp(*title, line) == 0)
500                         return 1;
501         return 0;
502 }
503
/*
 * Copies fromlen characters from "from" to "to" while removing the most
 * common nroff/troff markup:
 *      \(em, \(mi, \s[+-]N, \&
 *      \fF, \f(fo, \f[font]
 *      \*s, \*(st, \*[stringvar]
 * For any other escape only the backslash is dropped.
 *
 * Returns a pointer to just past the last character written.  The output
 * is never longer than the input and is not NUL-terminated.
 *
 * Reading never goes beyond from + fromlen.  In particular a backslash
 * that is the last character is dropped, rather than the terminator
 * after it being copied into the output.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
        char *from_end = &from[fromlen];

        while (from < from_end) {
                if (*from != '\\') {
                        *to++ = *from++;
                        continue;
                }
                /* A backslash with nothing after it escapes nothing. */
                if (++from >= from_end)
                        break;
                switch (*from) {
                case '(':
                        if (from_end - from >= 3 &&
                            (strncmp(&from[1], "em", 2) == 0 ||
                            strncmp(&from[1], "mi", 2) == 0)) {
                                from += 3;
                                continue;
                        }
                        /* Any other \(xx: only the backslash is dropped. */
                        break;
                case 's':
                        /* Size change: an optional sign, then digits. */
                        from++;
                        if (from < from_end && (*from == '-' || *from == '+'))
                                from++;
                        while (from < from_end && isdigit((unsigned char)*from))
                                from++;
                        continue;
                case 'f':
                case '*':
                        /* Font change or string: name is x, (xx or [xxx]. */
                        from++;
                        if (from >= from_end)
                                continue;
                        if (*from == '(') {
                                from = (from_end - from > 3) ? from + 3 : from_end;
                        } else if (*from == '[') {
                                while (++from < from_end && *from != ']')
                                        ;
                                if (from < from_end)
                                        from++;
                        } else
                                from++;
                        continue;
                case '&':
                        from++;
                        continue;
                }
                *to++ = *from++;
        }
        return to;
}
552
553 /*
554  * Appends a string with the nroff formatting removed.
555  */
556 static void
557 add_nroff(char *text)
558 {
559         sbuf_append_edited(whatis_proto, text, de_nroff_copy);
560 }
561
562 /*
563  * Appends "name(suffix), " to whatis_final.
564  */
565 static void
566 add_whatis_name(char *name, char *suffix)
567 {
568         if (*name != '\0') {
569                 sbuf_append_str(whatis_final, name);
570                 sbuf_append(whatis_final, "(", 1);
571                 sbuf_append_str(whatis_final, suffix);
572                 sbuf_append(whatis_final, "), ", 3);
573         }
574 }
575
576 /*
577  * Processes an old-style man(7) line.  This ignores commands with only
578  * a single number argument.
579  */
580 static void
581 process_man_line(char *line)
582 {
583         if (*line == '.') {
584                 while (isalpha(*++line))
585                         ;
586                 line = skip_spaces(line);
587                 if (only_digits(line))
588                         return;
589         } else
590                 line = skip_spaces(line);
591         if (*line != '\0') {
592                 add_nroff(line);
593                 sbuf_append(whatis_proto, " ", 1);
594         }
595 }
596
597 /*
598  * Processes a new-style mdoc(7) line.
599  */
600 static void
601 process_mdoc_line(char *line)
602 {
603         int xref;
604         int arg = 0;
605         char *line_end = &line[strlen(line)];
606         int orig_length = sbuf_length(whatis_proto);
607         char *next;
608
609         if (*line == '\0')
610                 return;
611         if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
612                 add_nroff(skip_spaces(line));
613                 sbuf_append(whatis_proto, " ", 1);
614                 return;
615         }
616         xref = strncmp(line, ".Xr", 3) == 0;
617         line += 3;
618         while ((line = skip_spaces(line)) < line_end) {
619                 if (*line == '"') {
620                         next = ++line;
621                         for (;;) {
622                                 next = strchr(next, '"');
623                                 if (next == NULL)
624                                         break;
625                                 memmove(next, next + 1, strlen(next));
626                                 line_end--;
627                                 if (*next != '"')
628                                         break;
629                                 next++;
630                         }
631                 } else
632                         next = strpbrk(line, " \t");
633                 if (next != NULL)
634                         *next++ = '\0';
635                 else
636                         next = line_end;
637                 if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
638                         if (strcmp(line, "Ns") == 0) {
639                                 arg = 0;
640                                 line = next;
641                                 continue;
642                         }
643                         if (strstr(mdoc_commands, line) != NULL) {
644                                 line = next;
645                                 continue;
646                         }
647                 }
648                 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
649                         if (xref) {
650                                 sbuf_append(whatis_proto, "(", 1);
651                                 add_nroff(line);
652                                 sbuf_append(whatis_proto, ")", 1);
653                                 xref = 0;
654                                 line = blank;
655                         } else
656                                 sbuf_append(whatis_proto, " ", 1);
657                 }
658                 add_nroff(line);
659                 arg++;
660                 line = next;
661         }
662         if (sbuf_length(whatis_proto) > orig_length)
663                 sbuf_append(whatis_proto, " ", 1);
664 }
665
/*
 * Splits text at each comma and adds the pieces to the StringList.  One
 * space directly after a comma is skipped.  The text is modified in
 * place and the list entries point into it, so nothing is copied.
 */
static void
collect_names(StringList *names, char *text)
{
        char *comma;

        while ((comma = strchr(text, ',')) != NULL) {
                *comma = '\0';
                sl_add(names, text);
                text = comma + 1;
                if (*text == ' ')
                        text++;
        }
        /* What follows the last comma, or the whole text if none. */
        sl_add(names, text);
}
686
/* States of the NAME section scanner in process_page(). */
enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC };

/*
 * Processes a man page source into a single whatis line and adds it
 * to whatis_lines.
 *
 * The page is read line by line through zlib.  The text of its NAME
 * section is gathered in whatis_proto, then split into a list of names
 * and a description and laid out in whatis_final as
 *      name(suffix), name(suffix)   - description
 * with the names padded to "indent" columns.
 *
 * A page that cannot be opened sets exit_code.  A page whose NAME text
 * cannot be split into names and a description, or that yields no
 * non-empty name, is skipped, with a message if verbose.
 */
static void
process_page(struct page_info *page, char *section_dir)
{
        gzFile in;
        char buffer[4096];
        char *line;
        StringList *names;
        char *descr;
        char *entry;
        int state = STATE_UNKNOWN;
        size_t i;

        sbuf_clear(whatis_proto);
        if ((in = gzopen(page->filename, "r")) == NULL) {
                warn("%s", page->filename);
                exit_code = 1;
                return;
        }
        /*
         * Inside the switch, "continue" moves on to the next input line,
         * while "break" leaves the switch and reaches the "break" after
         * it, which ends the scan.
         */
        while (gzgets(in, buffer, sizeof buffer) != NULL) {
                line = buffer;
                if (strncmp(line, ".\\\"", 3) == 0)             /* ignore comments */
                        continue;
                switch (state) {
                /*
                 * haven't reached the NAME section yet.
                 */
                case STATE_UNKNOWN:
                        if (name_section_line(line, ".SH"))
                                state = STATE_MANSTYLE;
                        else if (name_section_line(line, ".Sh"))
                                state = STATE_MDOCNAME;
                        continue;
                /*
                 * Inside an old-style .SH NAME section.
                 */
                case STATE_MANSTYLE:
                        if (strncmp(line, ".SH", 3) == 0)
                                break;
                        if (strncmp(line, ".SS", 3) == 0)
                                break;
                        trim_rhs(line);
                        if (strcmp(line, ".") == 0)
                                continue;
                        if (strncmp(line, ".IX", 3) == 0) {
                                line += 3;
                                line = skip_spaces(line);
                        }
                        process_man_line(line);
                        continue;
                /*
                 * Inside a new-style .Sh NAME section (the .Nm part).
                 */
                case STATE_MDOCNAME:
                        trim_rhs(line);
                        if (strncmp(line, ".Nm", 3) == 0) {
                                process_mdoc_line(line);
                                continue;
                        } else {
                                if (strcmp(line, ".") == 0)
                                        continue;
                                /* First line after the names. */
                                sbuf_append(whatis_proto, "- ", 2);
                                state = STATE_MDOCDESC;
                        }
                        /* fall through */
                /*
                 * Inside a new-style .Sh NAME section (after the .Nm-s).
                 */
                case STATE_MDOCDESC:
                        if (strncmp(line, ".Sh", 3) == 0)
                                break;
                        trim_rhs(line);
                        if (strcmp(line, ".") == 0)
                                continue;
                        process_mdoc_line(line);
                        continue;
                }
                break;
        }
        gzclose(in);
        sbuf_strip(whatis_proto, " \t.-");
        line = sbuf_content(whatis_proto);
        /*
         * line now contains the appropriate data, but without
         * the proper indentation or the section appended to each name.
         */
        descr = strstr(line, " - ");
        if (descr == NULL) {
                /* No dash: take the first word as the name. */
                descr = strchr(line, ' ');
                if (descr == NULL) {
                        if (verbose)
                                fprintf(stderr, "       ignoring junk description \"%s\"\n", line);
                        return;
                }
                *descr++ = '\0';
        } else {
                *descr = '\0';
                descr += 3;
        }
        names = sl_init();
        collect_names(names, line);
        sbuf_clear(whatis_final);
        if (!sl_find(names, page->name) && no_page_exists(section_dir, names, page->suffix)) {
                /*
                 * Add the page name since that's the only thing that
                 * man(1) will find.
                 */
                add_whatis_name(page->name, page->suffix);
        }
        for (i = 0; i < names->sl_cur; i++)
                add_whatis_name(names->sl_str[i], page->suffix);
        /* The names point into whatis_proto, so only the list is freed. */
        sl_free(names, 0);
        /*
         * Empty names add nothing, so whatis_final may still be empty;
         * retracting then would move its end in front of its start.
         */
        if (sbuf_length(whatis_final) < 2) {
                if (verbose)
                        fprintf(stderr, "       ignoring %s, no usable names\n", page->filename);
                return;
        }
        sbuf_retract(whatis_final, 2);          /* remove last ", " */
        while (sbuf_length(whatis_final) < indent)
                sbuf_append(whatis_final, " ", 1);
        sbuf_append(whatis_final, " - ", 3);
        sbuf_append_str(whatis_final, skip_spaces(descr));
        entry = strdup(sbuf_content(whatis_final));
        if (entry == NULL)
                err(1, "strdup");
        sl_add(whatis_lines, entry);
}
810
811 /*
812  * Sorts pages first by inode number, then by name.
813  */
814 static int
815 pagesort(const void *a, const void *b)
816 {
817         const struct page_info *p1 = *(struct page_info * const *) a;
818         const struct page_info *p2 = *(struct page_info * const *) b;
819         if (p1->inode == p2->inode)
820                 return strcmp(p1->name, p2->name);
821         return p1->inode - p2->inode;
822 }
823
824 /*
825  * Processes a single man section.
826  */
827 static void
828 process_section(char *section_dir)
829 {
830         struct dirent **entries;
831         int nentries;
832         struct page_info **pages;
833         int npages = 0;
834         int i;
835         ino_t prev_inode = 0;
836
837         if (verbose)
838                 fprintf(stderr, "  %s\n", section_dir);
839
840         /*
841          * scan the man section directory for pages
842          */
843         nentries = scandir(section_dir, &entries, NULL, alphasort);
844         if (nentries < 0) {
845                 warn("%s", section_dir);
846                 exit_code = 1;
847                 return;
848         }
849         /*
850          * collect information about man pages
851          */
852         pages = (struct page_info **) calloc(nentries, sizeof(struct page_info *));
853         for (i = 0; i < nentries; i++) {
854                 struct page_info *info = new_page_info(section_dir, entries[i]);
855                 if (info != NULL)
856                         pages[npages++] = info;
857                 free(entries[i]);
858         }
859         free(entries);
860         qsort(pages, npages, sizeof(struct page_info *), pagesort);
861         /*
862          * process each unique page
863          */
864         for (i = 0; i < npages; i++) {
865                 struct page_info *page = pages[i];
866                 if (page->inode != prev_inode) {
867                         prev_inode = page->inode;
868                         if (verbose)
869                                 fprintf(stderr, "       reading %s\n", page->filename);
870                         process_page(page, section_dir);
871                 } else if (verbose)
872                         fprintf(stderr, "       skipping %s, duplicate\n", page->filename);
873                 free_page_info(page);
874         }
875         free(pages);
876 }
877
/*
 * scandir(3) selection routine.  Returns whether the directory entry is
 * a man page section: its name is "man" followed by zero or more
 * alphanumeric characters.
 */
static int
select_sections(const struct dirent *entry)
{
        const char *p;

        if (strncmp(entry->d_name, "man", 3) != 0)
                return 0;
        /* <ctype.h> routines need a value in unsigned char range. */
        for (p = &entry->d_name[3]; *p != '\0'; p++) {
                if (!isalnum((unsigned char)*p))
                        return 0;
        }
        return 1;
}
894
895 /*
896  * Processes a single top-level man directory by finding all the
897  * sub-directories named man* and processing each one in turn.
898  */
899 static void
900 process_mandir(char *dir_name)
901 {
902         struct dirent **entries;
903         int nsections;
904         FILE *fp = NULL;
905         int i;
906         struct stat st;
907
908         if (already_visited(dir_name))
909                 return;
910         if (verbose)
911                 fprintf(stderr, "man directory %s\n", dir_name);
912         nsections = scandir(dir_name, &entries, select_sections, alphasort);
913         if (nsections < 0) {
914                 warn("%s", dir_name);
915                 exit_code = 1;
916                 return;
917         }
918         if (common_output == NULL && (fp = open_whatis(dir_name)) == NULL)
919                 return;
920         for (i = 0; i < nsections; i++) {
921                 char section_dir[MAXPATHLEN];
922                 snprintf(section_dir, sizeof section_dir, "%s/%s", dir_name, entries[i]->d_name);
923                 process_section(section_dir);
924                 snprintf(section_dir, sizeof section_dir, "%s/%s/%s", dir_name,
925                     entries[i]->d_name, machine);
926                 if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
927                         process_section(section_dir);
928                 if (strcmp(machine_arch, machine) != 0) {
929                         snprintf(section_dir, sizeof section_dir, "%s/%s/%s",
930                             dir_name, entries[i]->d_name, machine_arch);
931                         if (stat(section_dir, &st) == 0 && S_ISDIR(st.st_mode))
932                                 process_section(section_dir);
933                 }
934                 free(entries[i]);
935         }
936         free(entries);
937         if (common_output == NULL)
938                 finish_whatis(fp, dir_name);
939 }
940
941 /*
942  * Processes one argument, which may be a colon-separated list of
943  * directories.
944  */
945 static void
946 process_argument(const char *arg)
947 {
948         char *dir;
949         char *mandir;
950         char *parg;
951
952         parg = strdup(arg);
953         if (parg == NULL)
954                 err(1, "out of memory");
955         while ((dir = strsep(&parg, ":")) != NULL) {
956                 if (locale != NULL) {
957                         asprintf(&mandir, "%s/%s", dir, locale);
958                         process_mandir(mandir);
959                         free(mandir);
960                         if (lang_locale != NULL) {
961                                 asprintf(&mandir, "%s/%s", dir, lang_locale);
962                                 process_mandir(mandir);
963                                 free(mandir);
964                         }
965                 } else {
966                         process_mandir(dir);
967                 }
968         }
969         free(parg);
970 }
971
972
973 int
974 main(int argc, char **argv)
975 {
976         int opt;
977         FILE *fp = NULL;
978
979         while ((opt = getopt(argc, argv, "ai:n:o:vL")) != -1) {
980                 switch (opt) {
981                 case 'a':
982                         append++;
983                         break;
984                 case 'i':
985                         indent = atoi(optarg);
986                         break;
987                 case 'n':
988                         whatis_name = optarg;
989                         break;
990                 case 'o':
991                         common_output = optarg;
992                         break;
993                 case 'v':
994                         verbose++;
995                         break;
996                 case 'L':
997                         locale = getenv("LC_ALL");
998                         if (locale == NULL)
999                                 locale = getenv("LC_CTYPE");
1000                         if (locale == NULL)
1001                                 locale = getenv("LANG");
1002                         if (locale != NULL) {
1003                                 char *sep = strchr(locale, '_');
1004                                 if (sep != NULL && isupper(sep[1]) &&
1005                                     isupper(sep[2])) {
1006                                         asprintf(&lang_locale, "%.*s%s", (int)(ptrdiff_t)(sep - locale), locale, &sep[3]);
1007                                 }
1008                         }
1009                         break;
1010                 default:
1011                         fprintf(stderr, "usage: %s [-a] [-i indent] [-n name] [-o output_file] [-v] [-L] [directories...]\n", argv[0]);
1012                         exit(1);
1013                 }
1014         }
1015
1016         signal(SIGINT, trap_signal);
1017         signal(SIGHUP, trap_signal);
1018         signal(SIGQUIT, trap_signal);
1019         signal(SIGTERM, trap_signal);
1020         SLIST_INIT(&visited_dirs);
1021         whatis_proto = new_sbuf();
1022         whatis_final = new_sbuf();
1023
1024         if ((machine = getenv("MACHINE")) == NULL) {
1025                 static struct utsname utsname;
1026
1027                 if (uname(&utsname) == -1)
1028                         err(1, "uname");
1029                 machine = utsname.machine;
1030         }
1031
1032         if ((machine_arch = getenv("MACHINE_ARCH")) == NULL)
1033                 machine_arch = MACHINE_ARCH;
1034
1035         if (common_output != NULL && (fp = open_output(common_output)) == NULL)
1036                 err(1, "%s", common_output);
1037         if (optind == argc) {
1038                 const char *manpath = getenv("MANPATH");
1039                 if (manpath == NULL)
1040                         manpath = DEFAULT_MANPATH;
1041                 process_argument(manpath);
1042         } else {
1043                 while (optind < argc)
1044                         process_argument(argv[optind++]);
1045         }
1046         if (common_output != NULL)
1047                 finish_output(fp, common_output);
1048         exit(exit_code);
1049 }