2 * Copyright (c) 2002 John Rochester
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/utsname.h>
/*
 * Man path that main() passes to process_argument() when no directory
 * arguments are given.
 * NOTE(review): the test on the MANPATH environment variable that guards
 * this fallback is not visible in this excerpt.
 */
52 #define DEFAULT_MANPATH "/usr/share/man"
/* Directory classifications; see directory_type(). */
54 #define TOP_LEVEL_DIR 0 /* signifies a top-level man directory */
55 #define MAN_SECTION_DIR 1 /* signifies a man section directory */
56 #define UNKNOWN 2 /* signifies an unclassifiable directory */
/*
 * Bit flags that test_path() ORs into its result.
 * NOTE(review): callers also test TEST_DIR, whose definition is not
 * visible in this excerpt.
 */
58 #define TEST_EXISTS 0x01
60 #define TEST_FILE 0x04
61 #define TEST_READABLE 0x08
62 #define TEST_WRITABLE 0x10
/* Program-wide option flags and state shared by the functions below. */
64 static int verbose; /* -v flag: be verbose with warnings */
65 static int pretend; /* -n, -p flags: print out what would be done
66 instead of actually doing it */
67 static int force; /* -f flag: force overwriting all cat pages */
68 static int rm_junk; /* -r flag: remove garbage pages */
69 static char *locale; /* user's locale if -L is used */
70 static char *lang_locale; /* short form of locale */
/*
 * Platform names.  main() reads them from the MACHINE and MACHINE_ARCH
 * environment variables, falling back to uname() and the MACHINE_ARCH
 * macro respectively.
 */
71 static const char *machine, *machine_arch;
72 static int exit_code; /* exit code to use when finished */
74 extern char **environ;
77 * -T argument for nroff
/*
 * Starts out as ascii; determine_locale() may replace it with a device
 * name taken from locale_device.
 */
79 static const char *nroff_device = "ascii";
82 * Mapping from locale to nroff device
/*
 * Flat list of pairs: a codeset name followed by the nroff device to use
 * for it.  determine_locale() walks it two entries at a time until it
 * reaches a NULL entry.
 * NOTE(review): the NULL terminator and the closing brace are not visible
 * in this excerpt.
 */
84 static const char *locale_device[] = {
86 "ISO8859-1", "latin1",
87 "ISO8859-15", "latin1",
/*
 * bzip2 support.  BZ2_CMD compresses the formatted output and BZ2_EXT is
 * the file suffix tested by is_bzipped().  BZ2CAT_CMD is only a prefix:
 * process_page() formats it with a trailing cat, giving bzcat.
 * NOTE(review): the matching GZ_CMD, GZ_EXT and GZCAT_CMD macros are used
 * by this file but their definitions are not visible in this excerpt.
 */
91 #define BZ2_CMD "bzip2"
92 #define BZ2_EXT ".bz2"
93 #define BZ2CAT_CMD "bz"
/* Compression kind of a man page, as detected from its file name. */
97 enum Ziptype {NONE, BZIP, GZIP};
/* Set in main(); selects the locale output device for mandoc over ascii. */
99 static bool mandoc_locales = false;
/* Descriptor for the start directory: opened in main(), used by fchdir(). */
101 static int starting_dir;
/* Temporary output name written by process_page(), read by trap_signal(). */
102 static char tmp_file[MAXPATHLEN];
/* Filled by the stat() call in test_path(); process_page() reads it back. */
103 static struct stat test_st;
106 * A hashtable is an array of chains composed of this entry structure.
112 struct hash_entry *next;
/*
 * Hashtable geometry.  Because the size is a power of two, HASH_MASK can
 * reduce an inode number to a bucket index with a single AND.
 */
115 #define HASHTABLE_ALLOC 16384 /* allocation for hashtable (power of 2) */
116 #define HASH_MASK (HASHTABLE_ALLOC - 1)
/* Directories already seen; consulted and filled by already_visited(). */
118 static struct hash_entry *visited[HASHTABLE_ALLOC];
/* Source inode to cat page path; consulted and filled by process_page(). */
119 static struct hash_entry *links[HASHTABLE_ALLOC];
122 * Inserts a string into a hashtable keyed by inode & device number.
/*
 * insert_hashtable: allocate a new hash_entry holding inode_number,
 * device_number and data, and attach it to the chain of table selected by
 * (inode_number & HASH_MASK).  Exits through err(1, ...) when the entry
 * cannot be allocated.
 *
 * NOTE(review): the remaining parameters and the statement that stores
 * the new entry as the head of the chain are not visible in this excerpt;
 * presumably the new entry is written through chain -- confirm.
 */
125 insert_hashtable(struct hash_entry **table,
130 struct hash_entry *new_entry;
131 struct hash_entry **chain;
133 new_entry = (struct hash_entry *) malloc(sizeof(struct hash_entry));
134 if (new_entry == NULL)
135 err(1, "can't insert into hashtable");
/* The bucket is chosen from the inode number only. */
136 chain = &table[inode_number & HASH_MASK];
137 new_entry->inode_number = inode_number;
138 new_entry->device_number = device_number;
139 new_entry->data = data;
/* The new entry points at the current head of the chain. */
140 new_entry->next = *chain;
145 * Finds a string in a hashtable keyed by inode & device number.
/*
 * find_hashtable: walk the chain of table selected by
 * (inode_number & HASH_MASK), looking for an entry whose inode and device
 * numbers both match.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * Callers compare the result against NULL (already_visited) and use it as
 * the stored string (process_page), so presumably the data of the matching
 * entry is returned, or NULL when nothing matches -- confirm.
 */
148 find_hashtable(struct hash_entry **table,
152 struct hash_entry *chain;
154 chain = table[inode_number & HASH_MASK];
155 while (chain != NULL) {
/* Both numbers must match; an inode number alone is not unique. */
156 if (chain->inode_number == inode_number &&
157 chain->device_number == device_number)
/*
 * trap_signal: handler that main() installs for SIGINT, SIGHUP, SIGQUIT
 * and SIGTERM.  The signal number is unused.  It checks whether tmp_file
 * currently holds a name.
 *
 * NOTE(review): the action taken is not visible in this excerpt;
 * presumably the temporary file is removed and the program exits --
 * confirm.
 */
165 trap_signal(int sig __unused)
167 if (tmp_file[0] != '\0')
173 * Deals with junk files in the man or cat section directories.
/*
 * junk: report a junk file and possibly delete it.
 *
 * mandir: directory name, used only as a prefix in messages.
 * name:   path of the junk file; this is what gets passed to unlink(), so
 *         it is resolved against the current working directory.
 * reason: explanation printed after the path.
 *
 * Unless pretend is set, the file is unlinked; a failure is reported with
 * warn().
 *
 * NOTE(review): the conditions guarding the two messages are not visible
 * in this excerpt; presumably they depend on verbose and rm_junk --
 * confirm.
 */
176 junk(const char *mandir, const char *name, const char *reason)
179 fprintf(stderr, "%s/%s: %s\n", mandir, name, reason);
181 fprintf(stderr, "rm %s/%s\n", mandir, name);
182 if (!pretend && unlink(name) < 0)
183 warn("%s/%s", mandir, name);
188 * Returns TOP_LEVEL_DIR for .../man, MAN_SECTION_DIR for .../manXXX,
189 * and UNKNOWN for everything else.
/*
 * directory_type: classify a directory name by its last path component.
 *
 * The visible code locates the last slash with strrchr(), then checks
 * whether the component starts with man.  It returns TOP_LEVEL_DIR from
 * inside that branch, and MAN_SECTION_DIR from inside a loop that steps
 * over alphanumerics and underscores.
 *
 * NOTE(review): several lines are not visible in this excerpt, including
 * the exact conditions on each return and the fallback return.  The
 * parameter is not const and select_sections() passes a private copy, so
 * presumably dir may be modified in place (for example to drop trailing
 * slashes) -- confirm.
 */
192 directory_type(char *dir)
197 p = strrchr(dir, '/');
198 if (p == NULL || p[1] != '\0')
206 if (strncmp(p, "man", 3) == 0) {
209 return TOP_LEVEL_DIR;
/* The cast keeps isalnum() defined for bytes with the high bit set. */
210 while (isalnum((unsigned char)*p) || *p == '_') {
212 return MAN_SECTION_DIR;
219 * Tests whether the given file name (without a preceding path)
220 * is a proper man page name (like "mk-amd-map.8.gz").
221 * Only alphanumerics and '_' are allowed after the last '.' and
222 * the last '.' can't be the first or last characters.
/*
 * is_manpage_name: decide whether a bare file name looks like a man page
 * name.
 *
 * The result is non-zero only when a last dot was recorded, it is not the
 * first character of name, and at least one character lies between it and
 * the position n.
 *
 * NOTE(review): the scanning loop is mostly not visible in this excerpt.
 * Presumably n walks the whole string and lastdot tracks the most recent
 * dot, being reset by characters that are not allowed after it -- confirm.
 */
225 is_manpage_name(char *name)
227 char *lastdot = NULL;
231 if (!isalnum((unsigned char)*n)) {
250 return lastdot > name && lastdot + 1 < n;
254 is_bzipped(char *name)
256 int len = strlen(name);
257 return len >= 5 && strcmp(&name[len - 4], BZ2_EXT) == 0;
261 is_gzipped(char *name)
263 int len = strlen(name);
264 return len >= 4 && strcmp(&name[len - 3], GZ_EXT) == 0;
268 * Converts manXXX to catXXX.
/*
 * Converts a man section directory name (manXXX) into the name of the
 * matching cat directory (catXXX).
 *
 * section: section directory name; it must start with "man" and contain
 *          at least one more character (checked with assert()).
 *
 * Returns a newly allocated string owned by the caller.  Exits through
 * err(1, ...) when memory cannot be allocated, matching how the rest of
 * the file treats allocation failure.
 */
static char *
get_cat_section(char *section)
{
	char *cat_section;
	size_t suffix_len;

	/* Validate the name before allocating or copying anything. */
	assert(strlen(section) > 3 && strncmp(section, "man", 3) == 0);

	suffix_len = strlen(section + 3);
	cat_section = malloc(3 + suffix_len + 1);
	if (cat_section == NULL)
		err(1, "out of memory");
	memcpy(cat_section, "cat", 3);
	/* Copy the section suffix together with its terminating NUL. */
	memcpy(cat_section + 3, section + 3, suffix_len + 1);
	return cat_section;
}
282 * Tests to see if the given directory has already been visited.
/*
 * already_visited: check whether a directory has been processed before,
 * identifying it by the inode and device numbers that stat() reports.
 *
 * mandir: enclosing man directory, used as a message prefix; the
 *         process_mandir() caller passes NULL for it.
 * dir:    directory to look up.
 * count_visit: NOTE(review): its use is not visible in this excerpt;
 *         presumably it decides whether a first visit is recorded --
 *         confirm.
 *
 * A stat() failure is reported with warn().  A directory found in the
 * visited table is reported with warnx(), with or without the mandir
 * prefix.  Otherwise the directory is entered into the visited table with
 * an empty string as its data.
 *
 * NOTE(review): the return statements and the conditions around the
 * messages are not visible in this excerpt.
 */
285 already_visited(char *mandir, char *dir, int count_visit)
289 if (stat(dir, &st) < 0) {
291 warn("%s/%s", mandir, dir);
297 if (find_hashtable(visited, st.st_ino, st.st_dev) != NULL) {
299 warnx("already visited %s/%s", mandir, dir);
301 warnx("already visited %s", dir);
/* Only presence in the table matters, so the stored string is empty. */
305 insert_hashtable(visited, st.st_ino, st.st_dev, "");
310 * Returns a set of TEST_* bits describing a file's type and permissions.
311 * If mod_time isn't NULL, it will contain the file's modification time.
314 test_path(char *name, time_t *mod_time)
318 if (stat(name, &test_st) < 0)
320 result = TEST_EXISTS;
321 if (mod_time != NULL)
322 *mod_time = test_st.st_mtime;
323 if (S_ISDIR(test_st.st_mode))
325 else if (S_ISREG(test_st.st_mode))
327 if (access(name, R_OK))
328 result |= TEST_READABLE;
329 if (access(name, W_OK))
330 result |= TEST_WRITABLE;
335 * Checks whether a file is a symbolic link.
/*
 * Checks whether a path names a symbolic link.  The path itself is
 * examined with lstat(), not the file it may point to.  Returns 1 only
 * when lstat() succeeds and reports a symbolic link, otherwise 0.
 */
static int
is_symlink(char *path)
{
	struct stat link_st;

	if (lstat(path, &link_st) < 0)
		return 0;
	return S_ISLNK(link_st.st_mode) != 0;
}
346 * Tests to see if the given directory can be written to.
349 check_writable(char *mandir)
351 if (verbose && !(test_path(mandir, NULL) & TEST_WRITABLE))
352 fprintf(stderr, "%s: not writable - will only be able to write to existing cat directories\n", mandir);
356 * If the directory exists, attempt to make it writable, otherwise
357 * attempt to create it.
/*
 * make_writable_dir: make sure dir exists and can be written to.
 *
 * mandir: enclosing man directory, used only as a message prefix.
 * dir:    directory to prepare, resolved against the current directory.
 *
 * When test_path() reports that dir exists but lacks the TEST_WRITABLE
 * bit, chmod(dir, 0755) is tried and a failure is reported with warn().
 * When dir does not exist, the mkdir is announced if verbose or pretend
 * is set, and mkdir(dir, 0755) is attempted with failures reported by
 * warn().
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * process_section() scans the section only when the result is non-zero.
 * Whether pretend suppresses the actual mkdir() is also not visible.
 */
360 make_writable_dir(char *mandir, char *dir)
364 if ((test = test_path(dir, NULL)) != 0) {
365 if (!(test & TEST_WRITABLE) && chmod(dir, 0755) < 0) {
366 warn("%s/%s: chmod", mandir, dir);
371 if (verbose || pretend)
372 fprintf(stderr, "mkdir %s\n", dir);
375 if (mkdir(dir, 0755) < 0) {
376 warn("%s/%s: mkdir", mandir, dir);
386 * Processes a single man page source by using nroff to create
387 * the preformatted cat page.
/*
 * process_page: bring one cat page up to date with its man page source.
 *
 * mandir: enclosing man directory, used as a message prefix.
 * src:    path of the man page source.
 * cat:    path of the cat page to produce.
 * zipped: compression of the source; it selects the decompressing cat
 *         prefix and the compressor placed at the end of the pipeline.
 *
 * Visible steps, in order:
 *  - The source is classified with test_path().  An unusable source that
 *    is not a directory is reported as unreadable, and with -r a bogus
 *    symlink is handed to junk().
 *  - The cat page is classified; unless force is set, a cat page at
 *    least as new as the source is reported as up to date.
 *  - If the links table already holds the source's device and inode
 *    numbers, the cat page is hard-linked to the earlier cat page rather
 *    than formatted again.
 *  - Otherwise the cat path is remembered in the links table, tmp_file is
 *    set to the cat path plus .tmp, and a shell pipeline is run through
 *    system().  A mandoc lint pass is run first; a zero status selects
 *    the mandoc pipeline, and the tbl and nroff pipeline is the
 *    alternative.
 *  - The temporary file is renamed onto the cat page.
 *
 * NOTE(review): many lines (returns, pretend checks, else branches, some
 * call arguments) are not visible in this excerpt.
 * NOTE(review): src and cat are placed into the shell command without
 * quoting.  The command buffer is MAXPATHLEN bytes although it holds two
 * paths plus command text.  A failed pipeline ends the whole program
 * through err(), which also prints an errno string -- confirm intent.
 */
390 process_page(char *mandir, char *src, char *cat, enum Ziptype zipped)
392 int src_test, cat_test;
393 time_t src_mtime, cat_mtime;
394 char cmd[MAXPATHLEN];
397 const char *link_name;
399 src_test = test_path(src, &src_mtime);
400 if (!(src_test & (TEST_FILE|TEST_READABLE))) {
401 if (!(src_test & TEST_DIR)) {
402 warnx("%s/%s: unreadable", mandir, src);
404 if (rm_junk && is_symlink(src))
405 junk(mandir, src, "bogus symlink");
/*
 * test_path() left the stat data of src in test_st; save its identity
 * before the next test_path() call overwrites test_st.
 */
409 src_dev = test_st.st_dev;
410 src_ino = test_st.st_ino;
411 cat_test = test_path(cat, &cat_mtime);
412 if (cat_test & (TEST_FILE|TEST_READABLE)) {
413 if (!force && cat_mtime >= src_mtime) {
415 fprintf(stderr, "\t%s/%s: up to date\n",
422 * Is the man page a link to one we've already processed?
424 if ((link_name = find_hashtable(links, src_ino, src_dev)) != NULL) {
425 if (verbose || pretend) {
426 fprintf(stderr, "%slink %s -> %s\n",
427 verbose ? "\t" : "", cat, link_name);
431 if (link(link_name, cat) < 0)
432 warn("%s %s: link", link_name, cat);
436 insert_hashtable(links, src_ino, src_dev, strdup(cat));
437 if (verbose || pretend) {
438 fprintf(stderr, "%sformat %s -> %s\n",
439 verbose ? "\t" : "", src, cat);
443 snprintf(tmp_file, sizeof tmp_file, "%s.tmp", cat);
444 snprintf(cmd, sizeof cmd,
445 "%scat %s | mandoc -Tlint -Wunsupp 2>/dev/null",
446 zipped == BZIP ? BZ2CAT_CMD : zipped == GZIP ? GZCAT_CMD : "",
448 if (system(cmd) == 0) {
449 snprintf(cmd, sizeof cmd,
450 "%scat %s | mandoc -T%s | %s > %s.tmp",
451 zipped == BZIP ? BZ2CAT_CMD : zipped == GZIP ? GZCAT_CMD : "",
452 src, mandoc_locales ? "locale" : "ascii",
453 zipped == BZIP ? BZ2_CMD : zipped == GZIP ? GZ_CMD : "cat",
456 snprintf(cmd, sizeof cmd,
457 "%scat %s | tbl | nroff -c -T%s -man | %s > %s.tmp",
458 zipped == BZIP ? BZ2CAT_CMD : zipped == GZIP ? GZCAT_CMD : "",
460 zipped == BZIP ? BZ2_CMD : zipped == GZIP ? GZ_CMD : "cat",
463 if (system(cmd) != 0)
464 err(1, "formatting pipeline");
465 if (rename(tmp_file, cat) < 0)
471 * Scan the man section directory for pages and process each one,
472 * then check for junk in the corresponding cat section.
/*
 * scan_section: format every page of one man section, then look for junk
 * in the matching cat section.
 *
 * mandir:      enclosing man directory, used as a message prefix.
 * section:     man section directory to scan.
 * cat_section: cat directory that receives the formatted pages.
 *
 * First pass (man section, entries sorted by alphasort):
 *  - Names rejected by is_manpage_name() are handed to junk() unless
 *    they are directories.
 *  - Compressed pages (bzip2 or gzip suffix) go to process_page() with
 *    the same file name in the cat section.
 *  - For an uncompressed page a compressed sibling is looked up first.
 *    If it exists, the uncompressed page is junk; otherwise the page is
 *    processed with NONE.
 *  - When verbose or rm_junk is set, the expected cat page name of each
 *    processed page is collected in the expected array.
 *
 * Second pass (cat section), only when the expected array was built:
 *  - Each name is compared against the sorted expected list, advancing
 *    the index e while the found name sorts after expected[e].  Names
 *    that are not expected are handed to junk() with one of three
 *    reasons: invalid cat page name, uncompressed cat page shadowed by a
 *    compressed one, or cat page without man page.
 *
 * Each dirent is freed by the increment expression of its for loop.
 *
 * NOTE(review): many lines (declarations, returns, continue statements,
 * frees, some call arguments) are not visible in this excerpt.
 * NOTE(review): the results of calloc(), strdup() and asprintf() are not
 * checked in the visible lines, and the return value of the second
 * scandir() is not visibly tested -- confirm against the full source.
 */
475 scan_section(char *mandir, char *section, char *cat_section)
477 struct dirent **entries;
478 char **expected = NULL;
484 char page_path[MAXPATHLEN];
485 char cat_path[MAXPATHLEN];
486 char zip_path[MAXPATHLEN];
489 * scan the man section directory for pages
491 npages = scandir(section, &entries, NULL, alphasort);
493 warn("%s/%s", mandir, section);
497 if (verbose || rm_junk) {
499 * Maintain a list of all cat pages that should exist,
500 * corresponding to existing man pages.
502 expected = (char **) calloc(npages, sizeof(char *));
504 for (i = 0; i < npages; free(entries[i++])) {
505 page_name = entries[i]->d_name;
506 snprintf(page_path, sizeof page_path, "%s/%s", section,
508 if (!is_manpage_name(page_name)) {
509 if (!(test_path(page_path, NULL) & TEST_DIR)) {
510 junk(mandir, page_path,
511 "invalid man page name");
515 zipped = is_bzipped(page_name) ? BZIP :
516 is_gzipped(page_name) ? GZIP : NONE;
517 if (zipped != NONE) {
518 snprintf(cat_path, sizeof cat_path, "%s/%s",
519 cat_section, page_name);
520 if (expected != NULL)
521 expected[nexpected++] = strdup(page_name);
522 process_page(mandir, page_path, cat_path, zipped);
525 * We've got an uncompressed man page,
526 * check to see if there's a (preferred)
529 snprintf(zip_path, sizeof zip_path, "%s%s",
531 if (test_path(zip_path, NULL) != 0) {
532 junk(mandir, page_path,
533 "man page unused due to existing " GZ_EXT);
537 "warning, %s is uncompressed\n",
540 snprintf(cat_path, sizeof cat_path, "%s/%s",
541 cat_section, page_name);
542 if (expected != NULL) {
543 asprintf(&expected[nexpected++],
546 process_page(mandir, page_path, cat_path, NONE);
551 if (expected == NULL)
554 * scan cat sections for junk
556 npages = scandir(cat_section, &entries, NULL, alphasort);
558 for (i = 0; i < npages; free(entries[i++])) {
559 const char *junk_reason;
562 page_name = entries[i]->d_name;
563 if (strcmp(page_name, ".") == 0 || strcmp(page_name, "..") == 0)
566 * Keep the index into the expected cat page list
567 * ahead of the name we've found.
569 while (e < nexpected &&
570 (cmp = strcmp(page_name, expected[e])) > 0)
574 /* we have an unexpected page */
575 snprintf(cat_path, sizeof cat_path, "%s/%s", cat_section,
577 if (!is_manpage_name(page_name)) {
578 if (test_path(cat_path, NULL) & TEST_DIR)
580 junk_reason = "invalid cat page name";
581 } else if (!is_gzipped(page_name) && e + 1 < nexpected &&
582 strncmp(page_name, expected[e + 1], strlen(page_name)) == 0 &&
583 strlen(expected[e + 1]) == strlen(page_name) + 3) {
584 junk_reason = "cat page unused due to existing " GZ_EXT;
586 junk_reason = "cat page without man page";
587 junk(mandir, cat_path, junk_reason);
590 while (e < nexpected)
597 * Processes a single man section.
/*
 * process_section: handle one man section directory.
 *
 * mandir:  enclosing man directory, used as a message prefix.
 * section: section directory name, resolved against the current directory.
 *
 * The section is checked with already_visited() and announced on stderr.
 * Its cat directory name is derived with get_cat_section(), and the
 * section is scanned only when make_writable_dir() reports a non-zero
 * result for that cat directory.
 *
 * NOTE(review): the early return, the condition on the message, and the
 * release of cat_section are not visible in this excerpt.
 */
600 process_section(char *mandir, char *section)
604 if (already_visited(mandir, section, 1))
607 fprintf(stderr, " section %s\n", section);
/* get_cat_section() returns an allocated string. */
608 cat_section = get_cat_section(section);
609 if (make_writable_dir(mandir, cat_section))
610 scan_section(mandir, section, cat_section);
/*
 * select_sections: filter callback that process_mandir() passes to
 * scandir().  It classifies a private copy of the entry name with
 * directory_type() and computes whether it is a man section directory.
 *
 * NOTE(review): the declarations, the release of the copy, and the return
 * statement are not visible in this excerpt; presumably ret is returned.
 * The strdup() result is not checked in the visible lines.
 */
615 select_sections(const struct dirent *entry)
/* Work on a copy: directory_type() takes a non-const string. */
620 name = strdup(entry->d_name);
621 ret = directory_type(name) == MAN_SECTION_DIR;
627 * Processes a single top-level man directory. If section isn't NULL,
628 * it will only process that section sub-directory, otherwise it will
629 * process all of them.
/*
 * process_mandir: handle one top-level man directory.
 *
 * dir_name: man directory; it may be relative to the start directory.
 * section:  a single section to process, or NULL to process every
 *           section found.
 *
 * The function first returns to the start directory with fchdir(), so a
 * relative dir_name resolves the same way on every call.  It then checks
 * already_visited() and check_writable(), and changes into dir_name.
 * With a section given, only that one is processed.  Otherwise scandir()
 * lists the section directories through select_sections() and each one
 * is processed.  Two further paths (machine_dir and arch_dir) are built
 * under each section and processed when they are directories; the second
 * only when machine_arch differs from machine.
 *
 * NOTE(review): error returns, message conditions, the second argument of
 * each asprintf() call, and the frees are not visible in this excerpt.
 * The asprintf() results are not checked in the visible lines.
 */
632 process_mandir(char *dir_name, char *section)
634 if (fchdir(starting_dir) < 0)
/* The third argument is non-zero only when all sections are processed. */
636 if (already_visited(NULL, dir_name, section == NULL))
638 check_writable(dir_name);
640 fprintf(stderr, "man directory %s\n", dir_name);
642 fprintf(stderr, "cd %s\n", dir_name);
643 if (chdir(dir_name) < 0) {
644 warn("%s: chdir", dir_name);
648 if (section != NULL) {
649 process_section(dir_name, section);
651 struct dirent **entries;
652 char *machine_dir, *arch_dir;
656 nsections = scandir(".", &entries, select_sections, alphasort);
658 warn("%s", dir_name);
662 for (i = 0; i < nsections; i++) {
663 process_section(dir_name, entries[i]->d_name);
664 asprintf(&machine_dir, "%s/%s", entries[i]->d_name,
666 if (test_path(machine_dir, NULL) & TEST_DIR)
667 process_section(dir_name, machine_dir);
/* Skip the second lookup when both platform names are the same. */
669 if (strcmp(machine_arch, machine) != 0) {
670 asprintf(&arch_dir, "%s/%s", entries[i]->d_name,
672 if (test_path(arch_dir, NULL) & TEST_DIR)
673 process_section(dir_name, arch_dir);
683 * Processes one argument, which may be a colon-separated list of
/*
 * process_argument: handle one command-line argument or man path value,
 * which is split on colons with strsep().
 *
 * Each component is classified with directory_type():
 *  - In the first visible case, the locale and lang_locale
 *    subdirectories are processed when those globals are set, and the
 *    directory itself is processed with all sections.
 *  - For MAN_SECTION_DIR, the component is split with dirname() and
 *    basename() and only that section of the parent is processed.
 *  - Otherwise a warning says the name is not in proper man form.
 *
 * NOTE(review): the code that copies arg into parg, the case labels other
 * than MAN_SECTION_DIR, the frees and the break statements are not
 * visible in this excerpt.  The asprintf() and strdup() results are not
 * checked in the visible lines.
 */
687 process_argument(const char *arg)
696 err(1, "out of memory");
697 while ((dir = strsep(&parg, ":")) != NULL) {
698 switch (directory_type(dir)) {
700 if (locale != NULL) {
701 asprintf(&mandir, "%s/%s", dir, locale);
702 process_mandir(mandir, NULL);
704 if (lang_locale != NULL) {
705 asprintf(&mandir, "%s/%s", dir,
707 process_mandir(mandir, NULL);
711 process_mandir(dir, NULL);
714 case MAN_SECTION_DIR: {
/* Copies are taken because dirname() and basename() may reuse storage. */
715 mandir = strdup(dirname(dir));
716 section = strdup(basename(dir));
717 process_mandir(mandir, section);
723 warnx("%s: directory name not in proper man form", dir);
/*
 * determine_locale: set locale, lang_locale and nroff_device from the
 * environment.
 *
 *  - locale is the result of setlocale(LC_CTYPE, ""); a NULL result is
 *    reported with warnx().
 *  - When the locale name has an underscore followed by two upper-case
 *    letters, a short form is built into lang_locale from the part before
 *    the underscore plus a further string.
 *  - The codeset from nl_langinfo(CODESET), unless empty or US-ASCII, is
 *    looked up in locale_device, and a match replaces nroff_device.
 *  - The chosen values are printed on stderr.
 *
 * NOTE(review): the early return after the warning, the last asprintf()
 * argument and the conditions on the messages are not visible in this
 * excerpt.
 * NOTE(review): warnx() ends its output with a newline itself, so the
 * newline at the end of the message below looks redundant -- confirm.
 */
731 determine_locale(void)
735 if ((locale = setlocale(LC_CTYPE, "")) == NULL) {
736 warnx("-L option used, but no locale found\n");
739 sep = strchr(locale, '_');
740 if (sep != NULL && isupper((unsigned char)sep[1])
741 && isupper((unsigned char)sep[2])) {
742 asprintf(&lang_locale, "%.*s%s", (int)(sep - locale),
/* From here on sep holds the codeset name, not a separator position. */
745 sep = nl_langinfo(CODESET);
746 if (sep != NULL && *sep != '\0' && strcmp(sep, "US-ASCII") != 0) {
/* locale_device holds pairs, so the index advances by two. */
749 for (i = 0; locale_device[i] != NULL; i += 2) {
750 if (strcmp(sep, locale_device[i]) == 0) {
751 nroff_device = locale_device[i + 1];
757 if (lang_locale != NULL)
758 fprintf(stderr, "short locale is %s\n", lang_locale);
759 fprintf(stderr, "nroff device is %s\n", nroff_device);
766 fprintf(stderr, "usage: %s [-fLnrv] [directories ...]\n",
/*
 * main: program entry point.
 *
 *  - When run with uid 0 it prints a suggested command line that
 *    re-runs the program as user man.
 *  - Options are parsed with getopt() using the string vnfLrh.
 *  - The start directory is opened so that process_mandir() can return
 *    to it with fchdir().
 *  - trap_signal() is installed for SIGINT, SIGHUP, SIGQUIT and SIGTERM.
 *  - machine comes from the MACHINE environment variable or uname();
 *    machine_arch comes from MACHINE_ARCH in the environment or the
 *    MACHINE_ARCH macro.
 *  - Without directory arguments the man path (MANPATH from the
 *    environment, with DEFAULT_MANPATH as a fallback) is processed;
 *    otherwise each remaining argument is processed in turn.
 *
 * NOTE(review): the option switch, the error exits and the final exit are
 * not visible in this excerpt; presumably the root check exits after
 * printing -- confirm.
 */
772 main(int argc, char **argv)
776 if ((uid = getuid()) == 0) {
777 fprintf(stderr, "don't run %s as root, use:\n echo", argv[0]);
/* optind is reused as a plain loop index to echo the whole command line. */
778 for (optind = 0; optind < argc; optind++) {
779 fprintf(stderr, " %s", argv[optind]);
781 fprintf(stderr, " | nice -5 su -m man\n");
784 while ((opt = getopt(argc, argv, "vnfLrh")) != -1) {
791 mandoc_locales = true;
807 if ((starting_dir = open(".", 0)) < 0) {
811 signal(SIGINT, trap_signal);
812 signal(SIGHUP, trap_signal);
813 signal(SIGQUIT, trap_signal);
814 signal(SIGTERM, trap_signal);
816 if ((machine = getenv("MACHINE")) == NULL) {
/* Static storage: machine keeps pointing into it after this block ends. */
817 static struct utsname utsname;
819 if (uname(&utsname) == -1)
821 machine = utsname.machine;
824 if ((machine_arch = getenv("MACHINE_ARCH")) == NULL)
825 machine_arch = MACHINE_ARCH;
827 if (optind == argc) {
828 const char *manpath = getenv("MANPATH");
830 manpath = DEFAULT_MANPATH;
831 process_argument(manpath);
833 while (optind < argc)
834 process_argument(argv[optind++]);