3 # tcsh.man2html, Dave Schweisguth <dcs@proton.chem.yale.edu>
7 # Puts all files in the directory tcsh.html (by default; see -D and -s), creating it if necessary.
8 # tcsh.html/top.html is the entry point, and tcsh.html/index.html is a symlink
9 # to tcsh.html/top.html so one needn't specify a file at all if working through
10 # a typically configured server.
12 # Designed for tcsh manpage. Guaranteed not to work on manpages not written
13 # in the exact same style of nroff -man, i.e. any other manpage.
15 # Makes links FROM items which are both a) in particular sections (see
16 # Configuration) and b) marked with .B or .I. Makes links TO items which
17 # are marked with \fB ... \fR or \fI ... \fR.
19 # Designed with X Mosaic in mind and tested lightly with lynx. I've punted on
20 # HTML's lack of a .PD equivalent and lynx's different <menu> handling.
22 # Emulate #!/usr/local/bin/perl on systems without #!
24 eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
25 & eval 'exec perl -S $0 $argv:q' if 0;
31 ($whatami = $0) =~ s|.*/||; # `basename $0`
36 $index = 0; # Don't make a searchable index CGI script
37 $cgibin = 0; # Look for $cgifile in $dir, not $cgibindir
38 $shortfiles = 0; # Use long filenames
39 $single = 0; # Make single page instead of top and sections
41 $host = ''; # host:port part of server URL ***
42 $updir = ''; # Directories between $host and $dir ***
43 $dir = 'tcsh'; # Directory in which to put the pieces *
44 $cgifile = 'tcsh.cgi'; # CGI script name **
45 $cgibindir = 'cgi-bin'; # CGI directory ***
46 $headerfile = 'header'; # HTML file for initial comments *
47 $indexfile = 'index'; # Symlink to $topfile *
48 $listsfile = 'lists'; # Mailing list description HTML file *
49 $outfile = 'tcsh.man'; # Default input file and copy of input file
50 $script = $whatami; # Copy of script; filename length must be OK
51 $topfile = 'top'; # Top-level HTML file *
53 # * .htm or .html suffix added later
54 # ** Only used with -i or -c
55 # *** Only used with -c
57 # Sections to inline in the top page
59 %inline_me = ('NAME', 1,
62 # Sections in which to put name anchors and the font in which to look for
63 # links to those anchors
65 %link_me = ('Editor commands', 'I',
66 'Builtin commands', 'I',
67 'Special aliases', 'I',
68 'Special shell variables', 'B',
72 ### Arguments and error-checking
76 while ($#ARGV > -1 && (($first, $rest) = ($ARGV[0] =~ /^-(.)(.*)/))) {
77 # Perl 5 lossage alert
78 if ($first =~ /[CdDGh]/) { # Switches with arguments
80 $arg = $rest ne '' ? $rest : $ARGV[0] ne '' ? shift :
81 &usage("$whatami: -$first requires an argument.\n");
82 } elsif ($rest ne '') {
87 if ($first eq '1') { $single = 1; }
88 elsif ($first eq 'c') { $cgibin = 1; }
89 elsif ($first eq 'C') { $cgibindir = $arg; }
90 elsif ($first eq 'd') { $updir = $arg; }
91 elsif ($first eq 'D') { $dir = $arg; }
92 elsif ($first eq 'G') { $cgifile = $arg; }
93 elsif ($first eq 'h') { $host = $arg; }
94 elsif ($first eq 'i') { $index = 1; }
95 elsif ($first eq 's') { $shortfiles = 1; }
96 elsif ($first eq 'u') { &usage(0); }
97 else { &usage("$whatami: -$first is not an option.\n"); }
102 $infile = $outfile; # Default input file if interactive
104 $infile = 'STDIN'; # Read STDIN if no args and not a tty
106 } elsif (@ARGV == 1) {
109 &usage("$whatami: Please specify one and only one file.\n");
112 $index = $index || $cgibin; # $index is true if $cgibin is true
114 if ($cgibin && ! $host) {
115 die "$whatami: Must specify host with -h if using -c.\n";
118 # Decide on HTML suffix and append it to filenames
120 $html = $shortfiles ? 'htm' : 'html'; # Max 3-character extension
121 $dir .= ".$html"; # Directory in which to put the pieces
122 $headerfile .= ".$html"; # HTML file for initial comments
123 $topfile .= ".$html"; # Top-level HTML file (or moved notice)
124 $indexfile .= ".$html"; # Symlink to $topfile
125 $listsfile .= ".$html"; # Mailing list description HTML file
127 # Check for input file
129 unless ($infile eq 'STDIN') {
130 die "$whatami: $infile doesn't exist!\n" unless -e $infile;
131 die "$whatami: $infile is unreadable!\n" unless -r _;
132 die "$whatami: $infile is empty!\n" unless -s _;
135 # Check for output directory and create if necessary
138 -d _ || die "$whatami: $dir is not a directory!\n";
139 -r _ && -w _ && -x _ || die "$whatami: $dir is inaccessible!\n"
141 mkdir($dir, 0755) || die "$whatami: Can't create $dir!\n";
146 if ($infile eq 'STDIN') {
149 open(MAN, $infile) || die "$whatami: Error opening $infile!\n";
154 # Print manpage to HTML directory (can't use cp if we're reading from STDIN)
156 open(MAN, ">$dir/$outfile") || die "$whatami: Can't open $dir/$outfile!\n";
160 # Copy script to HTML directory
162 system('cp', $0, $dir) == 0 || die "$whatami: Can't copy $0 to $dir!\n"; # List form: no shell, so odd characters in paths are safe; == 0 also catches signals
164 # Link top.html to index.html in case someone looks at tcsh.html/
166 unlink("$dir/$indexfile"); # symlink() fails if the name already exists, so clear it first
167 symlink($topfile, "$dir/$indexfile") # Builtin instead of `ln -s`: no shell quoting problems
168 || die "$whatami: Can't link $topfile to $dir/$indexfile!\n";
170 ### Get title and section headings
172 $comment = 0; # 0 for text, 1 for ignored text (between .ig and ..)
173 @sectionlines = (0); # Line index at which each section starts; entry 0 is the header
174 @sectiontypes = (0); # 0 for a .SH section, 1 for a .SS subsection
175 @sectiontexts = ('Header'); # Text of section heading
176 @sectionfiles = ($headerfile); # Filename in which to store section
177 %name = (); # Name anchors: key is "text font", value is the href target
178 @name = () if $index; # Ordered "text<TAB>href" entries, kept only when making an index
179 $font = ''; # '' to not make names, 'B' or 'I' to do so
183 if (/^\.ig/) { # Start ignoring
185 } elsif (/^\.\./) { # Stop ignoring
187 } elsif (! $comment) { # Not in .ig'ed section; do stuff
189 # nroff special characters
193 s/^\\'/'/; # leading ' escape
194 s/^\\(\s)/$1/; # leading space escape
195 s/\\(e|\\)/\\/g; # \e, \\; must do this after other escapes
197 # HTML special characters; deal with these before adding more
205 if (/^\.TH\s+(\w+)\s+(\w+)\s+\"([^\"]*)\"\s+\"([^\"]*)\"/) {
206 $title = "$1($2) $4 ($3) $1($2)";
209 # Build per-section info arrays
211 if (($type, $text) = /^\.S([HS])\s+\"?([^\"]*)\"?/) {
213 push(@sectionlines, $line); # Index of first line of section
214 push(@sectiontypes, $type eq 'H' ? 0 : 1); # Type of section
215 $text =~ s/\s*$//; # Remove trailing whitespace
216 push(@sectiontexts, $text); # Title of section (key for href)
217 $text =~ s/\s*\(\+\)$//; # Remove (+)
219 $file = $#sectionlines; # Short filenames; use number
221 $file = $text; # Long filenames; use title
222 $file =~ s/[\s\/]+/_/g; # Replace whitespace and / with _
224 $file .= ".$html" unless $single;
225 push(@sectionfiles, $file); # File in which to store section
226 $name{"$text B"} = ($single ? '#' : '') . $file;
227 # Index entry for &make_hrefs
228 push(@name, "$text\t" . $name{"$text B"}) if $index;
229 # Index entry for CGI script
230 # Look for anchors in the rest of this section if $link_me{$text}
231 # is non-null, and mark them with the font which is its value
233 $font = $link_me{$text};
235 &make_name(*name, *font, *file, *index, *_) if $font;
242 open(TOP, ">$dir/$topfile") || die "$whatami: Can't open $dir/$topfile!\n"; # Fail loudly instead of silently losing the page
249 <TITLE>$title</TITLE>
257 # FORM block, if we're making an index
259 $action = $cgibin ? "http://$host/$cgibindir/$cgifile" : $cgifile;
261 print <<EOP if $index;
262 <FORM METHOD="GET" ACTION="$action">
263 Go directly to a section, command or variable: <INPUT NAME="input">
273 foreach $section (1 .. $#sectionlines) {
274 if ($sectiontypes[$section - 1] < $sectiontypes[$section]) {
275 print "</H2> <menu>\n"; # Indent, smaller font
276 } elsif ($sectiontypes[$section - 1] > $sectiontypes[$section]) {
277 print "</menu> <H2>\n"; # Outdent, larger font
279 if ($inline_me{$sectiontexts[$section]}) { # Section is in %inline_me
281 # Print section inline
283 print "$sectiontexts[$section]\n";
284 print "</H2> <menu>\n"; # Indent, smaller font
285 &printsectionbody(*man, *sectionlines, *section, *name);
286 print "</menu> <H2>\n"; # Outdent, larger font
289 # Print link to section
291 print "<A HREF=\"", $single ? '#' : '',
292 "$sectionfiles[$section]\">$sectiontexts[$section]</A><BR>\n";
300 print "<HR>\n" if $single;
304 foreach $section (0 .. $#sectionlines) {
306 # Skip inlined sections
308 next if $inline_me{$sectiontexts[$section]};
314 print <<EOP if $section; # Skip header section
315 <H2><A NAME="$sectionfiles[$section]">$sectiontexts[$section]</A></H2>
318 &printsectionbody(*man, *sectionlines, *section, *name);
319 print <<EOP if $section; # Skip header section
320 <A HREF="#top">Table of Contents</A>
326 # Make pointer line for header and trailer
328 $pointers = "<A HREF=\"$topfile\">Up</A>";
329 $pointers .= "\n<A HREF=\"$sectionfiles[$section + 1]\">Next</A>"
330 if ($section < $#sectionlines) &&
331 ! $inline_me{$sectiontexts[$section + 1]};
332 $pointers .= "\n<A HREF=\"$sectionfiles[$section - 1]\">Previous</A>"
333 if ($section > 1) && # section 0 is initial comments
334 ! $inline_me{$sectiontexts[$section - 1]};
338 open(OUT, ">$dir/$sectionfiles[$section]") || die "$whatami: Can't open $dir/$sectionfiles[$section]!\n"; # Fail loudly instead of silently losing the section
342 <TITLE>$sectiontexts[$section]</TITLE>
346 <H2>$sectiontexts[$section]</H2>
348 &printsectionbody(*man, *sectionlines, *section, *name);
360 select TOP unless $single;
367 Here are the <A HREF="$outfile">nroff manpage</A> (175K)
368 from which this HTML version was generated,
369 the <A HREF="$script">Perl script</A> which did the conversion
370 and the <A HREF="ftp://ftp.astron.com/pub/tcsh/">
371 complete source code</A> for <I>tcsh</I>.
373 <I>tcsh</I> is maintained by
374 Christos Zoulas <A HREF="mailto:christos\@astron.com"><christos\@astron.com></A>
375 and the <A HREF="$listsfile"><I>tcsh</I> maintainers' mailing list</A>.
376 Dave Schweisguth <A HREF="mailto:dcs\@proton.chem.yale.edu"><dcs\@proton.chem.yale.edu></A>
377 wrote the manpage and the HTML conversion script.
385 open(LISTS, ">$dir/$listsfile") || die "$whatami: Can't open $dir/$listsfile!\n";
387 while(defined($_ = <DATA>) && $_ ne "END\n") { # Text stored after __END__; also stop at EOF so a missing END line can't loop forever
388 s/TOPFILEHERE/$topfile/;
393 ### Make search script
397 # URL of $dir; see comments in search script
400 ? "'http://$host/" . ($updir ? "$updir/" : '') . "$dir/'"
401 : '"http://$ENV{\'SERVER_NAME\'}:$ENV{\'SERVER_PORT\'}" . (($_ = $ENV{\'SCRIPT_NAME\'}) =~ s|[^/]*$||, $_)';
403 # String for passing @name to search script
405 $name = join("',\n'", @name);
407 open(TOP, ">$dir/$cgifile") || die "$whatami: Can't open $dir/$cgifile!\n";
409 while(defined($_ = <DATA>) && $_ ne "END\n") { # Text stored after __END__; also stop at EOF so a missing END line can't loop forever
412 s/TOPFILEHERE/$topfile/;
416 chmod(0755, "$dir/$cgifile") ||
417 die "$whatami: Can't chmod 0755 $dir/$cgifile!\n";
418 warn "$whatami: Don't forget to move $dir/$cgifile to /$cgibindir.\n"
422 ### That's all, folks
428 # Process and print the body of a section
430 sub printsectionbody {
432 local(*man, *sectionlines, *sline, *name) = @_; # Number of section
433 local($sfirst, $slast, @paralines, @paratypes, $comment, $dl, $pline,
434 $pfirst, $plast, @para, @tag, $changeindent); # Scratch variables scoped to this call; $comment was listed twice
436 # Define section boundaries
438 $sfirst = $sectionlines[$sline] + 1;
439 if ($sline == $#sectionlines) {
442 $slast = $sectionlines[$sline + 1] - 1;
445 # Find paragraph markers, ignoring those between '.ig' and '..'
447 if ($man[$sfirst] =~ /^\.[PIT]P/) {
451 @paralines = ($sfirst - 1); # .P follows .S[HS] by default
455 foreach ($sfirst .. $slast) {
456 if ($man[$_] =~ /^\.ig/) { # Start ignoring
458 } elsif ($man[$_] =~ /^\.\./) { # Stop ignoring
460 } elsif (! $comment && $man[$_] =~ /^\.([PIT])P/) {
461 push(@paralines, $_);
462 push(@paratypes, $1);
470 foreach $pline (0 .. $#paralines) {
475 # Define para boundaries
477 $pfirst = $paralines[$pline] + 1;
478 if ($pline == $#paralines) {
481 $plast = $paralines[$pline + 1] - 1;
484 foreach (@man[$pfirst .. $plast]) {
485 if (/^\.ig/) { # nroff begin ignore
488 push(@para, "<!--\n");
489 } elsif ($comment == 1) {
491 } elsif ($comment == 2) {
492 s/--/-/g; # Remove double-dashes in comments
495 } elsif (/^\.\./) { # nroff end ignore
498 } elsif ($comment == 1) {
500 } elsif ($comment == 2) {
503 } elsif (/^\.\\\"/) { # nroff comment
506 push(@para, "<!--\n");
508 } elsif ($comment == 1) {
510 } elsif ($comment == 2) {
513 s/--/-/g; # Remove double-dashes in comments
515 } else { # Nothing to do with comments
518 } elsif ($comment == 1) {
520 push(@para, "-->\n");
521 } elsif ($comment == 2) {
522 s/--/-/g; # Remove double-dashes in comments
527 if (/^\.TH/) { # Title; got this already
529 } elsif (/^\.PD/) { # Para spacing; unimplemented
531 } elsif (/^\.RS/) { # Indent (one width only)
534 } elsif (/^\.RE/) { # Outdent
542 # More nroff special characters
544 s/^\\&\;//; # leading dot escape; save until
545 # now so leading dots aren't
546 # confused with ends of .igs
548 &make_hrefs(*name, *_);
554 push(@para, "-->\n") if $comment; # Close open comment
558 if ($paratypes[$pline] eq 'P') {
561 } elsif ($paratypes[$pline] eq 'I') {
570 print "<DL compact>\n" unless $dl;
575 if ($pline == $#paratypes || $paratypes[$pline + 1] ne 'T') {
576 # Perl 5 lossage alert
577 # Next para is not a definition list
578 $dl = 0; # Close open definition list
581 $dl = 1; # Leave definition list open
586 # Indent/outdent the *next* para
588 while ($changeindent > 0) {
592 while ($changeindent < 0) {
600 # Make one name anchor in a line; cue on fonts (.B or .I) but leave them alone
604 local(*name, *font, *file, *index, *line) = @_;
607 if (($text) = ($line =~ /^\.[BI]\s+([^\s\\]+)/)) { # Found pattern
610 $text !~ /^-/ # Avoid lists of options
611 && (length($text) > 1 # and history escapes
612 || $text =~ /^[%:@]$/) # Special pleading for %, :, @
613 && ! $name{"$text $font"} # Skip if there's one already
617 $name{"$text $font"} = ($single ? '' : $file) . "#$text";
618 push(@name, "$text\t" . $name{"$text $font"}) if $index;
620 # Put in the name anchor
622 $line =~ s/^(\.[BI]\s+)([^\s\\]+)/$1<A NAME=\"$text\">$2<\/A>/;
628 # Make all the href anchors in a line; cue on fonts (\fB ... \fR or
629 # \fI ... \fR) but leave them alone
633 local(*name, *line) = @_;
634 local(@pieces, $piece);
636 @pieces = split(/(\\f[BI][^\\]*\\fR)/, $line);
640 if (/\\f([BI])([^\\]*)\\fR/ # Found a possibility
642 # It's not followed by (, i.e. it's not a manpage reference
644 && substr($pieces[$piece + 1], 0, 1) ne '(') {
646 if ($name{$key}) { # If there's a matching name
647 s/(\\f[BI])([^\\]*)(\\fR)/$1<A HREF=\"$name{$key}\">$2<\/A>$3/;
652 $line = join('', @pieces);
655 # Convert nroff font escapes to HTML
656 # Expects comments and breaks to be in HTML form already
661 local($i, $j, @begin, @end, $part, @pieces, $bold, $italic);
663 return 0 if $#para == -1; # Ignore empty paragraphs
664 # Perl 5 lossage alert
666 # Find beginning and end of each part between HTML comments
672 push(@begin, $i + 1) if /^-->/ || /^<BR>/;
673 push(@end, $i - 1) if /^<!--/ || /^<BR>/;
676 if ($para[0] =~ /^<!--/ || $para[0] =~ /^<BR>/) {
679 unshift(@begin, 0); # Begin at the beginning
681 if ($para[$#para] =~ /^-->/ || $para[$#para] =~ /^<BR>/) {
684 push(@end, $#para); # End at the end
690 foreach $i (0 .. $#begin) {
691 $part = join('', @para[$begin[$i] .. $end[$i]]);
692 $part =~ s/^\.([BI])\s+(.*)$/\\f$1$2\\fR/gm; # .B, .I
693 @pieces = split(/(\\f[BIR])/m, $part);
695 foreach $j (@pieces) {
705 } elsif ($j eq '\fI') {
714 } elsif ($j eq '\fR') {
727 # Close bold/italic before break
729 if ($end[$i] == $#para || $para[$end[$i] + 1] =~ /^<BR>/) {
730 # Perl 5 lossage alert
733 $part =~ s/(\n)?$/<\/B>$1\n/;
736 $part =~ s/(\n)?$/<\/I>$1\n/;
740 # Rebuild this section of @para
742 foreach $j ($begin[$i] .. $end[$i]) {
743 $part =~ s/^([^\n]*(\n|$))//;
748 # Close bold/italic on last non-comment line
749 # Do this only here because fonts pass through comments
751 $para[$end[$#end]] =~ s/(\n)?$/<\/B>$1/ if $bold;
752 $para[$end[$#end]] =~ s/(\n)?$/<\/I>$1/ if $italic;
756 local ($message) = $_[0];
758 warn $message if $message;
760 Usage: $whatami [-1icsu] [-C dir] [-d dir] [-D dir] [-G name] [-h host] [file]
761 Without [file], reads from tcsh.man or stdin.
762 -1 Makes a single page instead of a table of contents and sections
763 -i Makes a CGI searchable index script, tcsh.html/tcsh.cgi, intended
764 for a server which respects the .cgi extension in any directory.
765 -c Like -i, but the CGI script is intended for a server which wants
766 scripts in /cgi-bin (or some other privileged directory separate
767 from the rest of the HTML) and must be moved there by hand.
768 -C dir Uses /dir instead of /cgi-bin as the CGI bin dir.
769 Meaningless without -c.
770 -d dir Uses /dir/tcsh.html instead of /tcsh.html as the HTML dir.
771 Meaningless without -c.
772 -D dir Uses /dir.html instead of /tcsh.html as the HTML dir.
773 Works with or without -c.
774 -G name Uses name instead of tcsh.cgi as the name of the CGI script.
775 Meaningless without -c or -i.
776 -h host Uses host as the host:port part of the URL to the entry point.
777 Meaningless without -c.
778 -s Filenames are shorter (max 8 + 3) but less descriptive.
784 ### Inlined documents. Watch for *HERE tokens.
788 <TITLE>The tcsh mailing lists</TITLE>
791 <A HREF="TOPFILEHERE">Up</A>
792 <H2>The <I>tcsh</I> mailing lists</H2>
793 There are two <I>tcsh</I> mailing lists:
796 <I>tcsh@mailman.astron.com</I>
798 The <I>tcsh</I> maintainers and testers' mailing list.
800 <I>tcsh-bugs@astron.com</I>
802 Open bug and user comment discussion.
804 You can subscribe to either of these lists by visiting
805 <I><A HREF="https://mailman.astron.com/">https://mailman.astron.com/</A></I>
807 To file a bug report or a feature suggestion (preferably
808 with code), please visit
809 <I><A HREF="https://bugs.astron.com/">https://bugs.astron.com/</A></I>
811 <A HREF="TOPFILEHERE">Up</A>
816 # Emulate #!/usr/local/bin/perl on systems without #!
818 eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
819 & eval 'exec perl -S $0 $argv:q' if 0;
823 # Location: doesn't work with relative URLs, so we need to know where to find
824 # the top and section files.
825 # If the search engine is in /cgi-bin, we need a hard-coded URL.
826 # If the search engine is in the same directory, we can figure it out from CGI
827 # environment variables.
830 $topfile = 'TOPFILEHERE';
837 $input = $ENV{'QUERY_STRING'};
838 $input =~ s/^input=//;
# Undo form URL-encoding so multi-word and punctuation queries can match:
# '+' stands for a space and %XX for a single byte.
$input =~ tr/+/ /;
$input =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('C', hex($1))/ge;
840 print "Status: 302 Found\n";
# \Q...\E makes the user's text match literally; unquoted, input such as "("
# is an invalid pattern and kills the script.
# Preference order: case-sensitive prefix, case-insensitive prefix, substring.
841 if ($input ne '' && ($key = (grep(/^\Q$input\E/, @name))[0] ||
842 (grep(/^\Q$input\E/i, @name))[0] ||
843 (grep( /\Q$input\E/i, @name))[0] )) {
844 $key =~ /\t([^\t]*)$/;
845 print "Location: $root$1\n\n";
847 print "Location: $root$topfile\n\n";