2 # $tcsh: tcsh.man2html,v 1.15 2011/02/05 16:15:56 christos Exp $
4 # tcsh.man2html, Dave Schweisguth <dcs@proton.chem.yale.edu>
8 # Always puts all files in the directory tcsh.html, creating it if necessary.
9 # tcsh.html/top.html is the entry point, and tcsh.html/index.html is a symlink
10 # to tcsh.html/top.html so one needn't specify a file at all if working through
11 # a typically configured server.
13 # Designed for tcsh manpage. Guaranteed not to work on manpages not written
14 # in the exact same style of nroff -man, i.e. any other manpage.
16 # Makes links FROM items which are both a) in particular sections (see
17 # Configuration) and b) marked with .B or .I. Makes links TO items which
18 # are marked with \fB ... \fR or \fI ... \fR.
20 # Designed with X Mosaic in mind and tested lightly with lynx. I've punted on
21 # HTML's lack of a .PD equivalent and lynx's different <menu> handling.
23 # Emulate #!/usr/local/bin/perl on systems without #!
# Re-exec trick: if a shell ends up interpreting this file, the eval/exec
# statement restarts it under perl (the first eval uses Bourne-shell argument
# syntax, the second csh syntax).  Perl never executes the statement itself
# because of the trailing "if 0".
25 eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
26 & eval 'exec perl -S $0 $argv:q' if 0;
# Script name with everything up to the last slash removed.  It prefixes every
# diagnostic and is the default value of $script below.
32 ($whatami = $0) =~ s|.*/||; # `basename $0`
# Option flags and their defaults; the argument loop sets them from
# -i, -c, -s and -1 respectively.
37 $index = 0; # Don't make a searchable index CGI script
38 $cgibin = 0; # Look for $cgifile in $dir, not $cgibindir
39 $shortfiles = 0; # Use long filenames
40 $single = 0; # Make single page instead of top and sections
# Names of hosts, directories and generated files.  The asterisks at the end
# of each comment refer to the legend that follows this list.
42 $host = ''; # host:port part of server URL ***
43 $updir = ''; # Directories between $host and $dir ***
44 $dir = 'tcsh'; # Directory in which to put the pieces *
45 $cgifile = 'tcsh.cgi'; # CGI script name **
46 $cgibindir = 'cgi-bin'; # CGI directory ***
47 $headerfile = 'header'; # HTML file for initial comments *
48 $indexfile = 'index'; # Symlink to $topfile *
49 $listsfile = 'lists'; # Mailing list description HTML file *
50 $outfile = 'tcsh.man'; # Default input file and copy of input file
51 $script = $whatami; # Copy of script; filename length must be OK
52 $topfile = 'top'; # Top-level HTML file *
54 # * .htm or .html suffix added later
55 # ** Only used with -i or -c
56 # *** Only used with -c
58 # Sections to inline in the top page
# Keyed by section heading text; a true value means the section body is
# printed directly in the table of contents instead of being linked.
60 %inline_me = ('NAME', 1,
63 # Sections in which to put name anchors and the font in which to look for
64 # links to those anchors
# Keyed by section heading text; the value is the font letter (B or I) that
# becomes part of the %name key for anchors found in that section.
66 %link_me = ('Editor commands', 'I',
67 'Builtin commands', 'I',
68 'Special aliases', 'I',
69 'Special shell variables', 'B',
73 ### Arguments and error-checking
# Process leading switch words in @ARGV: for a word of the form -Xrest,
# $first receives the switch letter X and $rest the remaining characters.
77 while ($#ARGV > -1 && (($first, $rest) = ($ARGV[0] =~ /^-(.)(.*)/))) {
78 # Perl 5 lossage alert
79 if ($first =~ /[CdDGh]/) { # Switches with arguments
# The argument is the rest of the switch word when that is non-empty,
# otherwise the next element of @ARGV; if neither is available usage() is
# called with an error message.
81 $arg = $rest ne '' ? $rest : $ARGV[0] ne '' ? shift :
82 &usage("$whatami: -$first requires an argument.\n");
83 } elsif ($rest ne '') {
# Dispatch on the switch letter: flags set a boolean, the others store $arg.
# -u prints the usage text without an error message; unknown letters are
# reported through usage().
88 if ($first eq '1') { $single = 1; }
89 elsif ($first eq 'c') { $cgibin = 1; }
90 elsif ($first eq 'C') { $cgibindir = $arg; }
91 elsif ($first eq 'd') { $updir = $arg; }
92 elsif ($first eq 'D') { $dir = $arg; }
93 elsif ($first eq 'G') { $cgifile = $arg; }
94 elsif ($first eq 'h') { $host = $arg; }
95 elsif ($first eq 'i') { $index = 1; }
96 elsif ($first eq 's') { $shortfiles = 1; }
97 elsif ($first eq 'u') { &usage(0); }
98 else { &usage("$whatami: -$first is not an option.\n"); }
# Input selection.  NOTE(review): the conditions choosing between the two
# defaults below are not visible here; the trailing comments suggest a
# tty test -- confirm against the complete script.
103 $infile = $outfile; # Default input file if interactive
105 $infile = 'STDIN'; # Read STDIN if no args and not a tty
107 } elsif (@ARGV == 1) {
110 &usage("$whatami: Please specify one and only one file.\n");
113 $index = $index || $cgibin; # $index is true if $cgibin is true
# -c requires a host because the form's ACTION URL for the CGI script is
# built from $host when $cgibin is set.
115 if ($cgibin && ! $host) {
116 die "$whatami: Must specify host with -h if using -c.\n";
119 # Decide on HTML suffix and append it to filenames
# Note that the output directory name itself also receives the suffix
# (the default 'tcsh' becomes tcsh.html or tcsh.htm).
121 $html = $shortfiles ? 'htm' : 'html'; # Max 3-character extension
122 $dir .= ".$html"; # Directory in which to put the pieces
123 $headerfile .= ".$html"; # HTML file for initial comments
124 $topfile .= ".$html"; # Top-level HTML file (or moved notice)
125 $indexfile .= ".$html"; # Symlink to $topfile
126 $listsfile .= ".$html"; # Mailing list description HTML file
128 # Check for input file
# The special filehandle "_" reuses the stat information from the previous
# file test, so the -r and -s checks apply to the file tested with -e.
130 unless ($infile eq 'STDIN') {
131 die "$whatami: $infile doesn't exist!\n" unless -e $infile;
132 die "$whatami: $infile is unreadable!\n" unless -r _;
133 die "$whatami: $infile is empty!\n" unless -s _;
136 # Check for output directory and create if necessary
# NOTE(review): the file test that primes "_" for $dir is not visible here.
139 -d _ || die "$whatami: $dir is not a directory!\n";
140 -r _ && -w _ && -x _ || die "$whatami: $dir is inaccessible!\n"
142 mkdir($dir, 0755) || die "$whatami: Can't create $dir!\n";
147 if ($infile eq 'STDIN') {
150 open(MAN, $infile) || die "$whatami: Error opening $infile!\n";
155 # Print manpage to HTML directory (can't use cp if we're reading from STDIN)
# The MAN handle is reused here, this time opened for writing.
157 open(MAN, ">$dir/$outfile") || die "$whatami: Can't open $dir/$outfile!\n";
161 # Copy script to HTML directory
# system() returns the wait status; shifting right by 8 leaves the command's
# exit code, so a non-zero exit from cp triggers the die.
# NOTE(review): $0 and $dir are interpolated into a shell command unquoted, so
# names containing spaces or shell metacharacters would be misinterpreted.
163 (system("cp $0 $dir") >> 8) && die "$whatami: Can't copy $0 to $dir!\n";
165 # Link top.html to index.html in case someone looks at tcsh.html/
# The rm is best-effort (its status is ignored); only a failing ln is fatal.
167 system("rm -f $dir/$indexfile"); # Some systems can't ln -sf
168 (system("ln -s $topfile $dir/$indexfile") >> 8)
169 && die "$whatami: Can't link $topfile to $dir/$indexfile!\n";
171 ### Get title and section headings
# State for the first pass over the manpage.  The four @section* arrays are
# parallel; entry 0 of each describes the initial 'Header' pseudo-section
# that precedes the first real heading.
173 $comment = 0; # 0 for text, 1 for ignored text
174 @sectionlines = (0); # First line of section
175 @sectiontypes = (0); # H or S
176 @sectiontexts = ('Header'); # Text of section heading
177 @sectionfiles = ($headerfile); # Filename in which to store section
178 %name = (); # Array of name anchors
179 @name = () if $index; # Ordered array of name anchors
180 $font = ''; # '' to not make names, 'B' or 'I' to do so
# Track .ig ... .. regions so that their contents are not processed.
184 if (/^\.ig/) { # Start ignoring
186 } elsif (/^\.\./) { # Stop ignoring
188 } elsif (! $comment) { # Not in .ig'ed section; do stuff
190 # nroff special characters
194 s/^\\'/'/; # leading ' escape
195 s/^\\(\s)/$1/; # leading space escape
196 s/\\(e|\\)/\\/g; # \e, \\; must do this after other escapes
198 # HTML special characters; deal with these before adding more
# The .TH line is matched as two bare words followed by two double-quoted
# strings; all four captures are combined into the page title.
206 if (/^\.TH\s+(\w+)\s+(\w+)\s+\"([^\"]*)\"\s+\"([^\"]*)\"/) {
207 $title = "$1($2) $4 ($3) $1($2)";
210 # Build per-section info arrays
# .SH or .SS heading: $type is the letter after ".S" and $text the heading
# with any surrounding double quotes dropped.  The type is stored as 0 for
# .SH and 1 for .SS.
212 if (($type, $text) = /^\.S([HS])\s+\"?([^\"]*)\"?/) {
214 push(@sectionlines, $line); # Index of first line of section
215 push(@sectiontypes, $type eq 'H' ? 0 : 1); # Type of section
216 $text =~ s/\s*$//; # Remove trailing whitespace
217 push(@sectiontexts, $text); # Title of section (key for href)
218 $text =~ s/\s*\(\+\)$//; # Remove (+)
220 $file = $#sectionlines; # Short filenames; use number
222 $file = $text; # Long filenames; use title
223 $file =~ s/[\s\/]+/_/g; # Replace whitespace and / with _
225 $file .= ".$html" unless $single;
226 push(@sectionfiles, $file); # File in which to store section
# Register the heading itself as a link target under the key "<heading> B".
# In single-page mode the target is an in-page fragment (leading '#').
227 $name{"$text B"} = ($single ? '#' : '') . $file;
228 # Index entry for &make_hrefs
229 push(@name, "$text\t" . $name{"$text B"}) if $index;
230 # Index entry for CGI script
231 # Look for anchors in the rest of this section if $link_me{$text}
232 # is non-null, and mark them with the font which is its value
234 $font = $link_me{$text};
# Typeglobs are passed so that make_name works on the caller's own variables:
# it can add to %name and @name and rewrite the current line held in $_.
236 &make_name(*name, *font, *file, *index, *_) if $font;
# Start the top-level page.
# NOTE(review): the result of this open is not checked, unlike the opens of
# the MAN handle earlier in the script.
243 open(TOP, ">$dir/$topfile");
250 <TITLE>$title</TITLE>
258 # FORM block, if we're making an index
# With -c the form's ACTION is an absolute URL under $cgibindir on $host;
# otherwise the bare script name is used.
260 $action = $cgibin ? "http://$host/$cgibindir/$cgifile" : $cgifile;
262 print <<EOP if $index;
263 <FORM METHOD="GET" ACTION="$action">
264 Go directly to a section, command or variable: <INPUT NAME="input">
# Table of contents: one entry per real section (index 0 is the header
# pseudo-section and is skipped).
274 foreach $section (1 .. $#sectionlines) {
# @sectiontypes holds 0 for .SH and 1 for .SS, so comparing neighbouring
# entries detects a step down into subsections or back up to a main heading.
275 if ($sectiontypes[$section - 1] < $sectiontypes[$section]) {
276 print "</H2> <menu>\n"; # Indent, smaller font
277 } elsif ($sectiontypes[$section - 1] > $sectiontypes[$section]) {
278 print "</menu> <H2>\n"; # Outdent, larger font
280 if ($inline_me{$sectiontexts[$section]}) { # Section is in %inline_me
282 # Print section inline
# The heading text is printed as-is and the body is rendered between a
# <menu> ... </menu> pair.
284 print "$sectiontexts[$section]\n";
285 print "</H2> <menu>\n"; # Indent, smaller font
286 &printsectionbody(*man, *sectionlines, *section, *name);
287 print "</menu> <H2>\n"; # Outdent, larger font
290 # Print link to section
# In single-page mode the link is an in-page fragment ('#' prefix); otherwise
# it names the section's own file.
292 print "<A HREF=\"", $single ? '#' : '',
293 "$sectionfiles[$section]\">$sectiontexts[$section]</A><BR>\n";
# A rule separates the contents from the section bodies on a single page.
301 print "<HR>\n" if $single;
# Emit the body of every section, including the header pseudo-section
# (index 0).  Sections listed in %inline_me were printed inside the table of
# contents and are skipped.  The statements shown here produce in-page
# anchors and a link back to #top.
# NOTE(review): the conditional that separates this layout from the
# one-file-per-section layout is not visible here.
305 foreach $section (0 .. $#sectionlines) {
307 # Skip inlined sections
309 next if $inline_me{$sectiontexts[$section]};
315 print <<EOP if $section; # Skip header section
316 <H2><A NAME="$sectionfiles[$section]">$sectiontexts[$section]</A></H2>
319 &printsectionbody(*man, *sectionlines, *section, *name);
320 print <<EOP if $section; # Skip header section
321 <A HREF="#top">Table of Contents</A>
327 # Make pointer line for header and trailer
# "Up" always links to the top page.  "Next" and "Previous" are added only
# when the neighbouring section exists and is not inlined in the top page;
# "Previous" is also withheld for section 1, whose predecessor is the header
# pseudo-section.
329 $pointers = "<A HREF=\"$topfile\">Up</A>";
330 $pointers .= "\n<A HREF=\"$sectionfiles[$section + 1]\">Next</A>"
331 if ($section < $#sectionlines) &&
332 ! $inline_me{$sectiontexts[$section + 1]};
333 $pointers .= "\n<A HREF=\"$sectionfiles[$section - 1]\">Previous</A>"
334 if ($section > 1) && # section 0 is initial comments
335 ! $inline_me{$sectiontexts[$section - 1]};
# One output file per section.
# NOTE(review): the result of this open is not checked.
339 open(OUT, ">$dir/$sectionfiles[$section]");
343 <TITLE>$sectiontexts[$section]</TITLE>
347 <H2>$sectiontexts[$section]</H2>
349 &printsectionbody(*man, *sectionlines, *section, *name);
# In multi-file mode make TOP the default output handle, so the trailer text
# that follows goes to the top page.
361 select TOP unless $single;
368 Here are the <A HREF="$outfile">nroff manpage</A> (175K)
369 from which this HTML version was generated,
370 the <A HREF="$script">Perl script</A> which did the conversion
371 and the <A HREF="ftp://ftp.astron.com/pub/tcsh/">
372 complete source code</A> for <I>tcsh</I>.
374 <I>tcsh</I> is maintained by
375 Christos Zoulas <A HREF="mailto:christos\@gw.com"><christos\@gw.com></A>
376 and the <A HREF="$listsfile"><I>tcsh</I> maintainers' mailing list</A>.
377 Dave Schweisguth <A HREF="mailto:dcs\@proton.chem.yale.edu"><dcs\@proton.chem.yale.edu></A>
378 wrote the manpage and the HTML conversion script.
# Mailing-list page: its text is read from the DATA handle up to a line
# consisting of END, with the TOPFILEHERE placeholder replaced by the real
# top-page filename.
# NOTE(review): the result of this open is not checked.
386 open(LISTS, ">$dir/$listsfile");
388 while(($_ = <DATA>) ne "END\n") { # Text stored after __END__
389 s/TOPFILEHERE/$topfile/;
394 ### Make search script
# The value built below is a fragment of Perl source for the generated
# script: either a quoted literal URL assembled from $host, $updir and $dir,
# or an expression that derives the URL from CGI environment variables when
# the generated script runs.
# NOTE(review): the condition selecting between the two is not visible here;
# presumably it is $cgibin.
398 # URL of $dir; see comments in search script
401 ? "'http://$host/" . ($updir ? "$updir/" : '') . "$dir/'"
402 : '"http://$ENV{\'SERVER_NAME\'}:$ENV{\'SERVER_PORT\'}" . (($_ = $ENV{\'SCRIPT_NAME\'}) =~ s|[^/]*$||, $_)';
404 # String for passing @name to search script
# Entries are joined with a quote, comma, newline, quote sequence; presumably
# the result is wrapped in outer quotes where it is substituted (not visible).
406 $name = join("',\n'", @name);
# The TOP handle is reused for the CGI script.
# NOTE(review): the result of this open is not checked.
408 open(TOP, ">$dir/$cgifile");
410 while(($_ = <DATA>) ne "END\n") { # Text stored after __END__
413 s/TOPFILEHERE/$topfile/;
# The generated script must be executable; failure to chmod is fatal.
417 chmod(0755, "$dir/$cgifile") ||
418 die "$whatami: Can't chmod 0755 $dir/$cgifile!\n";
419 warn "$whatami: Don't forget to move $dir/$cgifile to /$cgibindir.\n"
423 ### That's all, folks
429 # Process and print the body of a section
# Arguments are typeglobs, aliased locally:
#   *man          - array holding the manpage lines
#   *sectionlines - array of line indices at which each section starts
#   *sline        - scalar index of the section to print
#   *name         - table of link targets, handed on to make_hrefs
431 sub printsectionbody {
433 local(*man, *sectionlines, *sline, *name) = @_; # Number of section
# NOTE(review): $comment appears twice in this list; the repetition looks
# unintentional.
434 local($sfirst, $slast, @paralines, @paratypes, $comment, $dl, $pline,
435 $comment, $pfirst, $plast, @para, @tag, $changeindent);
437 # Define section boundaries
# The body starts on the line after the heading and ends on the line before
# the next section's heading; the last section is handled separately.
439 $sfirst = $sectionlines[$sline] + 1;
440 if ($sline == $#sectionlines) {
443 $slast = $sectionlines[$sline + 1] - 1;
446 # Find paragraph markers, ignoring those between '.ig' and '..'
# @paralines and @paratypes are parallel: the line index of each .PP/.IP/.TP
# marker and its first letter.
448 if ($man[$sfirst] =~ /^\.[PIT]P/) {
452 @paralines = ($sfirst - 1); # .P follows .S[HS] by default
456 foreach ($sfirst .. $slast) {
457 if ($man[$_] =~ /^\.ig/) { # Start ignoring
459 } elsif ($man[$_] =~ /^\.\./) { # Stop ignoring
461 } elsif (! $comment && $man[$_] =~ /^\.([PIT])P/) {
462 push(@paralines, $_);
463 push(@paratypes, $1);
# Process the paragraphs one at a time.
471 foreach $pline (0 .. $#paralines) {
476 # Define para boundaries
478 $pfirst = $paralines[$pline] + 1;
479 if ($pline == $#paralines) {
482 $plast = $paralines[$pline + 1] - 1;
# foreach aliases $_ to the elements of the slice, so the substitutions in
# this loop modify the corresponding entries of @man.
# $comment is compared against 1 and 2 below as well as being tested for
# truth.  NOTE(review): which value stands for which kind of comment is not
# evident from the visible lines.
485 foreach (@man[$pfirst .. $plast]) {
486 if (/^\.ig/) { # nroff begin ignore
489 push(@para, "<!--\n");
490 } elsif ($comment == 1) {
492 } elsif ($comment == 2) {
493 s/--/-/g; # Remove double-dashes in comments
496 } elsif (/^\.\./) { # nroff end ignore
499 } elsif ($comment == 1) {
501 } elsif ($comment == 2) {
504 } elsif (/^\.\\\"/) { # nroff comment
507 push(@para, "<!--\n");
509 } elsif ($comment == 1) {
511 } elsif ($comment == 2) {
514 s/--/-/g; # Remove double-dashes in comments
516 } else { # Nothing to do with comments
519 } elsif ($comment == 1) {
521 push(@para, "-->\n");
522 } elsif ($comment == 2) {
523 s/--/-/g; # Remove double-dashes in comments
# Macro lines that are not comments.
528 if (/^\.TH/) { # Title; got this already
530 } elsif (/^\.PD/) { # Para spacing; unimplemented
532 } elsif (/^\.RS/) { # Indent (one width only)
535 } elsif (/^\.RE/) { # Outdent
543 # More nroff special characters
545 s/^\\&\;//; # leading dot escape; save until
546 # now so leading dots aren't
547 # confused with ends of .igs
# Turn font-marked words in the current line into links where a matching
# target exists in %name.
549 &make_hrefs(*name, *_);
555 push(@para, "-->\n") if $comment; # Close open comment
# Output depends on the paragraph type recorded in @paratypes (P, I or T).
559 if ($paratypes[$pline] eq 'P') {
562 } elsif ($paratypes[$pline] eq 'I') {
# $dl records whether a <DL> is currently open, so that consecutive
# definition paragraphs share one list.
571 print "<DL compact>\n" unless $dl;
576 if ($pline == $#paratypes || $paratypes[$pline + 1] ne 'T') {
577 # Perl 5 lossage alert
578 # Next para is not a definition list
579 $dl = 0; # Close open definition list
582 $dl = 1; # Leave definition list open
587 # Indent/outdent the *next* para
# $changeindent is handled in two loops, one for positive and one for
# negative values; the loop bodies are not visible here.
589 while ($changeindent > 0) {
593 while ($changeindent < 0) {
601 # Make one name anchor in a line; cue on fonts (.B or .I) but leave them alone
# Arguments are typeglobs, aliased locally:
#   *name  - %name (key -> link target) and @name (ordered index entries)
#   *font  - font letter that becomes part of the %name key
#   *file  - file that holds the current section
#   *index - true if index entries are being collected in @name
#   *line  - the line to examine; rewritten in place when an anchor is added
605 local(*name, *font, *file, *index, *line) = @_;
# $text is the first word after a .B or .I macro, ending at whitespace or a
# backslash.
608 if (($text) = ($line =~ /^\.[BI]\s+([^\s\\]+)/)) { # Found pattern
611 $text !~ /^-/ # Avoid lists of options
612 && (length($text) > 1 # and history escapes
613 || $text =~ /^[%:@]$/) # Special pleading for %, :, @
614 && ! $name{"$text $font"} # Skip if there's one already
# The target is "<file>#<text>", or just "#<text>" in single-page mode; the
# key is the text followed by a space and the font letter.
618 $name{"$text $font"} = ($single ? '' : $file) . "#$text";
619 push(@name, "$text\t" . $name{"$text $font"}) if $index;
621 # Put in the name anchor
# Only the word is wrapped; the macro and the whitespace after it are kept.
623 $line =~ s/^(\.[BI]\s+)([^\s\\]+)/$1<A NAME=\"$text\">$2<\/A>/;
629 # Make all the href anchors in a line; cue on fonts (\fB ... \fR or
630 # \fI ... \fR) but leave them alone
# Arguments are typeglobs, aliased locally: *name gives access to the %name
# table of link targets, *line is the line to rewrite in place.
634 local(*name, *line) = @_;
635 local(@pieces, $piece);
# Because the split pattern is wrapped in a capturing group, the font-marked
# runs are kept as list elements of their own, between the plain-text pieces.
637 @pieces = split(/(\\f[BI][^\\]*\\fR)/, $line);
# NOTE(review): the loop over @pieces and the assignment of $key are not
# visible here; presumably $key is the matched text plus the font letter, as
# in the keys that make_name stores.
641 if (/\\f([BI])([^\\]*)\\fR/ # Found a possibility
643 # It's not followed by (, i.e. it's not a manpage reference
645 && substr($pieces[$piece + 1], 0, 1) ne '(') {
647 if ($name{$key}) { # If there's a matching name
# Wrap only the text between the font escapes; the escapes stay in place.
648 s/(\\f[BI])([^\\]*)(\\fR)/$1<A HREF=\"$name{$key}\">$2<\/A>$3/;
# Reassemble the line from the pieces.
653 $line = join('', @pieces);
656 # Convert nroff font escapes to HTML
657 # Expects comments and breaks to be in HTML form already
# Works on the lines held in @para.
# NOTE(review): the sub header and the way @para is passed in are not
# visible here.
662 local($i, $j, @begin, @end, $part, @pieces, $bold, $italic);
664 return 0 if $#para == -1; # Ignore empty paragraphs
665 # Perl 5 lossage alert
667 # Find beginning and end of each part between HTML comments
# @begin and @end are parallel lists of first and last line indices of each
# run of ordinary text.  A comment closer or <BR> starts a run on the next
# line; a comment opener or <BR> ends the run on the previous line.
673 push(@begin, $i + 1) if /^-->/ || /^<BR>/;
674 push(@end, $i - 1) if /^<!--/ || /^<BR>/;
# Handling of the first and last lines of the paragraph.
677 if ($para[0] =~ /^<!--/ || $para[0] =~ /^<BR>/) {
680 unshift(@begin, 0); # Begin at the beginning
682 if ($para[$#para] =~ /^-->/ || $para[$#para] =~ /^<BR>/) {
685 push(@end, $#para); # End at the end
# Convert each run as a single string so that fonts may span lines.
691 foreach $i (0 .. $#begin) {
692 $part = join('', @para[$begin[$i] .. $end[$i]]);
# Rewrite .B/.I macro lines as inline font escapes, then split on the escapes;
# the capturing group keeps each escape as a separate element of @pieces.
693 $part =~ s/^\.([BI])\s+(.*)$/\\f$1$2\\fR/gm; # .B, .I
694 @pieces = split(/(\\f[BIR])/m, $part);
696 foreach $j (@pieces) {
706 } elsif ($j eq '\fI') {
715 } elsif ($j eq '\fR') {
728 # Close bold/italic before break
730 if ($end[$i] == $#para || $para[$end[$i] + 1] =~ /^<BR>/) {
731 # Perl 5 lossage alert
734 $part =~ s/(\n)?$/<\/B>$1\n/;
737 $part =~ s/(\n)?$/<\/I>$1\n/;
741 # Rebuild this section of @para
# One line at a time is removed from the front of $part.
# NOTE(review): the statement that stores each line back is not visible here.
743 foreach $j ($begin[$i] .. $end[$i]) {
744 $part =~ s/^([^\n]*(\n|$))//;
749 # Close bold/italic on last non-comment line
750 # Do this only here because fonts pass through comments
752 $para[$end[$#end]] =~ s/(\n)?$/<\/B>$1/ if $bold;
753 $para[$end[$#end]] =~ s/(\n)?$/<\/I>$1/ if $italic;
# Usage helper.  The optional first argument is an error message, emitted
# with warn when it is true; callers pass 0 to show the usage text without
# one.
# NOTE(review): the sub header and the statement that outputs the text below
# are not visible here.
757 local ($message) = $_[0];
759 warn $message if $message;
761 Usage: $whatami [-1icsu] [-C dir] [-d dir] [-h host] [file]
762 Without [file], reads from tcsh.man or stdin.
763 -1 Makes a single page instead of a table of contents and sections
764 -i Makes a CGI searchable index script, tcsh.html/tcsh.cgi, intended
765 for a server which respects the .cgi extension in any directory.
766 -c Like -i, but the CGI script is intended for a server which wants
767 scripts in /cgi-bin (or some other privileged directory separate
768 from the rest of the HTML) and must be moved there by hand.
769 -C dir Uses /dir instead of /cgi-bin as the CGI bin dir.
770 Meaningless without -c.
771 -d dir Uses /dir/tcsh.html instead of /tcsh.html as the HTML dir.
772 Meaningless without -c.
773 -D dir Uses /dir.html instead of /tcsh.html as the HTML dir.
774 Meaningless without -c.
775 -G name Uses name instead of tcsh.cgi as the name of the CGI script.
776 Meaningless without -c or -i.
777 -h host Uses host as the host:port part of the URL to the entry point.
778 Meaningless without -c.
779 -s Filenames are shorter (max 8 + 3) but less descriptive.
785 ### Inlined documents. Watch for *HERE tokens.
789 <TITLE>The tcsh mailing lists</TITLE>
792 <A HREF="TOPFILEHERE">Up</A>
793 <H2>The <I>tcsh</I> mailing lists</H2>
794 There are three <I>tcsh</I> mailing lists:
797 <I>tcsh@mx.gw.com</I>
799 The <I>tcsh</I> maintainers and testers' mailing list.
801 <I>tcsh-bugs@mx.gw.com</I>
803 Open bug and user comment discussion.
805 You can subscribe to either of these lists by visiting
806 <I><A HREF="http://mx.gw.com/">http://mx.gw.com/</A></I>
808 To file a bug report or a feature suggestion (preferably
809 with code), please visit
810 <I><A HREF="http://bugs.gw.com/">http://bugs.gw.com/</A></I>
812 <A HREF="TOPFILEHERE">Up</A>
817 # Emulate #!/usr/local/bin/perl on systems without #!
819 eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
820 & eval 'exec perl -S $0 $argv:q' if 0;
824 # Location: doesn't work with relative URLs, so we need to know where to find
825 # the top and section files.
826 # If the search engine is in /cgi-bin, we need a hard-coded URL.
827 # If the search engine is in the same directory, we can figure it out from CGI
828 # environment variables.
831 $topfile = 'TOPFILEHERE';
838 $input = $ENV{'QUERY_STRING'};
839 $input =~ s/^input=//;
841 print "Status: 302 Found\n";
842 if ($input ne '' && ($key = (grep(/^$input/, @name))[0] ||
843 (grep(/^$input/i, @name))[0] ||
844 (grep( /$input/i, @name))[0] )) {
845 $key =~ /\t([^\t]*)$/;
846 print "Location: $root$1\n\n";
848 print "Location: $root$topfile\n\n";