2 # Emacs should use -*- cperl -*- mode
4 # Copyright (c) 2003-2006 Simon L. Nielsen <simon@FreeBSD.org>
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions
10 # 1. Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright
13 # notice, this list of conditions and the following disclaimer in the
14 # documentation and/or other materials provided with the distribution.
16 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 # Parse the list of supported hardware out of section 4 manual pages
32 # and output it on stdout as SGML/DocBook entities.
34 # The script will look for the following line in the manual page:
36 # and make an entity of the content until the line containing:
39 # For Lists only the first line will be printed. If there are
40 # arguments to the .It command, only the argument will be printed.
43 # man2hwnotes.pl [-cl] [-d 0-6] [-a <archlist file>] [-o <outputfile>]
44 # <manualpage> [<manualpage> ...]
48 use Digest::MD5 qw(md5_hex);
50 # Section from manual page to extract
51 my $hwlist_sect = "HARDWARE";
53 # Override default archtecture list for some devices:
54 my $archlist_file = "dev.archlist.txt";
58 my $compat_mode = 0; # Enable compat for old Hardware Notes style
60 my $only_list_out = 0; # Should only lists be generated in the output?
61 my @out_lines; # Single lines
62 my @out_dev; # Device entities
66 if (!getopts("a:cd:lo:",\%options)) {
67 die("$!: Invalid command line arguments in ", __LINE__, "\n");
70 if (defined($options{c})) {
73 if (defined($options{d})) {
74 $debuglevel = $options{d};
76 if (defined($options{a})) {
77 $archlist_file = $options{a};
79 if (defined($options{l})) {
83 my $outputfile = $options{o};
85 if ($debuglevel > 0) {
86 # Don't do output buffering in debug mode.
90 load_archlist($archlist_file);
92 if (defined($outputfile)) {
93 open(OLDOUT, ">&STDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n");
94 open(STDOUT, ">$outputfile") || die("$!: Could not open $outputfile in ", __LINE__, ".\n");
99 These are automatically generated device lists for FreeBSD hardware notes.
103 if ($only_list_out) {
104 # Print the default device preamble entities
105 print "<!ENTITY hwlist.preamble.pre 'The'>\n";
106 print "<!ENTITY hwlist.preamble.post 'driver supports:'>\n";
109 foreach my $page (@ARGV) {
110 if ($page !~ m/\.4$/) {
111 dlog(2, "Skipped $page (not *.4)");
114 dlog(2, "Parsing $page");
118 print join("\n", @out_lines), "\n";
121 print join("\n", @out_dev), "\n";
128 if (defined($outputfile)) {
129 open(STDOUT, ">&OLDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n");
130 close(OLDOUT) || die("$!: Could not close OLDOUT in ", __LINE__, ".\n");
136 foreach my $l (@lines) {
138 $l =~ s:([\x21-\x2f\x5b-\x60\x7b-\x7f]):sprintf("&\#\%d;", ord($1)):eg;
139 # Make sure ampersand is encoded as & since jade seems to
140 # be confused when it is encoded as & inside an entity.
141 $l =~ s/&/&/g;
143 return (wantarray) ? @lines : join "", @lines;
150 my $found_hwlist = 0;
152 $mdocvars{isin_hwlist} = 0;
153 $mdocvars{isin_list} = 0;
154 $mdocvars{first_para} = 1;
155 $mdocvars{parabuf} = "";
156 $mdocvars{listtype} = "";
157 $mdocvars{it_nr} = 0;
159 open(MANPAGE, "$manpage") || die("$!: Could not open $manpage in ", __LINE__, ".\n");
164 dlog(5, "Read '$line'");
167 if (s/^\.(.*)$/$1/) {
170 # Detect, and ignore, comment lines
171 if (s/^\\"(.*)$/$1/) {
175 $cmd =~ s/^([^ ]+).*$/$1/;
177 if (/^Nm "?(\w+)"?/ && !defined($mdocvars{Nm})) {
178 dlog(3, "Setting Nm to $1");
180 # "_" cannot be used for an entity name.
181 $mdocvars{EntNm} = $1;
182 $mdocvars{EntNm} =~ s,_,.,g;
185 if (defined($mdocvars{Nm}) && $mdocvars{Nm} ne "") {
186 parabuf_addline(\%mdocvars, "&man.".$mdocvars{EntNm}.".$cur_mansection;");
188 dlog(2, "Warning: Bad Nm call in $manpage");
191 } elsif (/^Sh (.+)$/) {
192 dlog(4, "Setting section to $1");
193 my $cur_section = $1;
195 flush_out(\%mdocvars);
197 if ($cur_section =~ /^${hwlist_sect}$/) {
198 dlog(2, "Found the device section ${hwlist_sect}");
199 $mdocvars{isin_hwlist} = 1;
201 add_sgmltag(\%mdocvars, "<!ENTITY hwlist.".$mdocvars{cur_manname}." '");
202 if ($only_list_out) {
203 add_sgmltag("<para xmlns=\"http://docbook.org/ns/docbook\">&hwlist.preamble.pre; " .
204 "&man.".$mdocvars{EntNm}.".$cur_mansection; " .
205 "&hwlist.preamble.post;</para>");
207 } elsif ($mdocvars{isin_hwlist}) {
208 dlog(2, "Found a HWLIST STOP key!");
209 add_sgmltag(\%mdocvars, "'>");
210 $mdocvars{isin_hwlist} = 0;
212 if ($mdocvars{isin_list}) {
213 dlog(1, "Warning: Still in list, but just entered new " .
214 "section. This is probably due to missing .El; " .
215 "check manual page for errors.");
216 # If we try to recover from this we will probably
217 # just end with bad SGML output and it really
218 # should be fixed in the manual page so we don't
219 # even try to "fix" this.
223 } elsif (/^Dt ([^ ]+) ([^ ]+)/) {
224 dlog(4, "Setting mansection to $2");
225 $mdocvars{cur_manname} = lc($1);
226 $cur_mansection = $2;
228 # "_" cannot be used for an entity name.
229 $mdocvars{cur_manname} =~ s,_,.,g;
231 } elsif (/^It ?(.*)$/) {
237 if ($mdocvars{parabuf} ne "") {
238 add_listitem(\%mdocvars);
241 # Remove quotes, if any.
242 $txt =~ s/"(.*)"/$1/;
244 if ($mdocvars{listtype} eq "column") {
245 # Ignore first item when it is likely to be a
247 if ($mdocvars{it_nr} == 1 && $txt =~ m/^(Em|Sy) /) {
248 dlog(2, "Skipping header line in column list");
251 # Only extract the first column.
253 $txt =~ s/([^\t]+)\t.*/$1/;
259 parabuf_addline(\%mdocvars, normalize($txt));
261 $mdocvars{isin_list} = 1;
262 flush_out(\%mdocvars);
263 add_sgmltag(\%mdocvars, "<itemizedlist xmlns=\"http://docbook.org/ns/docbook\">");
266 $mdocvars{listtype} = "tag";
267 # YACK! Hack for ata(4)
268 if ($mdocvars{Nm} eq "ata") {
269 $mdocvars{listtype} = "tagHACK";
271 } elsif (/-bullet/) {
272 $mdocvars{listtype} = "bullet";
273 } elsif (/-column/) {
274 $mdocvars{listtype} = "column";
276 $mdocvars{listtype} = "unknown";
278 dlog(2, "Listtype set to $mdocvars{listtype}");
280 if ($mdocvars{parabuf} ne "") {
281 add_listitem(\%mdocvars);
284 add_sgmltag(\%mdocvars, "</itemizedlist>");
285 $mdocvars{isin_list} = 0;
286 } elsif (/^Tn (.+)$/) {
287 # For now we print TradeName text as regular text.
288 my ($txt, $punct_str) = split_punct_chars($1);
290 parabuf_addline(\%mdocvars, normalize($txt . $punct_str));
291 } elsif (/^Xr ([^ ]+) (.+)$/) {
292 my ($xr_sect, $punct_str) = split_punct_chars($2);
295 # We need to check if the manual page exist to avoid
296 # breaking the doc build just because of a broken
298 #$txt = "&man.$1.$xr_sect;$punct_str";
299 $txt = "$1($xr_sect)$punct_str";
300 parabuf_addline(\%mdocvars, normalize($txt));
301 } elsif (/^Dq (.+)$/) {
302 my ($txt, $punct_str) = split_punct_chars($1);
304 parabuf_addline(\%mdocvars,
305 normalize("<quote xmlns=\"http://docbook.org/ns/docbook\">$txt</quote>$punct_str"));
306 } elsif (/^Sx (.+)$/) {
307 if ($mdocvars{isin_hwlist}) {
308 dlog(1, "Warning: Reference to another section in the " .
309 "$hwlist_sect section in " . $mdocvars{Nm} .
310 "(${cur_mansection})");
312 parabuf_addline(\%mdocvars, normalize($1));
313 } elsif (/^Pa (.+)$/) {
314 my ($txt, $punct_str) = split_punct_chars($1);
316 $txt = make_ulink($txt) . $punct_str;
317 parabuf_addline(\%mdocvars, normalize($txt));
319 dlog(3, "Got Pp command - forcing new para");
320 flush_out(\%mdocvars);
321 } elsif (/^Fx (.+)/) {
322 dlog(3, "Got Fx command");
323 parabuf_addline(\%mdocvars, "FreeBSD $1");
325 dlog(3, "Got Fx command");
326 parabuf_addline(\%mdocvars, "FreeBSD");
327 } elsif (/^Em (.+)$/) {
328 my ($txt, $punct_str) = split_punct_chars($1);
330 parabuf_addline(\%mdocvars,
331 normalize("<emphasis xmlns=\"http://docbook.org/ns/docbook\">$txt</emphasis>$punct_str"));
333 # Ignore all other commands.
334 dlog(3, "Ignoring unknown command $cmd");
337 # This is then regular text
338 parabuf_addline(\%mdocvars, normalize($_));
341 close(MANPAGE) || die("$!: Could not close $manpage in ", __LINE__, ".\n");
342 if (! $found_hwlist) {
343 dlog(2, "Hardware list not found in $manpage");
348 my ($level, $txt) = @_;
350 if ($level <= $debuglevel) {
351 print STDERR "$level: $txt\n";
357 my ($mdocvars, $txt) = (@_);
359 # We only care about the HW list for now.
360 if (${$mdocvars}{isin_hwlist}) {
361 push(@out_dev, $txt);
365 # Add a text entity, and return the used entity name.
367 my ($itemtxt) = (@_);
370 # Convert mdoc(7) minus
371 $itemtxt =~ s/\\-/-/g;
373 $itemtxt =~ s/'/‘/g;
375 $entity_name = "hwlist." . md5_hex($itemtxt);
376 dlog(4, "Adding '$itemtxt' as entity $entity_name");
377 push(@out_lines, "<!ENTITY $entity_name '$itemtxt'>");
379 return ($entity_name);
382 my ($mdocvars) = (@_);
383 my ($entity_name, $out);
386 if (!${$mdocvars}{isin_hwlist} || ${$mdocvars}{parabuf} eq "") {
390 $entity_name = add_txt_ent(${$mdocvars}{parabuf});
391 ${$mdocvars}{parabuf} = "";
392 if(defined($archlist{${$mdocvars}{Nm}})) {
394 $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"';
396 $para_arch = '[' . $archlist{${$mdocvars}{Nm}} . '] ';
400 $out = "<para xmlns=\"http://docbook.org/ns/docbook\"".$para_arch.">&".$entity_name.";</para>";
402 if (${$mdocvars}{first_para}) {
403 $out = "<para xmlns=\"http://docbook.org/ns/docbook\">".$para_arch."&".$entity_name.";</para>";
405 $out = "<para xmlns=\"http://docbook.org/ns/docbook\">&".$entity_name.";</para>";
407 ${$mdocvars}{first_para} = 0;
410 dlog(4, "Flushing parabuf");
411 add_sgmltag($mdocvars, $out);
414 # Add a new list item from the "parabuf".
416 my ($mdocvars) = (@_);
417 my ($listitem, $entity_name);
420 $entity_name = add_txt_ent(${$mdocvars}{parabuf});
421 ${$mdocvars}{parabuf} = "";
424 if(defined($archlist{${$mdocvars}{Nm}})) {
425 $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"';
428 $listitem = "<listitem><para".$para_arch.">&".$entity_name.";</para></listitem>";
429 dlog(4, "Adding '$listitem' to out_dev");
430 push(@out_dev, $listitem);
434 # Add a line to the "paragraph buffer"
435 sub parabuf_addline {
436 my $mdocvars = shift;
439 dlog(5, "Now in parabuf_addline for '$txt'");
441 # We only care about the HW list for now.
442 if (!${$mdocvars}{isin_hwlist}) {
443 dlog(6, "Exiting parabuf_addline due to: !\${\$mdocvars}{isin_hwlist}");
447 dlog(6, "Exiting parabuf_addline due to: \$txt eq \"\"");
451 if ($only_list_out && !${$mdocvars}{isin_list}) {
452 dlog(6, "Exiting parabuf_addline due to: ".
453 "\$only_list_out && !\${\$mdocvars}{isin_list}");
457 # We only add the first line for "tag" lists
458 if (${$mdocvars}{parabuf} ne "" && ${$mdocvars}{isin_list} &&
459 ${$mdocvars}{listtype} eq "tag") {
460 dlog(6, "Exiting parabuf_addline due to: ".
461 "\${\$mdocvars}{parabuf} ne \"\" && \${\$mdocvars}{isin_list} && ".
462 "\${\$mdocvars}{listtype} eq \"tag\"");
466 if (${$mdocvars}{parabuf} ne "") {
467 ${$mdocvars}{parabuf} .= " ";
470 dlog(4, "Adding '$txt' to parabuf");
472 ${$mdocvars}{parabuf} .= $txt;
480 dlog(2, "Parsing archlist $file");
482 open(FILE, "$file") || die("$!: Could not open archlist $file in ", __LINE__, ".\n");
487 if (/^#/ || $_ eq "") {
491 if (/(\w+)\t([\w,]+)/) {
492 dlog(4, "For driver $1 setting arch to $2");
495 dlog(1, "Warning: Could not parse archlist line $lineno");
502 # Check if a character is a mdoc(7) punctuation character.
506 return (length($str) == 1 && $str =~ /[\.,:;()\[\]\?!]/);
509 # Split out the punctuation characters of a mdoc(7) line.
510 sub split_punct_chars {
512 my (@stritems, $stritem, $punct_str);
515 @stritems = split(/ /, $str);
517 while (defined($stritem = $stritems[$#stritems]) &&
518 is_punct_char($stritem)) {
519 $punct_str = $stritem . $punct_str;
523 return (join(' ', @stritems), $punct_str);
526 # Create a ulink, if the string contains an URL.
530 $str =~ s,(http://[^ ]+),<link xmlns=\"http://docbook.org/ns/docbook\" xlink:href="$1"></link>,;