3 Mdoc - perl module to parse Mdoc macros
7 use Mdoc qw(ns pp soff son stoggle mapwords);
9 See mdoc2man and mdoc2texi for code examples.
15 =item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
17 Define new macro. The CODE reference will be called by call_macro(). You can
18 have two distinct definitions for and inline macro and for a standalone macro
19 (i. e. 'Pa' and '.Pa').
21 The CODE reference is passed a list of arguments and is expected to return list
22 of strings and control characters (see C<CONSTANTS>).
24 By default the surrouding "" from arguments to macros are removed, use C<raw>
27 Normaly CODE reference is passed all arguments up to next nested macro. Set
28 C<greedy> to to pass everything up to the end of the line.
30 If the concat_until is present, the line is concated until the .Xx macro is
31 found. For example the following macro definition
33 def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
34 def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
36 and the following input
43 results in [(foo) | (bar)]
45 =item get_macro( NAME )
47 Returns a hash reference like:
49 { run => CODE, raw => [1|0], greedy => [1|0] }
51 Where C<CODE> is the CODE reference used to define macro called C<NAME>
53 =item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
55 Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a
56 list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
57 caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
58 defined it calls it prior to passing argument to a macro, giving caller a
59 chance to alter them. if EOF was reached undef is returned.
61 =item call_macro( MACRO, ARGS, ... )
63 Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
64 called and for all the nested macros. Every called macro returns a list which
65 is appended to return value and returned when all nested macros are processed.
66 Use to_string() to produce a printable string from the list.
68 =item to_string ( LIST )
70 Processes C<LIST> returned from call_macro() and returns formatted string.
72 =item mapwords BLOCK ARRAY
74 This is like perl's map only it calls BLOCK only on elements which are not
75 punctuation or control characters.
77 =item space ( ['on'|'off] )
79 Turn spacing on or off. If called without argument it returns the current state.
81 =item gen_encloser ( START, END )
83 Helper function for generating macros that enclose their arguments.
84 gen_encloser(qw({ }));
86 sub { '{', ns, @_, ns, pp('}')}
88 =item set_Bl_callback( CODE , DEFS )
90 This module implements the Bl/El macros for you. Using set_Bl_callback you can
91 provide a macro definition that should be executed on a .Bl call.
93 =item set_El_callback( CODE , DEFS )
95 This module implements the Bl/El macros for you. Using set_El_callback you can
96 provide a macro definition that should be executed on a .El call.
98 =item set_Re_callback( CODE )
100 The C<CODE> is called after a Rs/Re block is done. With a hash reference as a
101 parameter, describing the reference.
111 Indicate 'no space' between to members of the list.
115 The string is 'punctuation point'. It means that every punctuation
116 preceeding that element is put behind it.
132 Print space no matter spacing mode.
138 * The concat_until only works with standalone macros. This means that
140 will hang until .Pc in encountered.
142 * Provide default macros for Bd/Ed
144 * The reference implementation is uncomplete
151 use List::Util qw(reduce);
152 use Text::ParseWords qw(quotewords);
154 use Exporter qw(import);
155 our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
159 soff => ['spaceoff'],
161 stoggle => ['spacetoggle'],
171 return sub { ($o, ns, @_, ns, pp($c)) };
179 push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ?
187 ###############################################################################
189 # Default macro definitions start
191 ###############################################################################
193 def_macro('Xo', sub { @_ }, concat_until => '.Xc');
195 def_macro('.Ns', sub {ns, @_});
196 def_macro('Ns', sub {ns, @_});
200 def_macro('.Rs', sub { () } );
201 def_macro('.%A', sub {
202 if ($reference{authors}) {
203 $reference{authors} .= " and @_"
206 $reference{authors} = "@_";
210 def_macro('.%T', sub { $reference{title} = "@_"; () } );
211 def_macro('.%O', sub { $reference{optional} = "@_"; () } );
213 sub set_Re_callback {
215 croak 'Not a CODE reference' if not ref $sub eq 'CODE';
216 def_macro('.Re', sub {
217 my @ret = $sub->(\%reference);
218 %reference = (); @ret
224 def_macro('.Bl', sub { die '.Bl - no list callback set' });
225 def_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
226 def_macro('.El', sub { die '.El requires .Bl first' });
230 my $elcb = sub { () };
232 sub set_El_callback {
234 croak 'Not a CODE reference' if ref $sub ne 'CODE';
239 sub set_Bl_callback {
240 my ($blcb, %defs) = @_;
241 croak 'Not a CODE reference' if ref $blcb ne 'CODE';
242 def_macro('.Bl', sub {
244 my $orig_it = get_macro('.It');
245 my $orig_el = get_macro('.El');
246 my $orig_bl = get_macro('.Bl');
247 my $orig_elcb = $elcb;
249 # Restore previous .It and .El on each .El
250 def_macro('.El', sub {
251 def_macro('.El', delete $orig_el->{run}, %$orig_el);
252 def_macro('.It', delete $orig_it->{run}, %$orig_it);
253 def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
254 my @ret = $elcb->(@_);
264 def_macro('.Sm', sub {
275 def_macro('Sm', do { my $off; sub {
277 if (defined $arg && $arg =~ /^(on|off)$/) {
279 if ($arg eq 'off') { soff, @_; }
280 elsif ($arg eq 'on') { son, @_; }
287 ###############################################################################
289 # Default macro definitions end
291 ###############################################################################
294 croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
295 my ($macro, $sub, %def) = @_;
296 croak 'Not a CODE reference' if ref $sub ne 'CODE';
298 $macros{ $macro } = {
300 greedy => delete $def{greedy} || 0,
301 raw => delete $def{raw} || 0,
302 concat_until => delete $def{concat_until},
304 if ($macros{ $macro }{concat_until}) {
305 $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
306 $macros{ $macro }{greedy} = 1;
313 croak "Macro <$macro> not defined" if not exists $macros{ $macro };
314 +{ %{ $macros{ $macro } } }
325 $last = _unquote($_);
328 $args{$last} = _unquote($_) if $last;
336 my ($el, $expected) = @_;
337 if (defined $expected) {
338 ref $el eq 'ARRAY' and $el->[0] eq $expected;
350 # Handle punctunation
351 my ($in_brace, @punct) = '';
354 ($in_brace = $1) =~ tr/([/)]/;
357 elsif (/^([\)\]])$/ && $in_brace eq $1) {
361 elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
365 elsif (_is_control($_, 'pp')) {
368 elsif (_is_control($_)) {
377 # Produce string out of an array dealing with the special control characters
378 # space('off') must but one character delayed
379 my ($no_space, $space_off) = 1;
381 while (defined(my $el = shift @new)) {
382 if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' }
383 elsif (_is_control($el, 'nospace')) { $no_space = 1; }
384 elsif (_is_control($el, 'spaceoff')) { $space_off = 1; }
385 elsif (_is_control($el, 'spaceon')) { space('on'); }
386 elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ?
398 if ($space_off) { space('off'); $space_off = 0; }
410 if (defined $arg && $arg =~ /^(on|off)$/) {
411 $sep = ' ' if $arg eq 'on';
412 $sep = '' if $arg eq 'off';
416 return $sep eq '' ? 'off' : 'on';
423 $_ =~ s/^"([^"]+)"$/$1/g for @args;
424 wantarray ? @args : $args[0];
428 my ($macro, @args) = @_;
434 @args = _unquote(@args) if (!$macros{ $macro }{raw});
436 # Call any callable macros in the argument list
438 if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
439 push @ret, call_macro($_, @args[$i+1 .. $#args]);
442 if ($macros{ $macro }{greedy}) {
452 if ($macros{ $macro }{concat_until}) {
453 my ($n_macro, @n_args) = ('');
455 die "EOF was reached and no $macros{ $macro }{concat_until} found"
456 if not defined $n_macro;
457 ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
458 if ($n_macro eq $macros{ $macro }{concat_until}) {
459 push @ret, call_macro($n_macro, @n_args);
464 push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
469 if ($macros{ $macro }{greedy}) {
470 #print "MACROG $macro (", (join ', ', @ret), ")\n";
471 return $macros{ $macro }{run}->(@ret);
474 #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
475 return $macros{ $macro }{run}->(@newargs), @ret;
480 my ($in_fh, $out_sub, $preprocess_sub);
482 $in_fh = $_[0] if defined $_[0] || !defined $in_fh;
483 $out_sub = $_[1] if defined $_[1] || !defined $out_sub;
484 $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
486 croak 'out_sub not a CODE reference'
487 if not ref $out_sub eq 'CODE';
488 croak 'preprocess_sub not a CODE reference'
489 if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
491 while (my $line = <$in_fh>) {
493 if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ ||
497 my ($macro, @args) = quotewords(' ', 1, $line);
498 @args = grep { defined $_ } @args;
499 $preprocess_sub->(@args) if defined $preprocess_sub;
500 if ($macro && exists $macros{ $macro }) {
501 return ($macro, @args);