5 usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6 -n = take non-matching types
7 -f = zero-based type field (default 2)
11 use vars qw( $opt_f $opt_n );
15 # Override Unicode tables for certain control chars
16 # that are expected to be found in normal text files.
18 0x08 => 1, # backspace
21 0x0c => 1, # form feed
22 0x0d => 1, # carriage return
25 # Hangul Jamo medial vowels and final consonants should be zero width.
32 exit (main() ? 0 : 1);
35 my $args = join ' ', @ARGV;
36 die $USAGE if not getopts('f:n');
37 $type_field = $opt_f if defined $opt_f;  # test definedness, not truth: "-f0" (field 0) is a valid zero-based index
41 while ($arg = shift @ARGV) {
45 my %out = ( 'types' => \%types );
48 foreach my $comp (@force_compose) {
49 my ($lo,$hi) = @$comp;
50 for (my $ch = $lo; $ch <= $hi; ++$ch) {
51 $force_compose{$ch} = 1;
57 print "/* Generated by \"$0 $args\" on $date */\n";
63 my @fields = split /;/;
65 my ($lo_code, $hi_code);
66 my $codes = $fields[0];
67 if ($codes =~ /(\w+)\.\.(\w+)/) {
71 $lo_code = $hi_code = hex $codes;
73 my $type = $fields[$type_field];
75 for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
76 output(\%out, $last_code,
77 $force_space{$last_code} ? 'Zs' : $force_compose{$last_code} ? 'Mn' : $type);
80 output(\%out, $last_code);
85 my ($out, $code, $type) = @_;
86 my $type_ok = ($type and ${${$out}{types}}{$type});
87 $type_ok = not $type_ok if $opt_n;
88 my $prev_code = $$out{prev_code};
91 end_run($out, $prev_code);
92 } elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) {
93 end_run($out, $prev_code);
94 start_run($out, $code, $type);
96 $$out{prev_code} = $code;
100 my ($out, $code, $type) = @_;
101 $$out{start_code} = $code;
102 $$out{prev_code} = $code;
103 $$out{run_type} = $type;
108 my ($out, $code) = @_;
109 return if not $$out{in_run};
110 printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type};