5 usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt
6 -n = take non-matching types
7 -f = zero-based type field (default 2)
11 use vars qw( $opt_f $opt_n );
15 # Override Unicode tables for certain control chars
16 # that are expected to be found in normal text files.
18 0x08 => 1, # backspace
21 0x0c => 1, # form feed
22 0x0d => 1, # carriage return
25 # Hangul Jamo medial vowels and final consonants should be zero width.
32 exit (main() ? 0 : 1);
35 my $args = join ' ', @ARGV;
36 die $USAGE if not getopts('f:n');
37 $type_field = $opt_f if $opt_f;
41 while ($arg = shift @ARGV) {
45 my %out = ( 'types' => \%types );
48 foreach my $comp (@force_compose) {
49 my ($lo,$hi) = @$comp;
50 for (my $ch = $lo; $ch <= $hi; ++$ch) {
51 $force_compose{$ch} = 1;
57 print "/* Generated by \"$0 $args\" on $date */\n";
64 my @fields = split /;/;
66 my ($lo_code, $hi_code);
67 my $codes = $fields[0];
68 if ($codes =~ /(\w+)\.\.(\w+)/) {
72 $lo_code = $hi_code = hex $codes;
74 if ($fields[1] =~ /, First>$/) {
75 die "invalid Unicode data: First with range" if $hi_code != $lo_code;
76 $start_range = $lo_code;
79 if ($fields[1] =~ /, Last>$/) {
80 die "invalid Unicode data: Last without First" if not $start_range;
81 $lo_code = $start_range;
83 } elsif ($start_range) {
84 die "invalid Unicode data: First without Last";
86 my $type = $fields[$type_field];
88 for ($last_code = $lo_code; $last_code <= $hi_code; ++$last_code) {
89 output(\%out, $last_code,
90 $force_space{$last_code} ? 'Zs' : $force_compose{$last_code} ? 'Mn' : $type);
93 output(\%out, $last_code);
98 my ($out, $code, $type) = @_;
99 my $type_ok = ($type and ${${$out}{types}}{$type});
100 $type_ok = not $type_ok if $opt_n;
101 my $prev_code = $$out{prev_code};
104 end_run($out, $prev_code);
105 } elsif (not $$out{in_run} or $type ne $$out{run_type} or $code != $prev_code+1) {
106 end_run($out, $prev_code);
107 start_run($out, $code, $type);
109 $$out{prev_code} = $code;
113 my ($out, $code, $type) = @_;
114 $$out{start_code} = $code;
115 $$out{prev_code} = $code;
116 $$out{run_type} = $type;
121 my ($out, $code) = @_;
122 return if not $$out{in_run};
123 printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{run_type};