1 #!/usr/local/bin/perl -wC
8 print "Usage: $0 --unidir=<unidir>\n";
14 my $result = GetOptions (
15 "unidir=s" => \$UNIDIR
19 my $outfilename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
21 get_utf8map("$UNIDIR/posix/UTF-8.cm");
23 parse_unidata ("$UNIDIR/UnicodeData.txt");
26 ############################
37 foreach my $l (@lines) {
39 next if ($l =~ /^\#/);
42 if ($l eq "CHARMAP") {
47 next if (!$incharmap);
48 last if ($l eq "END CHARMAP");
50 $l =~ /^(<[^\s]+>)\s+(.*)/;
53 $k =~ s/\\x//g; # UTF-8 char code
59 open(FOUT, ">", "$outfilename")
60 or die ("can't write to $outfilename\n");
62 # Warning: Do not edit. This file is automatically generated from the
63 # tools in /usr/src/tools/tools/locale. The data is obtained from the
64 # CLDR project, obtained from http://cldr.unicode.org/
65 # -----------------------------------------------------------------------------
75 print FOUT "\nEND LC_CTYPE\n";
86 if (($wc & ~0x7f) == 0) {
87 return sprintf "%02X", $wc;
88 } elsif (($wc & ~0x7ff) == 0) {
91 } elsif (($wc & ~0xffff) == 0) {
94 } elsif ($wc >= 0 && $wc <= 0x10ffff) {
99 for ($i = $len - 1; $i > 0; $i--) {
100 $ret = (sprintf "%02X", ($wc & 0x3f) | 0x80) . $ret;
103 $ret = (sprintf "%02X", ($wc & 0xff) | $lead) . $ret;
117 foreach my $l (@lines) {
118 my @d = split(/;/, $l, -1);
119 my $mb = wctomb($d[0]);
122 # XXX: Some code points present in UnicodeData.txt are missing
123 # from UTF-8.cm; those have no %utf8map entry and are skipped.
124 next if !defined $utf8map{$mb};
126 # Define the category
127 if ($d[2] =~ /^Lu/) {
129 } elsif ($d[2] =~ /^Ll/) {
131 } elsif ($d[2] =~ /^Nd/) {
133 } elsif ($d[2] =~ /^L/) {
135 } elsif ($d[2] =~ /^P/) {
137 } elsif ($d[2] =~ /^M/ || $d[2] =~ /^N/ || $d[2] =~ /^S/) {
139 } elsif ($d[2] =~ /^C/) {
141 } elsif ($d[2] =~ /^Z/) {
144 $data{$cat}{$mb}{'wc'} = $d[0];
146 # Check if it's a start or end of range
147 if ($d[1] =~ /First>$/) {
148 $data{$cat}{$mb}{'start'} = 1;
149 } elsif ($d[1] =~ /Last>$/) {
150 $data{$cat}{$mb}{'end'} = 1;
153 # Check if there's upper/lower mapping
155 $data{'toupper'}{$mb} = wctomb($d[12]);
156 } elsif ($d[13] ne "") {
157 $data{'tolower'}{$mb} = wctomb($d[13]);
164 # Now write out the categories
165 foreach my $cat (sort keys (%data)) {
168 foreach my $mb (sort keys (%{$data{$cat}})) {
171 } elsif ($inrange == 1) {
173 die "broken range end wc=$data{$cat}{$mb}{'wc'}"
174 if !defined $data{$cat}{$mb}{'end'};
181 if ($cat eq "tolower" || $cat eq "toupper") {
182 print FOUT "($utf8map{$mb},$utf8map{$data{$cat}{$mb}})";
184 if (defined($data{$cat}{$mb}{'start'})) {
187 print FOUT "$utf8map{$mb}";