3 # This file and its contents are supplied under the terms of the
4 # Common Development and Distribution License ("CDDL"), version 1.0.
5 # You may only use this file in accordance with the terms of version
8 # A full copy of the text of the CDDL should have accompanied this
9 # source. A copy of the CDDL is also available via the Internet
10 # at http://www.illumos.org/license/CDDL.
14 # Copyright 2010 Nexenta Systems, Inc. All rights reserved.
15 # Copyright 2015 John Marino <draco@marino.st>
18 # This converts MAPPING files to localedef character maps
19 # suitable for use with the UTF-8 derived localedef data.
27 $utf8 = sprintf("\\x%02X", $ucs).$utf8; # one-byte form: the value itself is the only byte
28 } elsif ($ucs <= 0x7ff) { # two-byte form
29 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8; # continuation byte 10xxxxxx (low six bits), prepended
31 $utf8 = sprintf("\\x%02X", $ucs | 0xc0).$utf8; # lead byte 110xxxxx
33 } elsif ($ucs <= 0xffff) { # three-byte form
34 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
36 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
38 $utf8 = sprintf("\\x%02X", $ucs | 0xe0).$utf8; # lead byte 1110xxxx
40 } elsif ($ucs <= 0x1fffff) { # four-byte form
41 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
43 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
45 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
47 $utf8 = sprintf("\\x%02X", $ucs | 0xf0).$utf8; # lead byte 11110xxx
49 } elsif ($ucs <= 0x03ffffff) { # five-byte form
50 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
52 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
54 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
56 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
58 $utf8 = sprintf("\\x%02X", $ucs | 0xf8).$utf8; # lead byte 111110xx
61 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8; # six-byte form: five continuation bytes follow the lead
63 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
65 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
67 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
69 $utf8 = sprintf("\\x%02X", ($ucs & 0x3f) | 0x80).$utf8;
71 $utf8 = sprintf("\\x%02X", $ucs | 0xfc).$utf8; # lead byte 1111110x; 0xf8 here would repeat the five-byte lead
81 # This is not a general purpose Character Map parser, but it's good enough
82 # for the stock one supplied with CLDR.
88 open(UTF8, "<", $file) || die "open $file: $!"; # read-only, three-argument open; report which file failed and why
93 next if (/^\s*CHARMAP\s*$/); # skip the CHARMAP header line
94 next if (/^\s*END\s*CHARMAP\s*$/); # skip the END CHARMAP trailer line
100 if (defined($unames{$utf8val})) {
101 $unames{$utf8val} .= "\n" .$name; # several names may share one value; keep them newline-separated
103 $unames{$utf8val} = $name; # first name seen for this value
105 $uvalues{$name} = $utf8val; # reverse lookup: name -> value
116 open(MAP, "<", $file) || die "open $file: $!"; # read-only, three-argument open; report which file failed and why
121 next if (/^0x..\+0x../); # skip lines whose leading code is written as 0x..+0x..
122 next if (/^0x[0-9A-F]{4}\t0x[0-9A-F]{4} 0x[0-9A-F]{4}/); # skip lines that list two values after the code
123 next if (/^0x[0-9A-F]{2}\s+#/); # skip lines where a comment follows the code directly
124 next if (/# ... NO MAPPING .../); # skip lines whose comment reads NO MAPPING
126 @words = split /\s+/; # whitespace-separated fields of the current line ($_)
128 $utf8 =~ s/^\\x[0]*//; # drop a leading literal \x and any zeros right after it
129 $utf8 = ucs_to_utf8(hex($utf8)); # hex string -> number -> result of ucs_to_utf8
131 if (defined ($map{$val})) {
132 $map{$val} .= " ".$utf8; # a value seen before collects further entries, space-separated
149 $str = sprintf("\\x%02x", $val & 0xff).$str; # prepend the low byte of $val as a lowercase \xNN escape
156 $codeset = shift(@ARGV); # first command-line argument names the target codeset
159 load_utf8_cm("etc/final-maps/map.UTF-8"); # relative path to the UTF-8 character map
163 if ($codeset eq "SJIS") { $max_mb = 2 } # $max_mb is printed below as <mb_cur_max>
164 elsif ($codeset eq "eucCN") { $max_mb = 2 }
165 elsif ($codeset eq "eucJP") { $max_mb = 3 }
166 elsif ($codeset eq "eucKR") { $max_mb = 2 }
167 elsif ($codeset eq "GBK") { $max_mb = 2 }
168 elsif ($codeset eq "GB2312") { $max_mb = 2 }
169 elsif ($codeset eq "Big5") { $max_mb = 2 }
170 elsif ($codeset eq "Big5HKSCS") { $max_mb = 2 }
171 else { $max_mb = 1 }; # any codeset not listed above gets 1
172 print("<code_set_name> \"$codeset\"\n"); # charmap header starts here
173 print("<mb_cur_min> 1\n");
174 print("<mb_cur_max> $max_mb\n");
177 foreach $val (sort (keys (%map))) { # NOTE(review): default sort compares keys as strings; confirm that order is intended
179 foreach $utf8 (split / /, $map{$val}) { # $map{$val} holds space-separated entries
180 $ref = $unames{$utf8}; # names recorded for this entry, newline-separated
181 foreach $name (sort (split /\n/, $ref)) {
183 my $nt = int((64 - length($name) + 7) / 8); # presumably a tab count for aligning the value column; confirm against its use
188 print mb_str($val)."\n"; # value rendered by mb_str, ending the output line
192 print "END CHARMAP\n";