]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - tools/tools/locale/tools/cldr2def.pl
MFV r336851:
[FreeBSD/FreeBSD.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2 # $FreeBSD$
3
4 use strict;
5 use File::Copy;
6 use XML::Parser;
7 use Tie::IxHash;
8 use Text::Iconv;
9 use Data::Dumper;
10 use Getopt::Long;
11 use Digest::SHA qw(sha1_hex);
12 require "charmaps.pm";
13
14
15 if ($#ARGV < 2) {
16         print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
17         exit(1);
18 }
19
20 my $DEFENCODING = "UTF-8";
21 my @filter = ();
22
23 my $CLDRDIR = undef;
24 my $UNIDATADIR = undef;
25 my $ETCDIR = undef;
26 my $TYPE = undef;
27 my $doonly = undef;
28
29 my $result = GetOptions (
30                 "cldr=s"        => \$CLDRDIR,
31                 "unidata=s"     => \$UNIDATADIR,
32                 "etc=s"         => \$ETCDIR,
33                 "type=s"        => \$TYPE,
34                 "lc=s"          => \$doonly
35             );
36
37 my %convertors = ();
38
39 my %ucd = ();
40 my %values = ();
41 my %hashtable = ();
42 my %languages = ();
43 my %translations = ();
44 my %encodings = ();
45 my %alternativemonths = ();
46 get_languages();
47
48 my %utf8map = ();
49 my %utf8aliases = ();
50 get_unidata($UNIDATADIR);
51 get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
52 get_encodings("$ETCDIR/charmaps");
53
54 my %keys = ();
55 tie(%keys, "Tie::IxHash");
56 tie(%hashtable, "Tie::IxHash");
57
58 my %FILESNAMES = (
59         "monetdef"      => "LC_MONETARY",
60         "timedef"       => "LC_TIME",
61         "msgdef"        => "LC_MESSAGES",
62         "numericdef"    => "LC_NUMERIC",
63         "colldef"       => "LC_COLLATE",
64         "ctypedef"      => "LC_CTYPE"
65 );
66
67 my %callback = (
68         mdorder => \&callback_mdorder,
69         altmon => \&callback_altmon,
70         cformat => \&callback_cformat,
71         dformat => \&callback_dformat,
72         dtformat => \&callback_dtformat,
73         cbabmon => \&callback_abmon,
74         cbampm => \&callback_ampm,
75         data => undef,
76 );
77
78 my %DESC = (
79
80         # numericdef
81         "decimal_point" => "decimal_point",
82         "thousands_sep" => "thousands_sep",
83         "grouping"      => "grouping",
84
85         # monetdef
86         "int_curr_symbol"       => "int_curr_symbol (last character always " .
87                                    "SPACE)",
88         "currency_symbol"       => "currency_symbol",
89         "mon_decimal_point"     => "mon_decimal_point",
90         "mon_thousands_sep"     => "mon_thousands_sep",
91         "mon_grouping"          => "mon_grouping",
92         "positive_sign"         => "positive_sign",
93         "negative_sign"         => "negative_sign",
94         "int_frac_digits"       => "int_frac_digits",
95         "frac_digits"           => "frac_digits",
96         "p_cs_precedes"         => "p_cs_precedes",
97         "p_sep_by_space"        => "p_sep_by_space",
98         "n_cs_precedes"         => "n_cs_precedes",
99         "n_sep_by_space"        => "n_sep_by_space",
100         "p_sign_posn"           => "p_sign_posn",
101         "n_sign_posn"           => "n_sign_posn",
102
103         # msgdef
104         "yesexpr"       => "yesexpr",
105         "noexpr"        => "noexpr",
106         "yesstr"        => "yesstr",
107         "nostr"         => "nostr",
108
109         # timedef
110         "abmon"         => "Short month names",
111         "mon"           => "Long month names (as in a date)",
112         "abday"         => "Short weekday names",
113         "day"           => "Long weekday names",
114         "t_fmt"         => "X_fmt",
115         "d_fmt"         => "x_fmt",
116         "c_fmt"         => "c_fmt",
117         "am_pm"         => "AM/PM",
118         "d_t_fmt"       => "date_fmt",
119         "altmon"        => "Long month names (without case ending)",
120         "md_order"      => "md_order",
121         "t_fmt_ampm"    => "ampm_fmt",
122 );
123
124 if ($TYPE eq "colldef") {
125         transform_collation();
126         make_makefile();
127 }
128
129 if ($TYPE eq "ctypedef") {
130         transform_ctypes();
131         make_makefile();
132 }
133
134 if ($TYPE eq "numericdef") {
135         %keys = (
136             "decimal_point"     => "s",
137             "thousands_sep"     => "s",
138             "grouping"          => "ai",
139         );
140         get_fields();
141         print_fields();
142         make_makefile();
143 }
144
145 if ($TYPE eq "monetdef") {
146         %keys = (
147             "int_curr_symbol"   => "s",
148             "currency_symbol"   => "s",
149             "mon_decimal_point" => "s",
150             "mon_thousands_sep" => "s",
151             "mon_grouping"      => "ai",
152             "positive_sign"     => "s",
153             "negative_sign"     => "s",
154             "int_frac_digits"   => "i",
155             "frac_digits"       => "i",
156             "p_cs_precedes"     => "i",
157             "p_sep_by_space"    => "i",
158             "n_cs_precedes"     => "i",
159             "n_sep_by_space"    => "i",
160             "p_sign_posn"       => "i",
161             "n_sign_posn"       => "i"
162         );
163         get_fields();
164         print_fields();
165         make_makefile();
166 }
167
168 if ($TYPE eq "msgdef") {
169         %keys = (
170             "yesexpr"           => "s",
171             "noexpr"            => "s",
172             "yesstr"            => "s",
173             "nostr"             => "s"
174         );
175         get_fields();
176         print_fields();
177         make_makefile();
178 }
179
180 if ($TYPE eq "timedef") {
181         %keys = (
182             "abmon"             => "<cbabmon<abmon<as",
183             "mon"               => "as",
184             "abday"             => "as",
185             "day"               => "as",
186             "t_fmt"             => "s",
187             "d_fmt"             => "<dformat<d_fmt<s",
188             "c_fmt"             => "<cformat<d_t_fmt<s",
189             "am_pm"             => "<cbampm<am_pm<as",
190             "d_t_fmt"           => "<dtformat<d_t_fmt<s",
191             "altmon"            => "<altmon<mon<as",
192             "md_order"          => "<mdorder<d_fmt<s",
193             "t_fmt_ampm"        => "s",
194         );
195         get_fields();
196         print_fields();
197         make_makefile();
198 }
199
200 sub callback_ampm {
201         my $s = shift;
202         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
203         my $enc = $callback{data}{e};
204
205         if ($nl eq 'ru_RU') {
206                 if ($enc eq 'UTF-8') {
207                         $s = 'дп;пп';
208                 } else {
209                         my  $converter = Text::Iconv->new("utf-8", "$enc");
210                         $s = $converter->convert("дп;пп");
211                 }
212         }
213         return $s;
214 }
215
216 sub callback_cformat {
217         my $s = shift;
218         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
219
220         if ($nl eq 'ko_KR') {
221                 $s =~ s/(> )(%p)/$1%A $2/;
222         }
223         $s =~ s/\.,/\./;
224         $s =~ s/ %Z//;
225         $s =~ s/ %z//;
226         $s =~ s/^"%e\./%A %e/;
227         $s =~ s/^"(%B %e, )/"%A, $1/;
228         $s =~ s/^"(%e %B )/"%A $1/;
229         return $s;
230 };
231
232 sub callback_dformat {
233         my $s = shift;
234
235         $s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
236         $s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
237         return $s;
238 };
239
240 sub callback_dtformat {
241         my $s = shift;
242         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
243
244         if ($nl eq 'ja_JP') {
245                 $s =~ s/(> )(%H)/$1%A $2/;
246         } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') {
247                 if ($nl ne 'ko_KR') {
248                         $s =~ s/%m/%_m/;
249                 }
250                 $s =~ s/(> )(%p)/$1%A $2/;
251         }
252         $s =~ s/\.,/\./;
253         $s =~ s/^"%e\./%A %e/;
254         $s =~ s/^"(%B %e, )/"%A, $1/;
255         $s =~ s/^"(%e %B )/"%A $1/;
256         return $s;
257 };
258
259 sub callback_mdorder {
260         my $s = shift;
261         return undef if (!defined $s);
262         $s =~ s/[^dem]//g;
263         $s =~ s/e/d/g;
264         return $s;
265 };
266
267 sub callback_altmon {
268         # if the language/country is known in %alternative months then
269         # return that, otherwise repeat mon
270         my $s = shift;
271
272         if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
273                 my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
274                 my @cleaned;
275                 foreach (@altnames)
276                 {
277                         $_ =~ s/^\s+//;
278                         $_ =~ s/\s+$//;
279                         push @cleaned, $_;
280                 }
281                 return join(";",@cleaned);
282         }
283
284         return $s;
285 }
286
287 sub callback_abmon {
288         # for specified CJK locales, pad result with a space to enable
289         # columns to line up (style established in FreeBSD in 2001)
290         my $s = shift;
291         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
292
293         if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
294             $nl eq 'zh_HK' || $nl eq 'zh_TW') {
295                 my @monthnames = split(";", $s);
296                 my @cleaned;
297                 foreach (@monthnames)
298                 {
299                         if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
300                            ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
301                         {
302                                 $_ =~ s/^"/"<space>/;
303                         }
304                         push @cleaned, $_;
305                 }
306                 return join(";",@cleaned);
307         }
308         return $s;
309 }
310
311 ############################
312
313 sub get_unidata {
314         my $directory = shift;
315
316         open(FIN, "$directory/UnicodeData.txt")
317             or die("Cannot open $directory/UnicodeData.txt");;
318         my @lines = <FIN>;
319         chomp(@lines);
320         close(FIN);
321
322         foreach my $l (@lines) {
323                 my @a = split(/;/, $l);
324
325                 $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
326                 $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
327         }
328 }
329
330 sub get_utf8map {
331         my $file = shift;
332
333         open(FIN, $file);
334         my @lines = <FIN>;
335         close(FIN);
336         chomp(@lines);
337
338         my $prev_k = undef;
339         my $prev_v = "";
340         my $incharmap = 0;
341         foreach my $l (@lines) {
342                 $l =~ s/\r//;
343                 next if ($l =~ /^\#/);
344                 next if ($l eq "");
345
346                 if ($l eq "CHARMAP") {
347                         $incharmap = 1;
348                         next;
349                 }
350
351                 next if (!$incharmap);
352                 last if ($l eq "END CHARMAP");
353
354                 $l =~ /^<([^\s]+)>\s+(.*)/;
355                 my $k = $1;
356                 my $v = $2;
357                 $k =~ s/_/ /g;          # unicode char string
358                 $v =~ s/\\x//g;         # UTF-8 char code
359                 $utf8map{$k} = $v;
360
361                 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
362
363                 $prev_v = $v;
364                 $prev_k = $k;
365         }
366 }
367
368 sub get_encodings {
369         my $dir = shift;
370         foreach my $e (sort(keys(%encodings))) {
371                 if (!open(FIN, "$dir/$e.TXT")) {
372                         print "Cannot open charmap for $e\n";
373                         next;
374
375                 }
376                 $encodings{$e} = 1;
377                 my @lines = <FIN>;
378                 close(FIN);
379                 chomp(@lines);
380                 foreach my $l (@lines) {
381                         $l =~ s/\r//;
382                         next if ($l =~ /^\#/);
383                         next if ($l eq "");
384
385                         my @a = split(" ", $l);
386                         next if ($#a < 1);
387                         $a[0] =~ s/^0[xX]//;    # local char code
388                         $a[1] =~ s/^0[xX]//;    # unicode char code
389                         $convertors{$e}{uc($a[1])} = uc($a[0]);
390                 }
391         }
392 }
393
394 sub get_languages {
395         my %data = get_xmldata($ETCDIR);
396         %languages = %{$data{L}}; 
397         %translations = %{$data{T}}; 
398         %alternativemonths = %{$data{AM}}; 
399         %encodings = %{$data{E}}; 
400
401         return if (!defined $doonly);
402
403         my @a = split(/_/, $doonly);
404         if ($#a == 1) {
405                 $filter[0] = $a[0];
406                 $filter[1] = "x";
407                 $filter[2] = $a[1];
408         } elsif ($#a == 2) {
409                 $filter[0] = $a[0];
410                 $filter[1] = $a[1];
411                 $filter[2] = $a[2];
412         }
413
414         print Dumper(@filter);
415         return;
416 }
417
418 sub transform_ctypes {
419         foreach my $l (sort keys(%languages)) {
420         foreach my $f (sort keys(%{$languages{$l}})) {
421         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
422                 next if ($#filter == 2 && ($filter[0] ne $l
423                     || $filter[1] ne $f || $filter[2] ne $c));
424                 next if (defined $languages{$l}{$f}{definitions}
425                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
426                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
427                 my $file;
428                 $file = $l . "_";
429                 $file .= $f . "_" if ($f ne "x");
430                 $file .= $c;
431                 my $actfile = $file;
432
433                 my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
434                 if (! -f $filename) {
435                         print STDERR "Cannot open $filename\n";
436                         next;
437                 }
438                 open(FIN, "$filename");
439                 print "Reading from $filename for ${l}_${f}_${c}\n";
440                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
441                 my @lines;
442                 my $shex;
443                 my $uhex;
444                 while (<FIN>) {
445                         push @lines, $_;
446                 }
447                 close(FIN);
448                 $shex = sha1_hex(join("\n", @lines));
449                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
450                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
451                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
452                 print FOUT @lines;
453                 close(FOUT);
454                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
455                         next if ($enc eq $DEFENCODING);
456                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
457                         if (! -f $filename) {
458                                 print STDERR "Cannot open $filename\n";
459                                 next;
460                         }
461                         @lines = ();
462                         open(FIN, "$filename");
463                         while (<FIN>) {
464                                 if ((/^comment_char\s/) || (/^escape_char\s/)){
465                                         push @lines, $_;
466                                 }
467                                 if (/^LC_CTYPE/../^END LC_CTYPE/) {
468                                         push @lines, $_;
469                                 }
470                         }
471                         close(FIN);
472                         $uhex = sha1_hex(join("\n", @lines) . $enc);
473                         $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
474                         $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
475                         open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
476                         print FOUT <<EOF;
477 # Warning: Do not edit. This file is automatically extracted from the
478 # tools in /usr/src/tools/tools/locale. The data is obtained from the
479 # CLDR project, obtained from http://cldr.unicode.org/
480 # -----------------------------------------------------------------------------
481 EOF
482                         print FOUT @lines;
483                         close(FOUT);
484                 }
485         }
486         }
487         }
488 }
489
490
491 sub transform_collation {
492         foreach my $l (sort keys(%languages)) {
493         foreach my $f (sort keys(%{$languages{$l}})) {
494         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
495                 next if ($#filter == 2 && ($filter[0] ne $l
496                     || $filter[1] ne $f || $filter[2] ne $c));
497                 next if (defined $languages{$l}{$f}{definitions}
498                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
499                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
500                 my $file;
501                 $file = $l . "_";
502                 $file .= $f . "_" if ($f ne "x");
503                 $file .= $c;
504                 my $actfile = $file;
505
506                 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
507                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
508                     if (! -f $filename);
509                 if (! -f $filename
510                  && defined $languages{$l}{$f}{fallback}) {
511                         $file = $languages{$l}{$f}{fallback};
512                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
513                 }
514                 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
515                     if (! -f $filename);
516                 if (! -f $filename) {
517                         print STDERR
518                             "Cannot open $file.$DEFENCODING.src or fallback\n";
519                         next;
520                 }
521                 open(FIN, "$filename");
522                 print "Reading from $filename for ${l}_${f}_${c}\n";
523                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
524                 my @lines;
525                 my $shex;
526                 while (<FIN>) {
527                         if ((/^comment_char\s/) || (/^escape_char\s/)){
528                                 push @lines, $_;
529                         }
530                         if (/^LC_COLLATE/../^END LC_COLLATE/) {
531                                 $_ =~ s/[ ]+/ /g;
532                                 push @lines, $_;
533                         }
534                 }
535                 close(FIN);
536                 $shex = sha1_hex(join("\n", @lines));
537                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
538                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
539                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
540                 print FOUT <<EOF;
541 # Warning: Do not edit. This file is automatically extracted from the
542 # tools in /usr/src/tools/tools/locale. The data is obtained from the
543 # CLDR project, obtained from http://cldr.unicode.org/
544 # -----------------------------------------------------------------------------
545 EOF
546                 print FOUT @lines;
547                 close(FOUT);
548
549                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
550                         next if ($enc eq $DEFENCODING);
551                         copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
552                               "$TYPE.draft/$actfile.$enc.src");
553                         $languages{$l}{$f}{data}{$c}{$enc} = $shex;
554                         $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
555                 }
556         }
557         }
558         }
559 }
560
561 sub get_fields {
562         foreach my $l (sort keys(%languages)) {
563         foreach my $f (sort keys(%{$languages{$l}})) {
564         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
565                 next if ($#filter == 2 && ($filter[0] ne $l
566                     || $filter[1] ne $f || $filter[2] ne $c));
567                 next if (defined $languages{$l}{$f}{definitions}
568                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
569
570                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
571                 my $file;
572                 $file = $l . "_";
573                 $file .= $f . "_" if ($f ne "x");
574                 $file .= $c;
575
576                 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
577                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
578                     if (! -f $filename);
579                 if (! -f $filename
580                  && defined $languages{$l}{$f}{fallback}) {
581                         $file = $languages{$l}{$f}{fallback};
582                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
583                 }
584                 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
585                     if (! -f $filename);
586                 if (! -f $filename) {
587                         print STDERR
588                             "Cannot open $file.$DEFENCODING.src or fallback\n";
589                         next;
590                 }
591                 open(FIN, "$filename");
592                 print "Reading from $filename for ${l}_${f}_${c}\n";
593                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
594                 my @lines = <FIN>;
595                 chomp(@lines);
596                 close(FIN);
597                 my $continue = 0;
598                 foreach my $k (keys(%keys)) {
599                         foreach my $line (@lines) {
600                                 $line =~ s/\r//;
601                                 next if (!$continue && $line !~ /^$k\s/);
602                                 if ($continue) {
603                                         $line =~ s/^\s+//;
604                                 } else {
605                                         $line =~ s/^$k\s+//;
606                                 }
607
608                                 $values{$l}{$f}{$c}{$k} = ""
609                                         if (!defined $values{$l}{$f}{$c}{$k});
610
611                                 $continue = ($line =~ /\/$/);
612                                 $line =~ s/\/$// if ($continue);
613
614                                 while ($line =~ /_/) {
615                                         $line =~
616                                             s/\<([^>_]+)_([^>]+)\>/<$1 $2>/;
617                                 }
618                                 die "_ in data - $line" if ($line =~ /_/);
619                                 $values{$l}{$f}{$c}{$k} .= $line;
620
621                                 last if (!$continue);
622                         }
623                 }
624         }
625         }
626         }
627 }
628
629 sub decodecldr {
630         my $e = shift;
631         my $s = shift;
632
633         my $v = undef;
634
635         if ($e eq "UTF-8") {
636                 #
637                 # Conversion to UTF-8 can be done from the Unicode name to
638                 # the UTF-8 character code.
639                 #
640                 $v = $utf8map{$s};
641                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
642         } else {
643                 #
644                 # Conversion to these encodings can be done from the Unicode
645                 # name to Unicode code to the encodings code.
646                 #
647                 my $ucc = undef;
648                 $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
649                 $ucc = $ucd{name2code}{$utf8aliases{$s}}
650                         if (!defined $ucc
651                          && $utf8aliases{$s}
652                          && defined $ucd{name2code}{$utf8aliases{$s}});
653
654                 if (!defined $ucc) {
655                         if (defined $translations{$e}{$s}{hex}) {
656                                 $v = $translations{$e}{$s}{hex};
657                                 $ucc = 0;
658                         } elsif (defined $translations{$e}{$s}{ucc}) {
659                                 $ucc = $translations{$e}{$s}{ucc};
660                         }
661                 }
662
663                 die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
664                 $v = $convertors{$e}{$ucc} if (!defined $v);
665
666                 $v = $translations{$e}{$s}{hex}
667                         if (!defined $v && defined $translations{$e}{$s}{hex});
668
669                 if (!defined $v && defined $translations{$e}{$s}{unicode}) {
670                         my $ucn = $translations{$e}{$s}{unicode};
671                         $ucc = $ucd{name2code}{$ucn}
672                                 if (defined $ucd{name2code}{$ucn});
673                         $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
674                                 if (!defined $ucc
675                                  && defined $ucd{name2code}{$utf8aliases{$ucn}});
676                         $v = $convertors{$e}{$ucc};
677                 }
678
679                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
680         }
681
682         return pack("C", hex($v)) if (length($v) == 2);
683         return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
684                 if (length($v) == 4);
685         return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
686             hex(substr($v, 4, 2))) if (length($v) == 6);
687         print STDERR "Cannot convert $e $s\n";
688         return "length = " . length($v);
689
690 }
691
692 sub translate {
693         my $enc = shift;
694         my $v = shift;
695
696         return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
697         return undef;
698 }
699
700 sub print_fields {
701         foreach my $l (sort keys(%languages)) {
702         foreach my $f (sort keys(%{$languages{$l}})) {
703         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
704                 next if ($#filter == 2 && ($filter[0] ne $l
705                     || $filter[1] ne $f || $filter[2] ne $c));
706                 next if (defined $languages{$l}{$f}{definitions}
707                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
708                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
709                         if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
710                                 print "Skipping ${l}_" .
711                                     ($f eq "x" ? "" : "${f}_") .
712                                     "${c} - not read\n";
713                                 next;
714                         }
715                         my $file = $l;
716                         $file .= "_" . $f if ($f ne "x");
717                         $file .= "_" . $c;
718                         print "Writing to $file in $enc\n";
719
720                         if ($enc ne $DEFENCODING &&
721                             !defined $convertors{$enc}) {
722                                 print "Failed! Cannot convert to $enc.\n";
723                                 next;
724                         };
725
726                         open(FOUT, ">$TYPE.draft/$file.$enc.new");
727                         my $okay = 1;
728                         my $output = "";
729                         print FOUT <<EOF;
730 # Warning: Do not edit. This file is automatically generated from the
731 # tools in /usr/src/tools/tools/locale. The data is obtained from the
732 # CLDR project, obtained from http://cldr.unicode.org/
733 # -----------------------------------------------------------------------------
734 EOF
735                         foreach my $k (keys(%keys)) {
736                                 my $g = $keys{$k};
737
738                                 die("Unknown $k in \%DESC")
739                                         if (!defined $DESC{$k});
740
741                                 $output .= "#\n# $DESC{$k}\n";
742
743                                 # Replace one row with another
744                                 if ($g =~ /^>/) {
745                                         $k = substr($g, 1);
746                                         $g = $keys{$k};
747                                 }
748
749                                 # Callback function
750                                 if ($g =~ /^\</) {
751                                         $callback{data}{c} = $c;
752                                         $callback{data}{k} = $k;
753                                         $callback{data}{f} = $f;
754                                         $callback{data}{l} = $l;
755                                         $callback{data}{e} = $enc;
756                                         my @a = split(/\</, substr($g, 1));
757                                         my $rv =
758                                             &{$callback{$a[0]}}($values{$l}{$f}{$c}{$a[1]});
759                                         $values{$l}{$f}{$c}{$k} = $rv;
760                                         $g = $a[2];
761                                         $callback{data} = ();
762                                 }
763
764                                 my $v = $values{$l}{$f}{$c}{$k};
765                                 $v = "undef" if (!defined $v);
766
767                                 if ($g eq "i") {
768                                         $output .= "$v\n";
769                                         next;
770                                 }
771                                 if ($g eq "ai") {
772                                         $output .= "$v\n";
773                                         next;
774                                 }
775                                 if ($g eq "s") {
776                                         $v =~ s/^"//;
777                                         $v =~ s/"$//;
778                                         my $cm = "";
779                                         while ($v =~ /^(.*?)<(.*?)>(.*)/) {
780                                                 my $p1 = $1;
781                                                 $cm = $2;
782                                                 my $p3 = $3;
783
784                                                 my $rv = decodecldr($enc, $cm);
785 #                                               $rv = translate($enc, $cm)
786 #                                                       if (!defined $rv);
787                                                 if (!defined $rv) {
788                                                         print STDERR 
789 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
790                                                         $okay = 0;
791                                                         next;
792                                                 }
793
794                                                 $v = $p1 . $rv . $p3;
795                                         }
796                                         $output .= "$v\n";
797                                         next;
798                                 }
799                                 if ($g eq "as") {
800                                         foreach my $v (split(/;/, $v)) {
801                                                 $v =~ s/^"//;
802                                                 $v =~ s/"$//;
803                                                 my $cm = "";
804                                                 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
805                                                         my $p1 = $1;
806                                                         $cm = $2;
807                                                         my $p3 = $3;
808
809                                                         my $rv =
810                                                             decodecldr($enc,
811                                                                 $cm);
812 #                                                       $rv = translate($enc,
813 #                                                           $cm)
814 #                                                           if (!defined $rv);
815                                                         if (!defined $rv) {
816                                                                 print STDERR 
817 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
818                                                                 $okay = 0;
819                                                                 next;
820                                                         }
821
822                                                         $v = $1 . $rv . $3;
823                                                 }
824                                                 $output .= "$v\n";
825                                         }
826                                         next;
827                                 }
828
829                                 die("$k is '$g'");
830
831                         }
832
833                         $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
834                         $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
835                         print FOUT "$output# EOF\n";
836                         close(FOUT);
837
838                         if ($okay) {
839                                 rename("$TYPE.draft/$file.$enc.new",
840                                     "$TYPE.draft/$file.$enc.src");
841                         } else {
842                                 rename("$TYPE.draft/$file.$enc.new",
843                                     "$TYPE.draft/$file.$enc.failed");
844                         }
845                 }
846         }
847         }
848         }
849 }
850
851 sub make_makefile {
852         return if ($#filter > -1);
853         print "Creating Makefile for $TYPE\n";
854         my $SRCOUT;
855         my $SRCOUT2;
856         my $SRCOUT3 = "";
857         my $SRCOUT4 = "";
858         my $MAPLOC;
859         if ($TYPE eq "colldef") {
860                 $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
861                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
862                         "\${.OBJDIR}/\${.IMPSRC:T:R}";
863                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
864                                 "locale/etc/final-maps\n";
865                 $SRCOUT2 = "LC_COLLATE";
866                 $SRCOUT3 = "" .
867                         ".for f t in \${LOCALES_MAPPED}\n" .
868                         "FILES+=\t\$t.LC_COLLATE\n" .
869                         "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
870                         "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
871                         "\tlocaledef -D -U -i \${.ALLSRC} \\\n" .
872                         "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
873                         "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
874                         ".endfor\n\n";
875                 $SRCOUT4 = "## LOCALES_MAPPED\n";
876         }
877         elsif ($TYPE eq "ctypedef") {
878                 $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
879                         "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
880                         "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
881                         " || true";
882                 $SRCOUT2 = "LC_CTYPE";
883                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
884                                 "locale/etc/final-maps\n";
885                 $SRCOUT3 = "## SYMPAIRS\n\n" .
886                         ".for s t in \${SYMPAIRS}\n" .
887                         "\${t:S/src\$/LC_CTYPE/}: " .
888                         "\$s\n" .
889                         "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
890                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
891                         "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
892                         " || true\n" .
893                         ".endfor\n\n";
894         }
895         else {
896                 $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
897                 $SRCOUT2 = "out";
898                 $MAPLOC = "";
899         }
900         open(FOUT, ">$TYPE.draft/Makefile");
901         print FOUT <<EOF;
902 # \$FreeBSD\$
903 # Warning: Do not edit. This file is automatically generated from the
904 # tools in /usr/src/tools/tools/locale.
905
906 LOCALEDIR=      \${SHAREDIR}/locale
907 FILESNAME=      $FILESNAMES{$TYPE}
908 .SUFFIXES:      .src .${SRCOUT2}
909 ${MAPLOC}
910 .src.${SRCOUT2}:
911         $SRCOUT
912
913 ## PLACEHOLDER
914
915 ${SRCOUT4}
916
917 EOF
918
919         foreach my $hash (keys(%hashtable)) {
920                 # For colldef, weight LOCALES to UTF-8
921                 #     Sort as upper-case and reverse to achieve it
922                 #     Make en_US, ru_RU, and ca_AD preferred
923                 my @files;
924                 if ($TYPE eq "colldef") {
925                         @files = sort {
926                                 if ($a eq 'en_x_US.UTF-8' ||
927                                     $a eq 'ru_x_RU.UTF-8' ||
928                                     $a eq 'ca_x_AD.UTF-8') { return -1; }
929                                 elsif ($b eq 'en_x_US.UTF-8' ||
930                                        $b eq 'ru_x_RU.UTF-8' ||
931                                        $b eq 'ca_x_AD.UTF-8') { return 1; }
932                                 else { return uc($b) cmp uc($a); }
933                                 } keys(%{$hashtable{$hash}});
934                 } elsif ($TYPE eq "ctypedef") {
935                         @files = sort {
936                                 if ($a eq 'en_x_US.UTF-8') { return -1; }
937                                 elsif ($b eq 'en_x_US.UTF-8') { return 1; }
938                                 if ($a =~ /^en_x_US/) { return -1; }
939                                 elsif ($b =~ /^en_x_US/) { return 1; }
940
941                                 if ($a =~ /^en_x_GB.ISO8859-15/ ||
942                                     $a =~ /^ru_x_RU/) { return -1; }
943                                 elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
944                                        $b =~ /ru_x_RU/) { return 1; }
945                                 else { return uc($b) cmp uc($a); }
946
947                                 } keys(%{$hashtable{$hash}});
948                 } else {
949                         @files = sort {
950                                 if ($a =~ /_Comm_/ ||
951                                     $b eq 'en_x_US.UTF-8') { return 1; }
952                                 elsif ($b =~ /_Comm_/ ||
953                                        $a eq 'en_x_US.UTF-8') { return -1; }
954                                 else { return uc($b) cmp uc($a); }
955                                 } keys(%{$hashtable{$hash}});
956                 }
957                 if ($#files > 0) {
958                         my $link = shift(@files);
959                         $link =~ s/_x_/_/;      # strip family if none there
960                         foreach my $file (@files) {
961                                 my @a = split(/_/, $file);
962                                 my @b = split(/\./, $a[-1]);
963                                 $file =~ s/_x_/_/;
964                                 print FOUT "SAME+=\t\t$link $file\n";
965                                 undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
966                         }
967                 }
968         }
969
970         foreach my $l (sort keys(%languages)) {
971         foreach my $f (sort keys(%{$languages{$l}})) {
972         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
973                 next if ($#filter == 2 && ($filter[0] ne $l
974                     || $filter[1] ne $f || $filter[2] ne $c));
975                 next if (defined $languages{$l}{$f}{definitions}
976                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
977                 if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
978                  && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
979                         print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
980                             "${c} - not read\n";
981                         next;
982                 }
983                 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
984                         my $file = $l . "_";
985                         $file .= $f . "_" if ($f ne "x");
986                         $file .= $c;
987                         next if (!defined $languages{$l}{$f}{data}{$c}{$e});
988                         print FOUT "LOCALES+=\t$file.$e\n";
989                 }
990
991                 if (defined $languages{$l}{$f}{nc_link}) {
992                         foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
993                                 my $file = $l . "_";
994                                 $file .= $f . "_" if ($f ne "x");
995                                 $file .= $c;
996                                 print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
997                         }
998                 }
999
1000                 if (defined $languages{$l}{$f}{e_link}) {
1001                         foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
1002                                 my @a = split(/:/, $el);
1003                                 my $file = $l . "_";
1004                                 $file .= $f . "_" if ($f ne "x");
1005                                 $file .= $c;
1006                                 print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
1007                         }
1008                 }
1009
1010         }
1011         }
1012         }
1013
1014         print FOUT <<EOF;
1015
1016 FILES=          \${LOCALES:S/\$/.${SRCOUT2}/}
1017 CLEANFILES=     \${FILES}
1018
1019 .for f t in \${SAME}
1020 SYMLINKS+=      ../\$f/\${FILESNAME} \\
1021     \${LOCALEDIR}/\$t/\${FILESNAME}
1022 .endfor
1023
1024 .for f in \${LOCALES}
1025 FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
1026 .endfor
1027
1028 ${SRCOUT3}.include <bsd.prog.mk>
1029 EOF
1030
1031         close(FOUT);
1032 }