]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - tools/tools/locale/tools/cldr2def.pl
MFV r303083: 7164 zdb should be able to open the root dataset
[FreeBSD/FreeBSD.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2 # $FreeBSD$
3
4 use strict;
5 use File::Copy;
6 use XML::Parser;
7 use Tie::IxHash;
8 use Text::Iconv;
9 use Data::Dumper;
10 use Getopt::Long;
11 use Digest::SHA qw(sha1_hex);
12 require "charmaps.pm";
13
14
15 if ($#ARGV < 2) {
16         print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
17         exit(1);
18 }
19
20 my $DEFENCODING = "UTF-8";
21 my @filter = ();
22
23 my $CLDRDIR = undef;
24 my $UNIDATADIR = undef;
25 my $ETCDIR = undef;
26 my $TYPE = undef;
27 my $doonly = undef;
28
29 my $result = GetOptions (
30                 "cldr=s"        => \$CLDRDIR,
31                 "unidata=s"     => \$UNIDATADIR,
32                 "etc=s"         => \$ETCDIR,
33                 "type=s"        => \$TYPE,
34                 "lc=s"          => \$doonly
35             );
36
37 my %convertors = ();
38
39 my %ucd = ();
40 my %values = ();
41 my %hashtable = ();
42 my %languages = ();
43 my %translations = ();
44 my %encodings = ();
45 my %alternativemonths = ();
46 get_languages();
47
48 my %utf8map = ();
49 my %utf8aliases = ();
50 get_unidata($UNIDATADIR);
51 get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
52 get_encodings("$ETCDIR/charmaps");
53
54 my %keys = ();
55 tie(%keys, "Tie::IxHash");
56 tie(%hashtable, "Tie::IxHash");
57
58 my %FILESNAMES = (
59         "monetdef"      => "LC_MONETARY",
60         "timedef"       => "LC_TIME",
61         "msgdef"        => "LC_MESSAGES",
62         "numericdef"    => "LC_NUMERIC",
63         "colldef"       => "LC_COLLATE",
64         "ctypedef"      => "LC_CTYPE"
65 );
66
67 my %callback = (
68         mdorder => \&callback_mdorder,
69         altmon => \&callback_altmon,
70         cformat => \&callback_cformat,
71         dformat => \&callback_dformat,
72         dtformat => \&callback_dtformat,
73         cbabmon => \&callback_abmon,
74         cbampm => \&callback_ampm,
75         data => undef,
76 );
77
78 my %DESC = (
79
80         # numericdef
81         "decimal_point" => "decimal_point",
82         "thousands_sep" => "thousands_sep",
83         "grouping"      => "grouping",
84
85         # monetdef
86         "int_curr_symbol"       => "int_curr_symbol (last character always " .
87                                    "SPACE)",
88         "currency_symbol"       => "currency_symbol",
89         "mon_decimal_point"     => "mon_decimal_point",
90         "mon_thousands_sep"     => "mon_thousands_sep",
91         "mon_grouping"          => "mon_grouping",
92         "positive_sign"         => "positive_sign",
93         "negative_sign"         => "negative_sign",
94         "int_frac_digits"       => "int_frac_digits",
95         "frac_digits"           => "frac_digits",
96         "p_cs_precedes"         => "p_cs_precedes",
97         "p_sep_by_space"        => "p_sep_by_space",
98         "n_cs_precedes"         => "n_cs_precedes",
99         "n_sep_by_space"        => "n_sep_by_space",
100         "p_sign_posn"           => "p_sign_posn",
101         "n_sign_posn"           => "n_sign_posn",
102
103         # msgdef
104         "yesexpr"       => "yesexpr",
105         "noexpr"        => "noexpr",
106         "yesstr"        => "yesstr",
107         "nostr"         => "nostr",
108
109         # timedef
110         "abmon"         => "Short month names",
111         "mon"           => "Long month names (as in a date)",
112         "abday"         => "Short weekday names",
113         "day"           => "Long weekday names",
114         "t_fmt"         => "X_fmt",
115         "d_fmt"         => "x_fmt",
116         "c_fmt"         => "c_fmt",
117         "am_pm"         => "AM/PM",
118         "d_t_fmt"       => "date_fmt",
119         "altmon"        => "Long month names (without case ending)",
120         "md_order"      => "md_order",
121         "t_fmt_ampm"    => "ampm_fmt",
122 );
123
124 if ($TYPE eq "colldef") {
125         transform_collation();
126         make_makefile();
127 }
128
129 if ($TYPE eq "ctypedef") {
130         transform_ctypes();
131         make_makefile();
132 }
133
134 if ($TYPE eq "numericdef") {
135         %keys = (
136             "decimal_point"     => "s",
137             "thousands_sep"     => "s",
138             "grouping"          => "ai",
139         );
140         get_fields();
141         print_fields();
142         make_makefile();
143 }
144
145 if ($TYPE eq "monetdef") {
146         %keys = (
147             "int_curr_symbol"   => "s",
148             "currency_symbol"   => "s",
149             "mon_decimal_point" => "s",
150             "mon_thousands_sep" => "s",
151             "mon_grouping"      => "ai",
152             "positive_sign"     => "s",
153             "negative_sign"     => "s",
154             "int_frac_digits"   => "i",
155             "frac_digits"       => "i",
156             "p_cs_precedes"     => "i",
157             "p_sep_by_space"    => "i",
158             "n_cs_precedes"     => "i",
159             "n_sep_by_space"    => "i",
160             "p_sign_posn"       => "i",
161             "n_sign_posn"       => "i"
162         );
163         get_fields();
164         print_fields();
165         make_makefile();
166 }
167
168 if ($TYPE eq "msgdef") {
169         %keys = (
170             "yesexpr"           => "s",
171             "noexpr"            => "s",
172             "yesstr"            => "s",
173             "nostr"             => "s"
174         );
175         get_fields();
176         print_fields();
177         make_makefile();
178 }
179
180 if ($TYPE eq "timedef") {
181         %keys = (
182             "abmon"             => "<cbabmon<abmon<as",
183             "mon"               => "as",
184             "abday"             => "as",
185             "day"               => "as",
186             "t_fmt"             => "s",
187             "d_fmt"             => "<dformat<d_fmt<s",
188             "c_fmt"             => "<cformat<d_t_fmt<s",
189             "am_pm"             => "<cbampm<am_pm<as",
190             "d_t_fmt"           => "<dtformat<d_t_fmt<s",
191             "altmon"            => "<altmon<mon<as",
192             "md_order"          => "<mdorder<d_fmt<s",
193             "t_fmt_ampm"        => "s",
194         );
195         get_fields();
196         print_fields();
197         make_makefile();
198 }
199
200 sub callback_ampm {
201         my $s = shift;
202         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
203         my $enc = $callback{data}{e};
204         my  $converter = Text::Iconv->new("utf-8", "$enc");
205
206         if ($nl eq 'ru_RU') {
207                 if ($enc eq 'UTF-8') {
208                         $s = 'дп;пп';
209                 } else {
210                         $s = $converter->convert("дп;пп");
211                 }
212         }
213         return $s;
214 }
215
216 sub callback_cformat {
217         my $s = shift;
218         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
219
220         $s =~ s/\.,/\./;
221         $s =~ s/ %Z//;
222         $s =~ s/ %z//;
223         $s =~ s/^"(%B %e, )/"%A, $1/;
224         $s =~ s/^"(%e %B )/"%A $1/;
225         return $s;
226 };
227
228 sub callback_dformat {
229         my $s = shift;
230
231         $s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
232         $s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
233         return $s;
234 };
235
236 sub callback_dtformat {
237         my $s = shift;
238         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
239
240         if ($nl eq 'ja_JP') {
241                 $s =~ s/(> )(%H)/$1%A $2/;
242         }
243         $s =~ s/\.,/\./;
244         $s =~ s/^"(%B %e, )/"%A, $1/;
245         $s =~ s/^"(%e %B )/"%A $1/;
246         return $s;
247 };
248
249 sub callback_mdorder {
250         my $s = shift;
251         return undef if (!defined $s);
252         $s =~ s/[^dem]//g;
253         $s =~ s/e/d/g;
254         return $s;
255 };
256
257 sub callback_altmon {
258         # if the language/country is known in %alternative months then
259         # return that, otherwise repeat mon
260         my $s = shift;
261
262         if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
263                 my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
264                 my @cleaned;
265                 foreach (@altnames)
266                 {
267                         $_ =~ s/^\s+//;
268                         $_ =~ s/\s+$//;
269                         push @cleaned, $_;
270                 }
271                 return join(";",@cleaned);
272         }
273
274         return $s;
275 }
276
277 sub callback_abmon {
278         # for specified CJK locales, pad result with a space to enable
279         # columns to line up (style established in FreeBSD in 2001)
280         my $s = shift;
281         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
282
283         if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
284             $nl eq 'zh_HK' || $nl eq 'zh_TW') {
285                 my @monthnames = split(";", $s);
286                 my @cleaned;
287                 foreach (@monthnames)
288                 {
289                         if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
290                            ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
291                         {
292                                 $_ =~ s/^"/"<space>/;
293                         }
294                         push @cleaned, $_;
295                 }
296                 return join(";",@cleaned);
297         }
298         return $s;
299 }
300
301 ############################
302
303 sub get_unidata {
304         my $directory = shift;
305
306         open(FIN, "$directory/UnicodeData.txt")
307             or die("Cannot open $directory/UnicodeData.txt");;
308         my @lines = <FIN>;
309         chomp(@lines);
310         close(FIN);
311
312         foreach my $l (@lines) {
313                 my @a = split(/;/, $l);
314
315                 $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
316                 $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
317         }
318 }
319
320 sub get_utf8map {
321         my $file = shift;
322
323         open(FIN, $file);
324         my @lines = <FIN>;
325         close(FIN);
326         chomp(@lines);
327
328         my $prev_k = undef;
329         my $prev_v = "";
330         my $incharmap = 0;
331         foreach my $l (@lines) {
332                 $l =~ s/\r//;
333                 next if ($l =~ /^\#/);
334                 next if ($l eq "");
335
336                 if ($l eq "CHARMAP") {
337                         $incharmap = 1;
338                         next;
339                 }
340
341                 next if (!$incharmap);
342                 last if ($l eq "END CHARMAP");
343
344                 $l =~ /^<([^\s]+)>\s+(.*)/;
345                 my $k = $1;
346                 my $v = $2;
347                 $k =~ s/_/ /g;          # unicode char string
348                 $v =~ s/\\x//g;         # UTF-8 char code
349                 $utf8map{$k} = $v;
350
351                 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
352
353                 $prev_v = $v;
354                 $prev_k = $k;
355         }
356 }
357
358 sub get_encodings {
359         my $dir = shift;
360         foreach my $e (sort(keys(%encodings))) {
361                 if (!open(FIN, "$dir/$e.TXT")) {
362                         print "Cannot open charmap for $e\n";
363                         next;
364
365                 }
366                 $encodings{$e} = 1;
367                 my @lines = <FIN>;
368                 close(FIN);
369                 chomp(@lines);
370                 foreach my $l (@lines) {
371                         $l =~ s/\r//;
372                         next if ($l =~ /^\#/);
373                         next if ($l eq "");
374
375                         my @a = split(" ", $l);
376                         next if ($#a < 1);
377                         $a[0] =~ s/^0[xX]//;    # local char code
378                         $a[1] =~ s/^0[xX]//;    # unicode char code
379                         $convertors{$e}{uc($a[1])} = uc($a[0]);
380                 }
381         }
382 }
383
384 sub get_languages {
385         my %data = get_xmldata($ETCDIR);
386         %languages = %{$data{L}}; 
387         %translations = %{$data{T}}; 
388         %alternativemonths = %{$data{AM}}; 
389         %encodings = %{$data{E}}; 
390
391         return if (!defined $doonly);
392
393         my @a = split(/_/, $doonly);
394         if ($#a == 1) {
395                 $filter[0] = $a[0];
396                 $filter[1] = "x";
397                 $filter[2] = $a[1];
398         } elsif ($#a == 2) {
399                 $filter[0] = $a[0];
400                 $filter[1] = $a[1];
401                 $filter[2] = $a[2];
402         }
403
404         print Dumper(@filter);
405         return;
406 }
407
408 sub transform_ctypes {
409         foreach my $l (sort keys(%languages)) {
410         foreach my $f (sort keys(%{$languages{$l}})) {
411         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
412                 next if ($#filter == 2 && ($filter[0] ne $l
413                     || $filter[1] ne $f || $filter[2] ne $c));
414                 next if (defined $languages{$l}{$f}{definitions}
415                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
416                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
417                 my $file;
418                 $file = $l . "_";
419                 $file .= $f . "_" if ($f ne "x");
420                 $file .= $c;
421                 my $actfile = $file;
422
423                 my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
424                 if (! -f $filename) {
425                         print STDERR "Cannot open $filename\n";
426                         next;
427                 }
428                 open(FIN, "$filename");
429                 print "Reading from $filename for ${l}_${f}_${c}\n";
430                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
431                 my @lines;
432                 my $shex;
433                 my $uhex;
434                 while (<FIN>) {
435                         push @lines, $_;
436                 }
437                 close(FIN);
438                 $shex = sha1_hex(join("\n", @lines));
439                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
440                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
441                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
442                 print FOUT @lines;
443                 close(FOUT);
444                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
445                         next if ($enc eq $DEFENCODING);
446                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
447                         if (! -f $filename) {
448                                 print STDERR "Cannot open $filename\n";
449                                 next;
450                         }
451                         @lines = ();
452                         open(FIN, "$filename");
453                         while (<FIN>) {
454                                 if ((/^comment_char\s/) || (/^escape_char\s/)){
455                                         push @lines, $_;
456                                 }
457                                 if (/^LC_CTYPE/../^END LC_CTYPE/) {
458                                         push @lines, $_;
459                                 }
460                         }
461                         close(FIN);
462                         $uhex = sha1_hex(join("\n", @lines) . $enc);
463                         $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
464                         $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
465                         open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
466                         print FOUT <<EOF;
467 # Warning: Do not edit. This file is automatically extracted from the
468 # tools in /usr/src/tools/tools/locale. The data is obtained from the
469 # CLDR project, obtained from http://cldr.unicode.org/
470 # -----------------------------------------------------------------------------
471 EOF
472                         print FOUT @lines;
473                         close(FOUT);
474                 }
475         }
476         }
477         }
478 }
479
480
481 sub transform_collation {
482         foreach my $l (sort keys(%languages)) {
483         foreach my $f (sort keys(%{$languages{$l}})) {
484         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
485                 next if ($#filter == 2 && ($filter[0] ne $l
486                     || $filter[1] ne $f || $filter[2] ne $c));
487                 next if (defined $languages{$l}{$f}{definitions}
488                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
489                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
490                 my $file;
491                 $file = $l . "_";
492                 $file .= $f . "_" if ($f ne "x");
493                 $file .= $c;
494                 my $actfile = $file;
495
496                 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
497                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
498                     if (! -f $filename);
499                 if (! -f $filename
500                  && defined $languages{$l}{$f}{fallback}) {
501                         $file = $languages{$l}{$f}{fallback};
502                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
503                 }
504                 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
505                     if (! -f $filename);
506                 if (! -f $filename) {
507                         print STDERR
508                             "Cannot open $file.$DEFENCODING.src or fallback\n";
509                         next;
510                 }
511                 open(FIN, "$filename");
512                 print "Reading from $filename for ${l}_${f}_${c}\n";
513                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
514                 my @lines;
515                 my $shex;
516                 while (<FIN>) {
517                         if ((/^comment_char\s/) || (/^escape_char\s/)){
518                                 push @lines, $_;
519                         }
520                         if (/^LC_COLLATE/../^END LC_COLLATE/) {
521                                 $_ =~ s/[ ]+/ /g;
522                                 push @lines, $_;
523                         }
524                 }
525                 close(FIN);
526                 $shex = sha1_hex(join("\n", @lines));
527                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
528                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
529                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
530                 print FOUT <<EOF;
531 # Warning: Do not edit. This file is automatically extracted from the
532 # tools in /usr/src/tools/tools/locale. The data is obtained from the
533 # CLDR project, obtained from http://cldr.unicode.org/
534 # -----------------------------------------------------------------------------
535 EOF
536                 print FOUT @lines;
537                 close(FOUT);
538
539                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
540                         next if ($enc eq $DEFENCODING);
541                         copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
542                               "$TYPE.draft/$actfile.$enc.src");
543                         $languages{$l}{$f}{data}{$c}{$enc} = $shex;
544                         $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
545                 }
546         }
547         }
548         }
549 }
550
551 sub get_fields {
552         foreach my $l (sort keys(%languages)) {
553         foreach my $f (sort keys(%{$languages{$l}})) {
554         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
555                 next if ($#filter == 2 && ($filter[0] ne $l
556                     || $filter[1] ne $f || $filter[2] ne $c));
557                 next if (defined $languages{$l}{$f}{definitions}
558                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
559
560                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
561                 my $file;
562                 $file = $l . "_";
563                 $file .= $f . "_" if ($f ne "x");
564                 $file .= $c;
565
566                 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
567                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
568                     if (! -f $filename);
569                 if (! -f $filename
570                  && defined $languages{$l}{$f}{fallback}) {
571                         $file = $languages{$l}{$f}{fallback};
572                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
573                 }
574                 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
575                     if (! -f $filename);
576                 if (! -f $filename) {
577                         print STDERR
578                             "Cannot open $file.$DEFENCODING.src or fallback\n";
579                         next;
580                 }
581                 open(FIN, "$filename");
582                 print "Reading from $filename for ${l}_${f}_${c}\n";
583                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
584                 my @lines = <FIN>;
585                 chomp(@lines);
586                 close(FIN);
587                 my $continue = 0;
588                 foreach my $k (keys(%keys)) {
589                         foreach my $line (@lines) {
590                                 $line =~ s/\r//;
591                                 next if (!$continue && $line !~ /^$k\s/);
592                                 if ($continue) {
593                                         $line =~ s/^\s+//;
594                                 } else {
595                                         $line =~ s/^$k\s+//;
596                                 }
597
598                                 $values{$l}{$f}{$c}{$k} = ""
599                                         if (!defined $values{$l}{$f}{$c}{$k});
600
601                                 $continue = ($line =~ /\/$/);
602                                 $line =~ s/\/$// if ($continue);
603
604                                 while ($line =~ /_/) {
605                                         $line =~
606                                             s/\<([^>_]+)_([^>]+)\>/<$1 $2>/;
607                                 }
608                                 die "_ in data - $line" if ($line =~ /_/);
609                                 $values{$l}{$f}{$c}{$k} .= $line;
610
611                                 last if (!$continue);
612                         }
613                 }
614         }
615         }
616         }
617 }
618
619 sub decodecldr {
620         my $e = shift;
621         my $s = shift;
622
623         my $v = undef;
624
625         if ($e eq "UTF-8") {
626                 #
627                 # Conversion to UTF-8 can be done from the Unicode name to
628                 # the UTF-8 character code.
629                 #
630                 $v = $utf8map{$s};
631                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
632         } else {
633                 #
634                 # Conversion to these encodings can be done from the Unicode
635                 # name to Unicode code to the encodings code.
636                 #
637                 my $ucc = undef;
638                 $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
639                 $ucc = $ucd{name2code}{$utf8aliases{$s}}
640                         if (!defined $ucc
641                          && $utf8aliases{$s}
642                          && defined $ucd{name2code}{$utf8aliases{$s}});
643
644                 if (!defined $ucc) {
645                         if (defined $translations{$e}{$s}{hex}) {
646                                 $v = $translations{$e}{$s}{hex};
647                                 $ucc = 0;
648                         } elsif (defined $translations{$e}{$s}{ucc}) {
649                                 $ucc = $translations{$e}{$s}{ucc};
650                         }
651                 }
652
653                 die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
654                 $v = $convertors{$e}{$ucc} if (!defined $v);
655
656                 $v = $translations{$e}{$s}{hex}
657                         if (!defined $v && defined $translations{$e}{$s}{hex});
658
659                 if (!defined $v && defined $translations{$e}{$s}{unicode}) {
660                         my $ucn = $translations{$e}{$s}{unicode};
661                         $ucc = $ucd{name2code}{$ucn}
662                                 if (defined $ucd{name2code}{$ucn});
663                         $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
664                                 if (!defined $ucc
665                                  && defined $ucd{name2code}{$utf8aliases{$ucn}});
666                         $v = $convertors{$e}{$ucc};
667                 }
668
669                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
670         }
671
672         return pack("C", hex($v)) if (length($v) == 2);
673         return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
674                 if (length($v) == 4);
675         return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
676             hex(substr($v, 4, 2))) if (length($v) == 6);
677         print STDERR "Cannot convert $e $s\n";
678         return "length = " . length($v);
679
680 }
681
682 sub translate {
683         my $enc = shift;
684         my $v = shift;
685
686         return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
687         return undef;
688 }
689
690 sub print_fields {
691         foreach my $l (sort keys(%languages)) {
692         foreach my $f (sort keys(%{$languages{$l}})) {
693         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
694                 next if ($#filter == 2 && ($filter[0] ne $l
695                     || $filter[1] ne $f || $filter[2] ne $c));
696                 next if (defined $languages{$l}{$f}{definitions}
697                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
698                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
699                         if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
700                                 print "Skipping ${l}_" .
701                                     ($f eq "x" ? "" : "${f}_") .
702                                     "${c} - not read\n";
703                                 next;
704                         }
705                         my $file = $l;
706                         $file .= "_" . $f if ($f ne "x");
707                         $file .= "_" . $c;
708                         print "Writing to $file in $enc\n";
709
710                         if ($enc ne $DEFENCODING &&
711                             !defined $convertors{$enc}) {
712                                 print "Failed! Cannot convert to $enc.\n";
713                                 next;
714                         };
715
716                         open(FOUT, ">$TYPE.draft/$file.$enc.new");
717                         my $okay = 1;
718                         my $output = "";
719                         print FOUT <<EOF;
720 # Warning: Do not edit. This file is automatically generated from the
721 # tools in /usr/src/tools/tools/locale. The data is obtained from the
722 # CLDR project, obtained from http://cldr.unicode.org/
723 # -----------------------------------------------------------------------------
724 EOF
725                         foreach my $k (keys(%keys)) {
726                                 my $g = $keys{$k};
727
728                                 die("Unknown $k in \%DESC")
729                                         if (!defined $DESC{$k});
730
731                                 $output .= "#\n# $DESC{$k}\n";
732
733                                 # Replace one row with another
734                                 if ($g =~ /^>/) {
735                                         $k = substr($g, 1);
736                                         $g = $keys{$k};
737                                 }
738
739                                 # Callback function
740                                 if ($g =~ /^\</) {
741                                         $callback{data}{c} = $c;
742                                         $callback{data}{k} = $k;
743                                         $callback{data}{f} = $f;
744                                         $callback{data}{l} = $l;
745                                         $callback{data}{e} = $enc;
746                                         my @a = split(/\</, substr($g, 1));
747                                         my $rv =
748                                             &{$callback{$a[0]}}($values{$l}{$f}{$c}{$a[1]});
749                                         $values{$l}{$f}{$c}{$k} = $rv;
750                                         $g = $a[2];
751                                         $callback{data} = ();
752                                 }
753
754                                 my $v = $values{$l}{$f}{$c}{$k};
755                                 $v = "undef" if (!defined $v);
756
757                                 if ($g eq "i") {
758                                         $output .= "$v\n";
759                                         next;
760                                 }
761                                 if ($g eq "ai") {
762                                         $output .= "$v\n";
763                                         next;
764                                 }
765                                 if ($g eq "s") {
766                                         $v =~ s/^"//;
767                                         $v =~ s/"$//;
768                                         my $cm = "";
769                                         while ($v =~ /^(.*?)<(.*?)>(.*)/) {
770                                                 my $p1 = $1;
771                                                 $cm = $2;
772                                                 my $p3 = $3;
773
774                                                 my $rv = decodecldr($enc, $cm);
775 #                                               $rv = translate($enc, $cm)
776 #                                                       if (!defined $rv);
777                                                 if (!defined $rv) {
778                                                         print STDERR 
779 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
780                                                         $okay = 0;
781                                                         next;
782                                                 }
783
784                                                 $v = $p1 . $rv . $p3;
785                                         }
786                                         $output .= "$v\n";
787                                         next;
788                                 }
789                                 if ($g eq "as") {
790                                         foreach my $v (split(/;/, $v)) {
791                                                 $v =~ s/^"//;
792                                                 $v =~ s/"$//;
793                                                 my $cm = "";
794                                                 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
795                                                         my $p1 = $1;
796                                                         $cm = $2;
797                                                         my $p3 = $3;
798
799                                                         my $rv =
800                                                             decodecldr($enc,
801                                                                 $cm);
802 #                                                       $rv = translate($enc,
803 #                                                           $cm)
804 #                                                           if (!defined $rv);
805                                                         if (!defined $rv) {
806                                                                 print STDERR 
807 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
808                                                                 $okay = 0;
809                                                                 next;
810                                                         }
811
812                                                         $v = $1 . $rv . $3;
813                                                 }
814                                                 $output .= "$v\n";
815                                         }
816                                         next;
817                                 }
818
819                                 die("$k is '$g'");
820
821                         }
822
823                         $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
824                         $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
825                         print FOUT "$output# EOF\n";
826                         close(FOUT);
827
828                         if ($okay) {
829                                 rename("$TYPE.draft/$file.$enc.new",
830                                     "$TYPE.draft/$file.$enc.src");
831                         } else {
832                                 rename("$TYPE.draft/$file.$enc.new",
833                                     "$TYPE.draft/$file.$enc.failed");
834                         }
835                 }
836         }
837         }
838         }
839 }
840
841 sub make_makefile {
842         return if ($#filter > -1);
843         print "Creating Makefile for $TYPE\n";
844         my $SRCOUT;
845         my $SRCOUT2;
846         my $SRCOUT3 = "";
847         my $SRCOUT4 = "";
848         my $MAPLOC;
849         if ($TYPE eq "colldef") {
850                 $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
851                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
852                         "\${.OBJDIR}/\${.IMPSRC:T:R}";
853                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
854                                 "locale/etc/final-maps\n";
855                 $SRCOUT2 = "LC_COLLATE";
856                 $SRCOUT3 = "" .
857                         ".for f t in \${LOCALES_MAPPED}\n" .
858                         "FILES+=\t\$t.LC_COLLATE\n" .
859                         "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
860                         "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
861                         "\tlocaledef -D -U -i \${.ALLSRC} \\\n" .
862                         "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
863                         "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
864                         ".endfor\n\n";
865                 $SRCOUT4 = "## LOCALES_MAPPED\n";
866         }
867         elsif ($TYPE eq "ctypedef") {
868                 $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
869                         "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
870                         "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
871                         " || true";
872                 $SRCOUT2 = "LC_CTYPE";
873                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
874                                 "locale/etc/final-maps\n";
875                 $SRCOUT3 = "## SYMPAIRS\n\n" .
876                         ".for s t in \${SYMPAIRS}\n" .
877                         "\${t:S/src\$/LC_CTYPE/}: " .
878                         "\$s\n" .
879                         "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
880                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
881                         "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
882                         " || true\n" .
883                         ".endfor\n\n";
884         }
885         else {
886                 $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
887                 $SRCOUT2 = "out";
888                 $MAPLOC = "";
889         }
890         open(FOUT, ">$TYPE.draft/Makefile");
891         print FOUT <<EOF;
892 # \$FreeBSD\$
893 # Warning: Do not edit. This file is automatically generated from the
894 # tools in /usr/src/tools/tools/locale.
895
896 LOCALEDIR=      \${SHAREDIR}/locale
897 FILESNAME=      $FILESNAMES{$TYPE}
898 .SUFFIXES:      .src .${SRCOUT2}
899 ${MAPLOC}
900 .src.${SRCOUT2}:
901         $SRCOUT
902
903 ## PLACEHOLDER
904
905 ${SRCOUT4}
906
907 EOF
908
909         foreach my $hash (keys(%hashtable)) {
910                 # For colldef, weight LOCALES to UTF-8
911                 #     Sort as upper-case and reverse to achieve it
912                 #     Make en_US, ru_RU, and ca_AD preferred
913                 my @files;
914                 if ($TYPE eq "colldef") {
915                         @files = sort {
916                                 if ($a eq 'en_x_US.UTF-8' ||
917                                     $a eq 'ru_x_RU.UTF-8' ||
918                                     $a eq 'ca_x_AD.UTF-8') { return -1; }
919                                 elsif ($b eq 'en_x_US.UTF-8' ||
920                                        $b eq 'ru_x_RU.UTF-8' ||
921                                        $b eq 'ca_x_AD.UTF-8') { return 1; }
922                                 else { return uc($b) cmp uc($a); }
923                                 } keys(%{$hashtable{$hash}});
924                 } elsif ($TYPE eq "ctypedef") {
925                         @files = sort {
926                                 if ($a eq 'en_x_US.UTF-8') { return -1; }
927                                 elsif ($b eq 'en_x_US.UTF-8') { return 1; }
928                                 if ($a =~ /^en_x_US/) { return -1; }
929                                 elsif ($b =~ /^en_x_US/) { return 1; }
930
931                                 if ($a =~ /^en_x_GB.ISO8859-15/ ||
932                                     $a =~ /^ru_x_RU/) { return -1; }
933                                 elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
934                                        $b =~ /ru_x_RU/) { return 1; }
935                                 else { return uc($b) cmp uc($a); }
936
937                                 } keys(%{$hashtable{$hash}});
938                 } else {
939                         @files = sort {
940                                 if ($a =~ /_Comm_/ ||
941                                     $b eq 'en_x_US.UTF-8') { return 1; }
942                                 elsif ($b =~ /_Comm_/ ||
943                                        $a eq 'en_x_US.UTF-8') { return -1; }
944                                 else { return uc($b) cmp uc($a); }
945                                 } keys(%{$hashtable{$hash}});
946                 }
947                 if ($#files > 0) {
948                         my $link = shift(@files);
949                         $link =~ s/_x_/_/;      # strip family if none there
950                         foreach my $file (@files) {
951                                 my @a = split(/_/, $file);
952                                 my @b = split(/\./, $a[-1]);
953                                 $file =~ s/_x_/_/;
954                                 print FOUT "SAME+=\t\t$link $file\n";
955                                 undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
956                         }
957                 }
958         }
959
960         foreach my $l (sort keys(%languages)) {
961         foreach my $f (sort keys(%{$languages{$l}})) {
962         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
963                 next if ($#filter == 2 && ($filter[0] ne $l
964                     || $filter[1] ne $f || $filter[2] ne $c));
965                 next if (defined $languages{$l}{$f}{definitions}
966                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
967                 if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
968                  && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
969                         print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
970                             "${c} - not read\n";
971                         next;
972                 }
973                 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
974                         my $file = $l . "_";
975                         $file .= $f . "_" if ($f ne "x");
976                         $file .= $c;
977                         next if (!defined $languages{$l}{$f}{data}{$c}{$e});
978                         print FOUT "LOCALES+=\t$file.$e\n";
979                 }
980
981                 if (defined $languages{$l}{$f}{nc_link}) {
982                         foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
983                                 my $file = $l . "_";
984                                 $file .= $f . "_" if ($f ne "x");
985                                 $file .= $c;
986                                 print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
987                         }
988                 }
989
990                 if (defined $languages{$l}{$f}{e_link}) {
991                         foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
992                                 my @a = split(/:/, $el);
993                                 my $file = $l . "_";
994                                 $file .= $f . "_" if ($f ne "x");
995                                 $file .= $c;
996                                 print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
997                         }
998                 }
999
1000         }
1001         }
1002         }
1003
1004         print FOUT <<EOF;
1005
1006 FILES=          \${LOCALES:S/\$/.${SRCOUT2}/}
1007 CLEANFILES=     \${FILES}
1008
1009 .for f t in \${SAME}
1010 SYMLINKS+=      ../\$f/\${FILESNAME} \\
1011     \${LOCALEDIR}/\$t/\${FILESNAME}
1012 .endfor
1013
1014 .for f in \${LOCALES}
1015 FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
1016 .endfor
1017
1018 ${SRCOUT3}.include <bsd.prog.mk>
1019 EOF
1020
1021         close(FOUT);
1022 }