]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - tools/tools/locale/tools/cldr2def.pl
Merge bmake-20160606
[FreeBSD/FreeBSD.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2 # $FreeBSD$
3
4 use strict;
5 use File::Copy;
6 use XML::Parser;
7 use Tie::IxHash;
8 use Text::Iconv;
9 use Data::Dumper;
10 use Getopt::Long;
11 use Digest::SHA qw(sha1_hex);
12 require "charmaps.pm";
13
14
15 if ($#ARGV < 2) {
16         print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
17         exit(1);
18 }
19
20 my $DEFENCODING = "UTF-8";
21 my @filter = ();
22
23 my $CLDRDIR = undef;
24 my $UNIDATADIR = undef;
25 my $ETCDIR = undef;
26 my $TYPE = undef;
27 my $doonly = undef;
28
29 my $result = GetOptions (
30                 "cldr=s"        => \$CLDRDIR,
31                 "unidata=s"     => \$UNIDATADIR,
32                 "etc=s"         => \$ETCDIR,
33                 "type=s"        => \$TYPE,
34                 "lc=s"          => \$doonly
35             );
36
37 my %convertors = ();
38
39 my %ucd = ();
40 my %values = ();
41 my %hashtable = ();
42 my %languages = ();
43 my %translations = ();
44 my %encodings = ();
45 my %alternativemonths = ();
46 get_languages();
47
48 my %utf8map = ();
49 my %utf8aliases = ();
50 get_unidata($UNIDATADIR);
51 get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
52 get_encodings("$ETCDIR/charmaps");
53
54 my %keys = ();
55 tie(%keys, "Tie::IxHash");
56 tie(%hashtable, "Tie::IxHash");
57
58 my %FILESNAMES = (
59         "monetdef"      => "LC_MONETARY",
60         "timedef"       => "LC_TIME",
61         "msgdef"        => "LC_MESSAGES",
62         "numericdef"    => "LC_NUMERIC",
63         "colldef"       => "LC_COLLATE",
64         "ctypedef"      => "LC_CTYPE"
65 );
66
67 my %callback = (
68         mdorder => \&callback_mdorder,
69         altmon => \&callback_altmon,
70         cformat => \&callback_cformat,
71         dtformat => \&callback_dtformat,
72         cbabmon => \&callback_abmon,
73         cbampm => \&callback_ampm,
74         data => undef,
75 );
76
77 my %DESC = (
78
79         # numericdef
80         "decimal_point" => "decimal_point",
81         "thousands_sep" => "thousands_sep",
82         "grouping"      => "grouping",
83
84         # monetdef
85         "int_curr_symbol"       => "int_curr_symbol (last character always " .
86                                    "SPACE)",
87         "currency_symbol"       => "currency_symbol",
88         "mon_decimal_point"     => "mon_decimal_point",
89         "mon_thousands_sep"     => "mon_thousands_sep",
90         "mon_grouping"          => "mon_grouping",
91         "positive_sign"         => "positive_sign",
92         "negative_sign"         => "negative_sign",
93         "int_frac_digits"       => "int_frac_digits",
94         "frac_digits"           => "frac_digits",
95         "p_cs_precedes"         => "p_cs_precedes",
96         "p_sep_by_space"        => "p_sep_by_space",
97         "n_cs_precedes"         => "n_cs_precedes",
98         "n_sep_by_space"        => "n_sep_by_space",
99         "p_sign_posn"           => "p_sign_posn",
100         "n_sign_posn"           => "n_sign_posn",
101
102         # msgdef
103         "yesexpr"       => "yesexpr",
104         "noexpr"        => "noexpr",
105         "yesstr"        => "yesstr",
106         "nostr"         => "nostr",
107
108         # timedef
109         "abmon"         => "Short month names",
110         "mon"           => "Long month names (as in a date)",
111         "abday"         => "Short weekday names",
112         "day"           => "Long weekday names",
113         "t_fmt"         => "X_fmt",
114         "d_fmt"         => "x_fmt",
115         "c_fmt"         => "c_fmt",
116         "am_pm"         => "AM/PM",
117         "d_t_fmt"       => "date_fmt",
118         "altmon"        => "Long month names (without case ending)",
119         "md_order"      => "md_order",
120         "t_fmt_ampm"    => "ampm_fmt",
121 );
122
123 if ($TYPE eq "colldef") {
124         transform_collation();
125         make_makefile();
126 }
127
128 if ($TYPE eq "ctypedef") {
129         transform_ctypes();
130         make_makefile();
131 }
132
133 if ($TYPE eq "numericdef") {
134         %keys = (
135             "decimal_point"     => "s",
136             "thousands_sep"     => "s",
137             "grouping"          => "ai",
138         );
139         get_fields();
140         print_fields();
141         make_makefile();
142 }
143
144 if ($TYPE eq "monetdef") {
145         %keys = (
146             "int_curr_symbol"   => "s",
147             "currency_symbol"   => "s",
148             "mon_decimal_point" => "s",
149             "mon_thousands_sep" => "s",
150             "mon_grouping"      => "ai",
151             "positive_sign"     => "s",
152             "negative_sign"     => "s",
153             "int_frac_digits"   => "i",
154             "frac_digits"       => "i",
155             "p_cs_precedes"     => "i",
156             "p_sep_by_space"    => "i",
157             "n_cs_precedes"     => "i",
158             "n_sep_by_space"    => "i",
159             "p_sign_posn"       => "i",
160             "n_sign_posn"       => "i"
161         );
162         get_fields();
163         print_fields();
164         make_makefile();
165 }
166
167 if ($TYPE eq "msgdef") {
168         %keys = (
169             "yesexpr"           => "s",
170             "noexpr"            => "s",
171             "yesstr"            => "s",
172             "nostr"             => "s"
173         );
174         get_fields();
175         print_fields();
176         make_makefile();
177 }
178
179 if ($TYPE eq "timedef") {
180         %keys = (
181             "abmon"             => "<cbabmon<abmon<as",
182             "mon"               => "as",
183             "abday"             => "as",
184             "day"               => "as",
185             "t_fmt"             => "s",
186             "d_fmt"             => "s",
187             "c_fmt"             => "<cformat<d_t_fmt<s",
188             "am_pm"             => "<cbampm<am_pm<as",
189             "d_fmt"             => "s",
190             "d_t_fmt"           => "<dtformat<d_t_fmt<s",
191             "altmon"            => "<altmon<mon<as",
192             "md_order"          => "<mdorder<d_fmt<s",
193             "t_fmt_ampm"        => "s",
194         );
195         get_fields();
196         print_fields();
197         make_makefile();
198 }
199
200 sub callback_ampm {
201         my $s = shift;
202         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
203         my $enc = $callback{data}{e};
204         my  $converter = Text::Iconv->new("utf-8", "$enc");
205
206         if ($nl eq 'ru_RU') {
207                 if ($enc eq 'UTF-8') {
208                         $s = 'дп;пп';
209                 } else {
210                         $s = $converter->convert("дп;пп");
211                 }
212         }
213         return $s;
214 }
215
216 sub callback_cformat {
217         my $s = shift;
218         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
219
220         $s =~ s/\.,/\./;
221         $s =~ s/ %Z//;
222         $s =~ s/ %z//;
223         $s =~ s/^"(%B %e, )/"%A, $1/;
224         $s =~ s/^"(%e %B )/"%A $1/;
225         return $s;
226 };
227
228 sub callback_dtformat {
229         my $s = shift;
230         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
231
232         if ($nl eq 'ja_JP') {
233                 $s =~ s/(> )(%H)/$1%A $2/;
234         }
235         $s =~ s/\.,/\./;
236         $s =~ s/^"(%B %e, )/"%A, $1/;
237         $s =~ s/^"(%e %B )/"%A $1/;
238         return $s;
239 };
240
241 sub callback_mdorder {
242         my $s = shift;
243         return undef if (!defined $s);
244         $s =~ s/[^dm]//g;
245         return $s;
246 };
247
248 sub callback_altmon {
249         # if the language/country is known in %alternative months then
250         # return that, otherwise repeat mon
251         my $s = shift;
252
253         if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
254                 my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
255                 my @cleaned;
256                 foreach (@altnames)
257                 {
258                         $_ =~ s/^\s+//;
259                         $_ =~ s/\s+$//;
260                         push @cleaned, $_;
261                 }
262                 return join(";",@cleaned);
263         }
264
265         return $s;
266 }
267
268 sub callback_abmon {
269         # for specified CJK locales, pad result with a space to enable
270         # columns to line up (style established in FreeBSD in 2001)
271         my $s = shift;
272         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
273
274         if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
275             $nl eq 'zh_HK' || $nl eq 'zh_TW') {
276                 my @monthnames = split(";", $s);
277                 my @cleaned;
278                 foreach (@monthnames)
279                 {
280                         if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
281                            ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
282                         {
283                                 $_ =~ s/^"/"<space>/;
284                         }
285                         push @cleaned, $_;
286                 }
287                 return join(";",@cleaned);
288         }
289         return $s;
290 }
291
292 ############################
293
294 sub get_unidata {
295         my $directory = shift;
296
297         open(FIN, "$directory/UnicodeData.txt")
298             or die("Cannot open $directory/UnicodeData.txt");;
299         my @lines = <FIN>;
300         chomp(@lines);
301         close(FIN);
302
303         foreach my $l (@lines) {
304                 my @a = split(/;/, $l);
305
306                 $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
307                 $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
308         }
309 }
310
311 sub get_utf8map {
312         my $file = shift;
313
314         open(FIN, $file);
315         my @lines = <FIN>;
316         close(FIN);
317         chomp(@lines);
318
319         my $prev_k = undef;
320         my $prev_v = "";
321         my $incharmap = 0;
322         foreach my $l (@lines) {
323                 $l =~ s/\r//;
324                 next if ($l =~ /^\#/);
325                 next if ($l eq "");
326
327                 if ($l eq "CHARMAP") {
328                         $incharmap = 1;
329                         next;
330                 }
331
332                 next if (!$incharmap);
333                 last if ($l eq "END CHARMAP");
334
335                 $l =~ /^<([^\s]+)>\s+(.*)/;
336                 my $k = $1;
337                 my $v = $2;
338                 $k =~ s/_/ /g;          # unicode char string
339                 $v =~ s/\\x//g;         # UTF-8 char code
340                 $utf8map{$k} = $v;
341
342                 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
343
344                 $prev_v = $v;
345                 $prev_k = $k;
346         }
347 }
348
349 sub get_encodings {
350         my $dir = shift;
351         foreach my $e (sort(keys(%encodings))) {
352                 if (!open(FIN, "$dir/$e.TXT")) {
353                         print "Cannot open charmap for $e\n";
354                         next;
355
356                 }
357                 $encodings{$e} = 1;
358                 my @lines = <FIN>;
359                 close(FIN);
360                 chomp(@lines);
361                 foreach my $l (@lines) {
362                         $l =~ s/\r//;
363                         next if ($l =~ /^\#/);
364                         next if ($l eq "");
365
366                         my @a = split(" ", $l);
367                         next if ($#a < 1);
368                         $a[0] =~ s/^0[xX]//;    # local char code
369                         $a[1] =~ s/^0[xX]//;    # unicode char code
370                         $convertors{$e}{uc($a[1])} = uc($a[0]);
371                 }
372         }
373 }
374
375 sub get_languages {
376         my %data = get_xmldata($ETCDIR);
377         %languages = %{$data{L}}; 
378         %translations = %{$data{T}}; 
379         %alternativemonths = %{$data{AM}}; 
380         %encodings = %{$data{E}}; 
381
382         return if (!defined $doonly);
383
384         my @a = split(/_/, $doonly);
385         if ($#a == 1) {
386                 $filter[0] = $a[0];
387                 $filter[1] = "x";
388                 $filter[2] = $a[1];
389         } elsif ($#a == 2) {
390                 $filter[0] = $a[0];
391                 $filter[1] = $a[1];
392                 $filter[2] = $a[2];
393         }
394
395         print Dumper(@filter);
396         return;
397 }
398
399 sub transform_ctypes {
400         foreach my $l (sort keys(%languages)) {
401         foreach my $f (sort keys(%{$languages{$l}})) {
402         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
403                 next if ($#filter == 2 && ($filter[0] ne $l
404                     || $filter[1] ne $f || $filter[2] ne $c));
405                 next if (defined $languages{$l}{$f}{definitions}
406                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
407                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
408                 my $file;
409                 $file = $l . "_";
410                 $file .= $f . "_" if ($f ne "x");
411                 $file .= $c;
412                 my $actfile = $file;
413
414                 my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
415                 if (! -f $filename) {
416                         print STDERR "Cannot open $filename\n";
417                         next;
418                 }
419                 open(FIN, "$filename");
420                 print "Reading from $filename for ${l}_${f}_${c}\n";
421                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
422                 my @lines;
423                 my $shex;
424                 my $uhex;
425                 while (<FIN>) {
426                         push @lines, $_;
427                 }
428                 close(FIN);
429                 $shex = sha1_hex(join("\n", @lines));
430                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
431                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
432                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
433                 print FOUT @lines;
434                 close(FOUT);
435                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
436                         next if ($enc eq $DEFENCODING);
437                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
438                         if (! -f $filename) {
439                                 print STDERR "Cannot open $filename\n";
440                                 next;
441                         }
442                         @lines = ();
443                         open(FIN, "$filename");
444                         while (<FIN>) {
445                                 if ((/^comment_char\s/) || (/^escape_char\s/)){
446                                         push @lines, $_;
447                                 }
448                                 if (/^LC_CTYPE/../^END LC_CTYPE/) {
449                                         push @lines, $_;
450                                 }
451                         }
452                         close(FIN);
453                         $uhex = sha1_hex(join("\n", @lines) . $enc);
454                         $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
455                         $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
456                         open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
457                         print FOUT <<EOF;
458 # Warning: Do not edit. This file is automatically extracted from the
459 # tools in /usr/src/tools/tools/locale. The data is obtained from the
460 # CLDR project, obtained from http://cldr.unicode.org/
461 # -----------------------------------------------------------------------------
462 EOF
463                         print FOUT @lines;
464                         close(FOUT);
465                 }
466         }
467         }
468         }
469 }
470
471
472 sub transform_collation {
473         foreach my $l (sort keys(%languages)) {
474         foreach my $f (sort keys(%{$languages{$l}})) {
475         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
476                 next if ($#filter == 2 && ($filter[0] ne $l
477                     || $filter[1] ne $f || $filter[2] ne $c));
478                 next if (defined $languages{$l}{$f}{definitions}
479                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
480                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
481                 my $file;
482                 $file = $l . "_";
483                 $file .= $f . "_" if ($f ne "x");
484                 $file .= $c;
485                 my $actfile = $file;
486
487                 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
488                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
489                     if (! -f $filename);
490                 if (! -f $filename
491                  && defined $languages{$l}{$f}{fallback}) {
492                         $file = $languages{$l}{$f}{fallback};
493                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
494                 }
495                 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
496                     if (! -f $filename);
497                 if (! -f $filename) {
498                         print STDERR
499                             "Cannot open $file.$DEFENCODING.src or fallback\n";
500                         next;
501                 }
502                 open(FIN, "$filename");
503                 print "Reading from $filename for ${l}_${f}_${c}\n";
504                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
505                 my @lines;
506                 my $shex;
507                 while (<FIN>) {
508                         if ((/^comment_char\s/) || (/^escape_char\s/)){
509                                 push @lines, $_;
510                         }
511                         if (/^LC_COLLATE/../^END LC_COLLATE/) {
512                                 $_ =~ s/[ ]+/ /g;
513                                 push @lines, $_;
514                         }
515                 }
516                 close(FIN);
517                 $shex = sha1_hex(join("\n", @lines));
518                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
519                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
520                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
521                 print FOUT <<EOF;
522 # Warning: Do not edit. This file is automatically extracted from the
523 # tools in /usr/src/tools/tools/locale. The data is obtained from the
524 # CLDR project, obtained from http://cldr.unicode.org/
525 # -----------------------------------------------------------------------------
526 EOF
527                 print FOUT @lines;
528                 close(FOUT);
529
530                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
531                         next if ($enc eq $DEFENCODING);
532                         copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
533                               "$TYPE.draft/$actfile.$enc.src");
534                         $languages{$l}{$f}{data}{$c}{$enc} = $shex;
535                         $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
536                 }
537         }
538         }
539         }
540 }
541
542 sub get_fields {
543         foreach my $l (sort keys(%languages)) {
544         foreach my $f (sort keys(%{$languages{$l}})) {
545         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
546                 next if ($#filter == 2 && ($filter[0] ne $l
547                     || $filter[1] ne $f || $filter[2] ne $c));
548                 next if (defined $languages{$l}{$f}{definitions}
549                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
550
551                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
552                 my $file;
553                 $file = $l . "_";
554                 $file .= $f . "_" if ($f ne "x");
555                 $file .= $c;
556
557                 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
558                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
559                     if (! -f $filename);
560                 if (! -f $filename
561                  && defined $languages{$l}{$f}{fallback}) {
562                         $file = $languages{$l}{$f}{fallback};
563                         $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
564                 }
565                 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
566                     if (! -f $filename);
567                 if (! -f $filename) {
568                         print STDERR
569                             "Cannot open $file.$DEFENCODING.src or fallback\n";
570                         next;
571                 }
572                 open(FIN, "$filename");
573                 print "Reading from $filename for ${l}_${f}_${c}\n";
574                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
575                 my @lines = <FIN>;
576                 chomp(@lines);
577                 close(FIN);
578                 my $continue = 0;
579                 foreach my $k (keys(%keys)) {
580                         foreach my $line (@lines) {
581                                 $line =~ s/\r//;
582                                 next if (!$continue && $line !~ /^$k\s/);
583                                 if ($continue) {
584                                         $line =~ s/^\s+//;
585                                 } else {
586                                         $line =~ s/^$k\s+//;
587                                 }
588
589                                 $values{$l}{$c}{$k} = ""
590                                         if (!defined $values{$l}{$c}{$k});
591
592                                 $continue = ($line =~ /\/$/);
593                                 $line =~ s/\/$// if ($continue);
594
595                                 while ($line =~ /_/) {
596                                         $line =~
597                                             s/\<([^>_]+)_([^>]+)\>/<$1 $2>/;
598                                 }
599                                 die "_ in data - $line" if ($line =~ /_/);
600                                 $values{$l}{$c}{$k} .= $line;
601
602                                 last if (!$continue);
603                         }
604                 }
605         }
606         }
607         }
608 }
609
610 sub decodecldr {
611         my $e = shift;
612         my $s = shift;
613
614         my $v = undef;
615
616         if ($e eq "UTF-8") {
617                 #
618                 # Conversion to UTF-8 can be done from the Unicode name to
619                 # the UTF-8 character code.
620                 #
621                 $v = $utf8map{$s};
622                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
623         } else {
624                 #
625                 # Conversion to these encodings can be done from the Unicode
626                 # name to Unicode code to the encodings code.
627                 #
628                 my $ucc = undef;
629                 $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
630                 $ucc = $ucd{name2code}{$utf8aliases{$s}}
631                         if (!defined $ucc
632                          && $utf8aliases{$s}
633                          && defined $ucd{name2code}{$utf8aliases{$s}});
634
635                 if (!defined $ucc) {
636                         if (defined $translations{$e}{$s}{hex}) {
637                                 $v = $translations{$e}{$s}{hex};
638                                 $ucc = 0;
639                         } elsif (defined $translations{$e}{$s}{ucc}) {
640                                 $ucc = $translations{$e}{$s}{ucc};
641                         }
642                 }
643
644                 die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
645                 $v = $convertors{$e}{$ucc} if (!defined $v);
646
647                 $v = $translations{$e}{$s}{hex}
648                         if (!defined $v && defined $translations{$e}{$s}{hex});
649
650                 if (!defined $v && defined $translations{$e}{$s}{unicode}) {
651                         my $ucn = $translations{$e}{$s}{unicode};
652                         $ucc = $ucd{name2code}{$ucn}
653                                 if (defined $ucd{name2code}{$ucn});
654                         $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
655                                 if (!defined $ucc
656                                  && defined $ucd{name2code}{$utf8aliases{$ucn}});
657                         $v = $convertors{$e}{$ucc};
658                 }
659
660                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
661         }
662
663         return pack("C", hex($v)) if (length($v) == 2);
664         return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
665                 if (length($v) == 4);
666         return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
667             hex(substr($v, 4, 2))) if (length($v) == 6);
668         print STDERR "Cannot convert $e $s\n";
669         return "length = " . length($v);
670
671 }
672
673 sub translate {
674         my $enc = shift;
675         my $v = shift;
676
677         return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
678         return undef;
679 }
680
681 sub print_fields {
682         foreach my $l (sort keys(%languages)) {
683         foreach my $f (sort keys(%{$languages{$l}})) {
684         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
685                 next if ($#filter == 2 && ($filter[0] ne $l
686                     || $filter[1] ne $f || $filter[2] ne $c));
687                 next if (defined $languages{$l}{$f}{definitions}
688                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
689                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
690                         if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
691                                 print "Skipping ${l}_" .
692                                     ($f eq "x" ? "" : "${f}_") .
693                                     "${c} - not read\n";
694                                 next;
695                         }
696                         my $file = $l;
697                         $file .= "_" . $f if ($f ne "x");
698                         $file .= "_" . $c;
699                         print "Writing to $file in $enc\n";
700
701                         if ($enc ne $DEFENCODING &&
702                             !defined $convertors{$enc}) {
703                                 print "Failed! Cannot convert to $enc.\n";
704                                 next;
705                         };
706
707                         open(FOUT, ">$TYPE.draft/$file.$enc.new");
708                         my $okay = 1;
709                         my $output = "";
710                         print FOUT <<EOF;
711 # Warning: Do not edit. This file is automatically generated from the
712 # tools in /usr/src/tools/tools/locale. The data is obtained from the
713 # CLDR project, obtained from http://cldr.unicode.org/
714 # -----------------------------------------------------------------------------
715 EOF
716                         foreach my $k (keys(%keys)) {
717                                 my $f = $keys{$k};
718
719                                 die("Unknown $k in \%DESC")
720                                         if (!defined $DESC{$k});
721
722                                 $output .= "#\n# $DESC{$k}\n";
723
724                                 # Replace one row with another
725                                 if ($f =~ /^>/) {
726                                         $k = substr($f, 1);
727                                         $f = $keys{$k};
728                                 }
729
730                                 # Callback function
731                                 if ($f =~ /^\</) {
732                                         $callback{data}{c} = $c;
733                                         $callback{data}{k} = $k;
734                                         $callback{data}{l} = $l;
735                                         $callback{data}{e} = $enc;
736                                         my @a = split(/\</, substr($f, 1));
737                                         my $rv =
738                                             &{$callback{$a[0]}}($values{$l}{$c}{$a[1]});
739                                         $values{$l}{$c}{$k} = $rv;
740                                         $f = $a[2];
741                                         $callback{data} = ();
742                                 }
743
744                                 my $v = $values{$l}{$c}{$k};
745                                 $v = "undef" if (!defined $v);
746
747                                 if ($f eq "i") {
748                                         $output .= "$v\n";
749                                         next;
750                                 }
751                                 if ($f eq "ai") {
752                                         $output .= "$v\n";
753                                         next;
754                                 }
755                                 if ($f eq "s") {
756                                         $v =~ s/^"//;
757                                         $v =~ s/"$//;
758                                         my $cm = "";
759                                         while ($v =~ /^(.*?)<(.*?)>(.*)/) {
760                                                 my $p1 = $1;
761                                                 $cm = $2;
762                                                 my $p3 = $3;
763
764                                                 my $rv = decodecldr($enc, $cm);
765 #                                               $rv = translate($enc, $cm)
766 #                                                       if (!defined $rv);
767                                                 if (!defined $rv) {
768                                                         print STDERR 
769 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
770                                                         $okay = 0;
771                                                         next;
772                                                 }
773
774                                                 $v = $p1 . $rv . $p3;
775                                         }
776                                         $output .= "$v\n";
777                                         next;
778                                 }
779                                 if ($f eq "as") {
780                                         foreach my $v (split(/;/, $v)) {
781                                                 $v =~ s/^"//;
782                                                 $v =~ s/"$//;
783                                                 my $cm = "";
784                                                 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
785                                                         my $p1 = $1;
786                                                         $cm = $2;
787                                                         my $p3 = $3;
788
789                                                         my $rv =
790                                                             decodecldr($enc,
791                                                                 $cm);
792 #                                                       $rv = translate($enc,
793 #                                                           $cm)
794 #                                                           if (!defined $rv);
795                                                         if (!defined $rv) {
796                                                                 print STDERR 
797 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
798                                                                 $okay = 0;
799                                                                 next;
800                                                         }
801
802                                                         $v = $1 . $rv . $3;
803                                                 }
804                                                 $output .= "$v\n";
805                                         }
806                                         next;
807                                 }
808
809                                 die("$k is '$f'");
810
811                         }
812
813                         $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
814                         $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
815                         print FOUT "$output# EOF\n";
816                         close(FOUT);
817
818                         if ($okay) {
819                                 rename("$TYPE.draft/$file.$enc.new",
820                                     "$TYPE.draft/$file.$enc.src");
821                         } else {
822                                 rename("$TYPE.draft/$file.$enc.new",
823                                     "$TYPE.draft/$file.$enc.failed");
824                         }
825                 }
826         }
827         }
828         }
829 }
830
831 sub make_makefile {
832         return if ($#filter > -1);
833         print "Creating Makefile for $TYPE\n";
834         my $SRCOUT;
835         my $SRCOUT2;
836         my $SRCOUT3 = "";
837         my $SRCOUT4 = "";
838         my $MAPLOC;
839         if ($TYPE eq "colldef") {
840                 $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
841                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E} " .
842                         "\${.OBJDIR}/\${.IMPSRC:T:R}";
843                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
844                                 "locale/etc/final-maps\n";
845                 $SRCOUT2 = "LC_COLLATE";
846                 $SRCOUT3 = "" .
847                         ".for f t in \${LOCALES_MAPPED}\n" .
848                         "FILES+=\t\$t.LC_COLLATE\n" .
849                         "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
850                         "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
851                         "\tlocaledef -D -U -i \${.ALLSRC} \\\n" .
852                         "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E} \\\n" .
853                         "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
854                         ".endfor\n\n";
855                 $SRCOUT4 = "## LOCALES_MAPPED\n";
856         }
857         elsif ($TYPE eq "ctypedef") {
858                 $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
859                         "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
860                         "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
861                         " || true";
862                 $SRCOUT2 = "LC_CTYPE";
863                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
864                                 "locale/etc/final-maps\n";
865                 $SRCOUT3 = "## SYMPAIRS\n\n" .
866                         ".for s t in \${SYMPAIRS}\n" .
867                         "\${t:S/src\$/LC_CTYPE/}: " .
868                         "\$s\n" .
869                         "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
870                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
871                         "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
872                         " || true\n" .
873                         ".endfor\n\n";
874         }
875         else {
876                 $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
877                 $SRCOUT2 = "out";
878                 $MAPLOC = "";
879         }
880         open(FOUT, ">$TYPE.draft/Makefile");
881         print FOUT <<EOF;
882 # \$FreeBSD\$
883 # Warning: Do not edit. This file is automatically generated from the
884 # tools in /usr/src/tools/tools/locale.
885
886 LOCALEDIR=      \${SHAREDIR}/locale
887 FILESNAME=      $FILESNAMES{$TYPE}
888 .SUFFIXES:      .src .${SRCOUT2}
889 ${MAPLOC}
890 .src.${SRCOUT2}:
891         $SRCOUT
892
893 ## PLACEHOLDER
894
895 ${SRCOUT4}
896
897 EOF
898
899         foreach my $hash (keys(%hashtable)) {
900                 # For colldef, weight LOCALES to UTF-8
901                 #     Sort as upper-case and reverse to achieve it
902                 #     Make en_US, ru_RU, and ca_AD preferred
903                 my @files;
904                 if ($TYPE eq "colldef") {
905                         @files = sort {
906                                 if ($a eq 'en_x_US.UTF-8' ||
907                                     $a eq 'ru_x_RU.UTF-8' ||
908                                     $a eq 'ca_x_AD.UTF-8') { return -1; }
909                                 elsif ($b eq 'en_x_US.UTF-8' ||
910                                        $b eq 'ru_x_RU.UTF-8' ||
911                                        $b eq 'ca_x_AD.UTF-8') { return 1; }
912                                 else { return uc($b) cmp uc($a); }
913                                 } keys(%{$hashtable{$hash}});
914                 } elsif ($TYPE eq "ctypedef") {
915                         @files = sort {
916                                 if ($a eq 'en_x_US.UTF-8') { return -1; }
917                                 elsif ($b eq 'en_x_US.UTF-8') { return 1; }
918                                 if ($a =~ /^en_x_US/) { return -1; }
919                                 elsif ($b =~ /^en_x_US/) { return 1; }
920
921                                 if ($a =~ /^en_x_GB.ISO8859-15/ ||
922                                     $a =~ /^ru_x_RU/) { return -1; }
923                                 elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
924                                        $b =~ /ru_x_RU/) { return 1; }
925                                 else { return uc($b) cmp uc($a); }
926
927                                 } keys(%{$hashtable{$hash}});
928                 } else {
929                         @files = sort {
930                                 if ($a =~ /_Comm_/ ||
931                                     $b eq 'en_x_US.UTF-8') { return 1; }
932                                 elsif ($b =~ /_Comm_/ ||
933                                        $a eq 'en_x_US.UTF-8') { return -1; }
934                                 else { return uc($b) cmp uc($a); }
935                                 } keys(%{$hashtable{$hash}});
936                 }
937                 if ($#files > 0) {
938                         my $link = shift(@files);
939                         $link =~ s/_x_/_/;      # strip family if none there
940                         foreach my $file (@files) {
941                                 my @a = split(/_/, $file);
942                                 my @b = split(/\./, $a[-1]);
943                                 $file =~ s/_x_/_/;
944                                 print FOUT "SAME+=\t\t$link $file\n";
945                                 undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
946                         }
947                 }
948         }
949
950         foreach my $l (sort keys(%languages)) {
951         foreach my $f (sort keys(%{$languages{$l}})) {
952         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
953                 next if ($#filter == 2 && ($filter[0] ne $l
954                     || $filter[1] ne $f || $filter[2] ne $c));
955                 next if (defined $languages{$l}{$f}{definitions}
956                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
957                 if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
958                  && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
959                         print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
960                             "${c} - not read\n";
961                         next;
962                 }
963                 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
964                         my $file = $l . "_";
965                         $file .= $f . "_" if ($f ne "x");
966                         $file .= $c;
967                         next if (!defined $languages{$l}{$f}{data}{$c}{$e});
968                         print FOUT "LOCALES+=\t$file.$e\n";
969                 }
970
971                 if (defined $languages{$l}{$f}{nc_link}) {
972                         foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
973                                 my $file = $l . "_";
974                                 $file .= $f . "_" if ($f ne "x");
975                                 $file .= $c;
976                                 print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
977                         }
978                 }
979
980                 if (defined $languages{$l}{$f}{e_link}) {
981                         foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
982                                 my @a = split(/:/, $el);
983                                 my $file = $l . "_";
984                                 $file .= $f . "_" if ($f ne "x");
985                                 $file .= $c;
986                                 print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
987                         }
988                 }
989
990         }
991         }
992         }
993
994         print FOUT <<EOF;
995
996 FILES=          \${LOCALES:S/\$/.${SRCOUT2}/}
997 CLEANFILES=     \${FILES}
998
999 .for f t in \${SAME}
1000 SYMLINKS+=      ../\$f/\${FILESNAME} \\
1001     \${LOCALEDIR}/\$t/\${FILESNAME}
1002 .endfor
1003
1004 .for f in \${LOCALES}
1005 FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
1006 .endfor
1007
1008 ${SRCOUT3}.include <bsd.prog.mk>
1009 EOF
1010
1011         close(FOUT);
1012 }