]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - tools/tools/locale/tools/cldr2def.pl
Update to bmake-20160315
[FreeBSD/FreeBSD.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2 # $FreeBSD$
3
4 use strict;
5 use File::Copy;
6 use XML::Parser;
7 use Tie::IxHash;
8 use Data::Dumper;
9 use Getopt::Long;
10 use Digest::SHA qw(sha1_hex);
11 require "charmaps.pm";
12
13
# Command-line synopsis, printed whenever the invocation is unusable.
my $usage = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
        print $usage;
        exit(1);
}

my $DEFENCODING = "UTF-8";
# Locale selected with --lc, split into three components by
# get_languages(); stays empty when every locale is to be processed.
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
                "cldr=s"        => \$CLDRDIR,
                "unidata=s"     => \$UNIDATADIR,
                "etc=s"         => \$ETCDIR,
                "type=s"        => \$TYPE,
                "lc=s"          => \$doonly
            );

# GetOptions() returns false on a malformed command line, and the loaders
# below build file names from these options, so stop here instead of
# running them with undefined directories.
if (!$result
    || grep { !defined $_ } ($CLDRDIR, $UNIDATADIR, $ETCDIR, $TYPE)) {
        print $usage;
        exit(1);
}

# encoding => Unicode code => local character code (see get_encodings)
my %convertors = ();

# {code2name} and {name2code} tables read from UnicodeData.txt
my %ucd = ();
# language => country => field name => raw field text (see get_fields)
my %values = ();
# SHA-1 of generated content => names of the locales sharing it
my %hashtable = ();
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

# character name => hex string of its UTF-8 bytes (see get_utf8map)
my %utf8map = ();
# character name => previous charmap name that has the same bytes
my %utf8aliases = ();
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied to Tie::IxHash so that keys come back in
# insertion order.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
56
# LC_* category name associated with each definition type.
my %FILESNAMES = (
        monetdef   => "LC_MONETARY",
        timedef    => "LC_TIME",
        msgdef     => "LC_MESSAGES",
        numericdef => "LC_NUMERIC",
        colldef    => "LC_COLLATE",
        ctypedef   => "LC_CTYPE",
);

# Callbacks that a field specification can name; print_fields() stores
# the per-call context under "data" before invoking one of them.
my %callback = (
        mdorder  => \&callback_mdorder,
        altmon   => \&callback_altmon,
        cformat  => \&callback_cformat,
        dtformat => \&callback_dtformat,
        cbabmon  => \&callback_abmon,
        data     => undef,
);

# Heading that print_fields() writes above each field.  Most fields are
# labelled with their own name; the remaining ones are spelled out below.
my %DESC = (
        (map { $_ => $_ } qw(
                decimal_point thousands_sep grouping
                currency_symbol mon_decimal_point mon_thousands_sep
                mon_grouping positive_sign negative_sign
                int_frac_digits frac_digits
                p_cs_precedes p_sep_by_space
                n_cs_precedes n_sep_by_space
                p_sign_posn n_sign_posn
                yesexpr noexpr yesstr nostr
                c_fmt md_order
        )),

        # monetdef
        int_curr_symbol => "int_curr_symbol (last character always SPACE)",

        # timedef
        abmon           => "Short month names",
        mon             => "Long month names (as in a date)",
        abday           => "Short weekday names",
        day             => "Long weekday names",
        t_fmt           => "X_fmt",
        d_fmt           => "x_fmt",
        am_pm           => "AM/PM",
        d_t_fmt         => "date_fmt",
        altmon          => "Long month names (without case ending)",
        t_fmt_ampm      => "ampm_fmt",
);
120
# Field lists for the definition types that are produced by get_fields()
# and print_fields().  Each list holds "field => format" pairs in output
# order.  As interpreted by print_fields(), the formats are:
#   "i", "ai"  value is written as read
#   "s"        one string whose <symbolic names> are decoded
#   "as"       ';'-separated strings, decoded and written one per line
#   "<callback<source<format"
#              the value of field "source" is first passed through
#              $callback{callback}, then handled as "format"
my %fields_for = (
        numericdef => [
            "decimal_point"     => "s",
            "thousands_sep"     => "s",
            "grouping"          => "ai",
        ],
        monetdef => [
            "int_curr_symbol"   => "s",
            "currency_symbol"   => "s",
            "mon_decimal_point" => "s",
            "mon_thousands_sep" => "s",
            "mon_grouping"      => "ai",
            "positive_sign"     => "s",
            "negative_sign"     => "s",
            "int_frac_digits"   => "i",
            "frac_digits"       => "i",
            "p_cs_precedes"     => "i",
            "p_sep_by_space"    => "i",
            "n_cs_precedes"     => "i",
            "n_sep_by_space"    => "i",
            "p_sign_posn"       => "i",
            "n_sign_posn"       => "i",
        ],
        msgdef => [
            "yesexpr"           => "s",
            "noexpr"            => "s",
            "yesstr"            => "s",
            "nostr"             => "s",
        ],
        timedef => [
            "abmon"             => "<cbabmon<abmon<as",
            "mon"               => "as",
            "abday"             => "as",
            "day"               => "as",
            "t_fmt"             => "s",
            "d_fmt"             => "s",
            "c_fmt"             => "<cformat<d_t_fmt<s",
            "am_pm"             => "as",
            "d_t_fmt"           => "<dtformat<d_t_fmt<s",
            "altmon"            => "<altmon<mon<as",
            "md_order"          => "<mdorder<d_fmt<s",
            "t_fmt_ampm"        => "s",
        ],
);

# Run the generator selected by --type.  A missing or unknown type is
# reported instead of silently producing nothing.
if (!defined $TYPE) {
        die("No --type given\n");
}
elsif ($TYPE eq "colldef") {
        transform_collation();
        make_makefile();
}
elsif ($TYPE eq "ctypedef") {
        transform_ctypes();
        make_makefile();
}
elsif (exists $fields_for{$TYPE}) {
        %keys = @{$fields_for{$TYPE}};
        get_fields();
        print_fields();
        make_makefile();
}
else {
        die("Unknown type '$TYPE'\n");
}
197
# Callback for the c_fmt field: removes the first " %Z" and the first
# " %z" from the format string it is given.
# Returns the edited string, or undef when no string was supplied (the
# same convention as callback_mdorder).
sub callback_cformat {
        my $s = shift;
        return undef if (!defined $s);
        $s =~ s/ %Z//;
        $s =~ s/ %z//;
        return $s;
}
204
# Callback for the d_t_fmt field.  For the locale ja_JP (taken from
# $callback{data}{l} and $callback{data}{c}) "%A " is inserted in front
# of the first "%H" that directly follows "> "; every other locale gets
# the string back unchanged.
# Returns undef when no string was supplied (the same convention as
# callback_mdorder).
sub callback_dtformat {
        my $s = shift;
        return undef if (!defined $s);
        my $nl = $callback{data}{l} . "_" . $callback{data}{c};

        if ($nl eq 'ja_JP') {
            $s =~ s/(> )(%H)/$1%A $2/;
        }
        return $s;
}
214
# Callback for the md_order field: keeps only the letters "d" and "m" of
# the string it is given, in their original order.
# Returns undef when no string was supplied.
sub callback_mdorder {
        my ($fmt) = @_;

        return undef unless (defined $fmt);

        # Delete every character that is not 'd' or 'm'.
        (my $order = $fmt) =~ tr/dm//cd;
        return $order;
}
221
# Callback for the altmon field.  When %alternativemonths has an entry
# for the language/country found in $callback{data}, that entry is
# returned with the whitespace around each ';'-separated name removed.
# Otherwise the string passed in (the mon value) is returned unchanged.
sub callback_altmon {
        my ($mon) = @_;

        my $alt = $alternativemonths{$callback{data}{l}}{$callback{data}{c}};
        return $mon unless (defined $alt);

        my @trimmed = map {
                my $name = $_;
                $name =~ s/^\s+//;
                $name =~ s/\s+$//;
                $name;
        } split(";", $alt);

        return join(";", @trimmed);
}
241
# Callback for the abmon field.  For the CJK locales listed below, a
# month name that starts with a single digit (<one>..<nine>, but not
# <one> followed by <zero>, <one> or <two>) gets a leading <space> so
# that columns line up (style established in FreeBSD in 2001).  Any
# other locale gets the string back unchanged.
sub callback_abmon {
        my ($names) = @_;

        my $locale = $callback{data}{l} . "_" . $callback{data}{c};
        my %padded = map { $_ => 1 }
            ('ja_JP', 'ko_KR', 'zh_CN', 'zh_HK', 'zh_TW');
        return $names if (!$padded{$locale});

        my @result;
        foreach my $name (split(";", $names)) {
                my $single_digit =
                    $name =~ /^"<(two|three|four|five|six|seven|eight|nine)>/;
                if (!$single_digit) {
                        $single_digit = ($name =~ /^"<one>/
                            && $name !~ /^"<one>(<zero>|<one>|<two>)/);
                }
                $name =~ s/^"/"<space>/ if ($single_digit);
                push @result, $name;
        }
        return join(";", @result);
}
265
266 ############################
267
# Fill $ucd{code2name} and $ucd{name2code} from the first two
# ';'-separated columns of UnicodeData.txt in the given directory.
# Lines with fewer than two columns (for example blank lines) are
# skipped.  Dies when the file cannot be opened.
sub get_unidata {
        my $directory = shift;
        my $file = "$directory/UnicodeData.txt";

        open(my $fin, '<', $file)
            or die("Cannot open $file: $!");

        while (my $l = <$fin>) {
                chomp($l);
                my @a = split(/;/, $l);
                next if ($#a < 1);

                $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
                $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
        }
        close($fin);
        return;
}
284
# Read the CHARMAP section of a charmap file into %utf8map, keyed by the
# character name (underscores turned into spaces), with the "\x" prefixes
# removed from the byte codes.  When an entry has the same byte codes as
# the entry before it, the earlier name is recorded in %utf8aliases.
#
# A file that cannot be opened is reported and leaves both hashes
# untouched; lines that do not look like "<name> codes" are ignored.
sub get_utf8map {
        my $file = shift;

        my $fin;
        if (!open($fin, '<', $file)) {
                warn("Cannot open $file: $!\n");
                return;
        }
        my @lines = <$fin>;
        close($fin);
        chomp(@lines);

        my $prev_k = undef;
        my $prev_v = "";
        my $incharmap = 0;
        foreach my $l (@lines) {
                $l =~ s/\r//;
                next if ($l =~ /^\#/);
                next if ($l eq "");

                if ($l eq "CHARMAP") {
                        $incharmap = 1;
                        next;
                }

                next if (!$incharmap);
                last if ($l eq "END CHARMAP");

                # Only use the captures when the line really matched;
                # otherwise $1 and $2 do not describe this line.
                next if ($l !~ /^<([^\s]+)>\s+(.*)/);
                my ($k, $v) = ($1, $2);
                $k =~ s/_/ /g;          # unicode char string
                $v =~ s/\\x//g;         # UTF-8 char code
                $utf8map{$k} = $v;

                $utf8aliases{$k} = $prev_k if ($prev_v eq $v);

                $prev_v = $v;
                $prev_k = $k;
        }
        return;
}
322
# For every encoding named in %encodings, read "$dir/<encoding>.TXT" and
# record in $convertors{encoding} the mapping from the second column
# (Unicode code) to the first column (local code), both upper-cased and
# without a leading "0x".  Encodings whose file cannot be opened are
# reported and skipped; the others are marked with 1 in %encodings.
sub get_encodings {
        my $dir = shift;
        foreach my $e (sort(keys(%encodings))) {
                my $fin;
                if (!open($fin, '<', "$dir/$e.TXT")) {
                        print "Cannot open charmap for $e\n";
                        next;
                }
                $encodings{$e} = 1;

                while (my $l = <$fin>) {
                        chomp($l);
                        $l =~ s/\r//;
                        next if ($l =~ /^\#/);
                        next if ($l eq "");

                        my @a = split(" ", $l);
                        next if ($#a < 1);
                        $a[0] =~ s/^0[xX]//;    # local char code
                        $a[1] =~ s/^0[xX]//;    # unicode char code
                        $convertors{$e}{uc($a[1])} = uc($a[0]);
                }
                close($fin);
        }
        return;
}
348
# Load the locale description returned by get_xmldata($ETCDIR) into
# %languages, %translations, %alternativemonths and %encodings, then turn
# the --lc value (if any) into the three-element @filter used by the
# generators.  A two-part value "aa_BB" becomes (aa, x, BB); a
# three-part value "aa_Cccc_BB" is used as is.  Any other shape is
# rejected, because an empty @filter means "process every locale".
sub get_languages {
        my %data = get_xmldata($ETCDIR);
        %languages = %{$data{L}};
        %translations = %{$data{T}};
        %alternativemonths = %{$data{AM}};
        %encodings = %{$data{E}};

        return if (!defined $doonly);

        my @a = split(/_/, $doonly);
        if ($#a == 1) {
                $filter[0] = $a[0];
                $filter[1] = "x";
                $filter[2] = $a[1];
        } elsif ($#a == 2) {
                $filter[0] = $a[0];
                $filter[1] = $a[1];
                $filter[2] = $a[2];
        } else {
                die("Invalid --lc value '$doonly': " .
                    "expected two or three parts separated by '_'\n");
        }

        print Dumper(@filter);
        return;
}
372
# Generate the LC_CTYPE sources in "$TYPE.draft/".
#
# For every locale that passes @filter and the per-language
# "definitions" restriction:
#  - the default-encoding source is a verbatim copy of
#    $CLDRDIR/posix/xx_Comm_US.UTF-8.src;
#  - every other encoding gets the comment_char/escape_char lines and the
#    LC_CTYPE section of the locale's own default-encoding source,
#    preceded by a "do not edit" banner.
# The SHA-1 of what was read is stored in %languages and %hashtable.
# Unreadable inputs are reported on STDERR and skipped; an output file
# that cannot be written is fatal.
sub transform_ctypes {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;
                my $actfile = $file;

                my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
                my $fin;
                if (! -f $filename || !open($fin, '<', $filename)) {
                        print STDERR "Cannot open $filename\n";
                        next;
                }
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines = <$fin>;
                close($fin);

                my $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

                my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, '>', $outname)
                    or die("Cannot write $outname: $!");
                print $fout @lines;
                close($fout)
                    or die("Cannot write $outname: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        next if ($enc eq $DEFENCODING);
                        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                        if (! -f $filename || !open($fin, '<', $filename)) {
                                print STDERR "Cannot open $filename\n";
                                next;
                        }
                        @lines = ();
                        while (<$fin>) {
                                if ((/^comment_char\s/) || (/^escape_char\s/)){
                                        push @lines, $_;
                                }
                                if (/^LC_CTYPE/../^END LC_CTYPE/) {
                                        push @lines, $_;
                                }
                        }
                        close($fin);

                        # The encoding name is part of the hash, so
                        # identical text in two encodings hashes
                        # differently.
                        my $uhex = sha1_hex(join("\n", @lines) . $enc);
                        $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
                        $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

                        $outname = "$TYPE.draft/$actfile.$enc.src";
                        open($fout, '>', $outname)
                            or die("Cannot write $outname: $!");
                        print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                        print $fout @lines;
                        close($fout)
                            or die("Cannot write $outname: $!");
                }
        }
        }
        }
}
444
445
# Generate the LC_COLLATE sources in "$TYPE.draft/".
#
# For every locale that passes @filter and the per-language
# "definitions" restriction, the default-encoding source is looked up in
# $CLDRDIR/posix, then in $ETCDIR, then under the language's "fallback"
# name in $CLDRDIR/posix.  Its comment_char/escape_char lines and its
# LC_COLLATE section (runs of spaces squeezed to one) are written behind
# a "do not edit" banner, and that file is copied for every other
# encoding of the locale.  The SHA-1 of the extracted lines is stored in
# %languages and %hashtable.
# A locale without a usable source is reported on STDERR and skipped; an
# output file that cannot be written or copied is fatal.
sub transform_collation {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;
                # Output keeps the locale's own name even when the input
                # comes from the fallback.
                my $actfile = $file;

                my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                        $file = $languages{$l}{$f}{fallback};
                        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                my $fin;
                if (! -f $filename || !open($fin, '<', $filename)) {
                        print STDERR
                            "Cannot open $file.$DEFENCODING.src or fallback\n";
                        next;
                }
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines;
                while (<$fin>) {
                        if ((/^comment_char\s/) || (/^escape_char\s/)){
                                push @lines, $_;
                        }
                        if (/^LC_COLLATE/../^END LC_COLLATE/) {
                                $_ =~ s/[ ]+/ /g;
                                push @lines, $_;
                        }
                }
                close($fin);

                my $shex = sha1_hex(join("\n", @lines));
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
                $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

                my $outname = "$TYPE.draft/$actfile.$DEFENCODING.src";
                open(my $fout, '>', $outname)
                    or die("Cannot write $outname: $!");
                print $fout <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                print $fout @lines;
                close($fout)
                    or die("Cannot write $outname: $!");

                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        next if ($enc eq $DEFENCODING);
                        my $copyname = "$TYPE.draft/$actfile.$enc.src";
                        copy ($outname, $copyname)
                            or die("Cannot copy $outname to $copyname: $!");
                        $languages{$l}{$f}{data}{$c}{$enc} = $shex;
                        $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
                }
        }
        }
        }
}
515
# Read, for every selected locale, the raw text of each field named in
# %keys from the locale's default-encoding source and store it in
# $values{language}{country}{field}.
#
# The source is looked up in $CLDRDIR/posix, then in $ETCDIR, then under
# the language's "fallback" name in $CLDRDIR/posix.  A field starts on
# the line beginning with its name and continues over every line that
# ends in "/".  Underscores inside <...> names are turned into spaces;
# an underscore anywhere else is a fatal error.
sub get_fields {
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
                my $file;
                $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                $filename = "$ETCDIR/$file.$DEFENCODING.src"
                    if (! -f $filename);
                if (! -f $filename
                 && defined $languages{$l}{$f}{fallback}) {
                        $file = $languages{$l}{$f}{fallback};
                        $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
                }
                $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
                    if (! -f $filename);
                my $fin;
                if (! -f $filename || !open($fin, '<', $filename)) {
                        print STDERR
                            "Cannot open $file.$DEFENCODING.src or fallback\n";
                        next;
                }
                print "Reading from $filename for ${l}_${f}_${c}\n";
                $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
                my @lines = <$fin>;
                chomp(@lines);
                close($fin);

                foreach my $k (keys(%keys)) {
                        # Restart for each field so that an unterminated
                        # continuation at the end of the file cannot make
                        # the next field swallow the first line.
                        my $continue = 0;
                        foreach my $line (@lines) {
                                $line =~ s/\r//;
                                next if (!$continue && $line !~ /^$k\s/);
                                if ($continue) {
                                        $line =~ s/^\s+//;
                                } else {
                                        $line =~ s/^$k\s+//;
                                        # Start from scratch: %values is
                                        # keyed by language and country
                                        # only, so appending would glue
                                        # this text to that of another
                                        # locale with the same pair.
                                        # NOTE(review): such locales still
                                        # share one slot; the last one
                                        # read wins.
                                        $values{$l}{$c}{$k} = "";
                                }

                                $continue = ($line =~ /\/$/);
                                $line =~ s/\/$// if ($continue);

                                # Stops as soon as nothing more can be
                                # replaced, so a stray underscore reaches
                                # the check below instead of looping
                                # forever.
                                1 while ($line =~
                                    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
                                die "_ in data - $line" if ($line =~ /_/);
                                $values{$l}{$c}{$k} .= $line;

                                last if (!$continue);
                        }
                }
        }
        }
        }
}
583
# Convert the character named $s into its byte sequence in encoding $e.
#
# For UTF-8 the bytes come straight from %utf8map.  For any other
# encoding the name is first turned into a Unicode code (via
# $ucd{name2code}, a %utf8aliases alias, or a "ucc" entry in
# %translations) and then looked up in $convertors{$e}; "hex" and
# "unicode" entries in %translations serve as overrides and fallbacks.
#
# Returns the bytes as a string.  Dies when no code can be found.  When
# the code found is not an even, non-zero number of hex digits, a
# message is printed to STDERR and the string "length = N" is returned.
sub decodecldr {
        my $e = shift;
        my $s = shift;

        my $v = undef;

        if ($e eq "UTF-8") {
                #
                # Conversion to UTF-8 can be done from the Unicode name to
                # the UTF-8 character code.
                #
                $v = $utf8map{$s};
                die "Cannot convert $s in $e (charmap)" if (!defined $v);
        } else {
                #
                # Conversion to these encodings can be done from the Unicode
                # name to Unicode code to the encodings code.
                #
                my $ucc = undef;
                $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
                $ucc = $ucd{name2code}{$utf8aliases{$s}}
                        if (!defined $ucc
                         && $utf8aliases{$s}
                         && defined $ucd{name2code}{$utf8aliases{$s}});

                if (!defined $ucc) {
                        if (defined $translations{$e}{$s}{hex}) {
                                $v = $translations{$e}{$s}{hex};
                                $ucc = 0;
                        } elsif (defined $translations{$e}{$s}{ucc}) {
                                $ucc = $translations{$e}{$s}{ucc};
                        }
                }

                die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
                $v = $convertors{$e}{$ucc} if (!defined $v);

                $v = $translations{$e}{$s}{hex}
                        if (!defined $v && defined $translations{$e}{$s}{hex});

                if (!defined $v && defined $translations{$e}{$s}{unicode}) {
                        # Resolve the substitute name on its own: $ucc is
                        # always defined at this point, so reusing it
                        # would never let the alias lookup run.
                        my $ucn = $translations{$e}{$s}{unicode};
                        my $alt = $ucd{name2code}{$ucn};
                        $alt = $ucd{name2code}{$utf8aliases{$ucn}}
                                if (!defined $alt
                                 && defined $utf8aliases{$ucn});
                        $v = $convertors{$e}{$alt} if (defined $alt);
                }

                die "Cannot convert $s in $e (charmap)" if (!defined $v);
        }

        # Two hex digits per byte, for codes of any whole number of bytes.
        my $len = length($v);
        if ($len > 0 && $len % 2 == 0) {
                return pack("C*",
                    map { hex(substr($v, 2 * $_, 2)) } (0 .. $len / 2 - 1));
        }
        print STDERR "Cannot convert $e $s\n";
        return "length = " . $len;

}
646
# Look up the %translations entry for name $v in encoding $enc.
# Returns the entry, or undef when there is none.
sub translate {
        my ($enc, $v) = @_;

        my $entry = $translations{$enc}{$v};
        return (defined $entry) ? $entry : undef;
}
654
# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale
# and each of its encodings, from the field text gathered in %values.
#
# Each field named in %keys is written as a "# <description>" heading
# followed by its value, formatted according to the field's entry in
# %keys:
#   "i", "ai"  value written as is
#   "s"        one string with every <symbolic name> decoded
#   "as"       ';'-separated strings, decoded and written one per line
#   ">other"   use the row "other" instead
#   "<callback<source<format"
#              pass the value of field "source" through
#              $callback{callback}, then treat the result as "format"
# The file is written as ".new" and renamed to ".src", or to ".failed"
# when a name could not be decoded.  The SHA-1 of the generated text is
# stored in %languages and %hashtable.
sub print_fields {
        # Strip the surrounding double quotes from $text and replace
        # every <symbolic name> by its bytes in $enc.  The text is split
        # once, so decoded bytes are never looked at again (a decoded
        # '<' cannot start a new name) and a failed lookup cannot repeat
        # forever: the name is kept as is and $$okay is cleared.
        my $expand = sub {
                my ($enc, $k, $text, $okay) = @_;

                $text =~ s/^"//;
                $text =~ s/"$//;

                # Odd elements are the names found between '<' and '>'.
                my @parts = split(/<(.*?)>/, $text, -1);
                for (my $i = 1; $i <= $#parts; $i += 2) {
                        my $cm = $parts[$i];
                        my $rv = decodecldr($enc, $cm);
                        if (!defined $rv) {
                                print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
                                ${$okay} = 0;
                                $rv = "<$cm>";
                        }
                        $parts[$i] = $rv;
                }
                return join("", @parts);
        };

        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if ($#filter == 2 && ($filter[0] ne $l
                    || $filter[1] ne $f || $filter[2] ne $c));
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                                print "Skipping ${l}_" .
                                    ($f eq "x" ? "" : "${f}_") .
                                    "${c} - not read\n";
                                next;
                        }
                        my $file = $l;
                        $file .= "_" . $f if ($f ne "x");
                        $file .= "_" . $c;
                        print "Writing to $file in $enc\n";

                        if ($enc ne $DEFENCODING &&
                            !defined $convertors{$enc}) {
                                print "Failed! Cannot convert to $enc.\n";
                                next;
                        }

                        my $newname = "$TYPE.draft/$file.$enc.new";
                        open(my $fout, '>', $newname)
                            or die("Cannot write $newname: $!");
                        my $okay = 1;
                        my $output = "";
                        print $fout <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
                        foreach my $k (keys(%keys)) {
                                my $fmt = $keys{$k};

                                die("Unknown $k in \%DESC")
                                        if (!defined $DESC{$k});

                                $output .= "#\n# $DESC{$k}\n";

                                # Replace one row with another
                                if ($fmt =~ /^>/) {
                                        $k = substr($fmt, 1);
                                        $fmt = $keys{$k};
                                }

                                # Callback function
                                if ($fmt =~ /^\</) {
                                        $callback{data}{c} = $c;
                                        $callback{data}{k} = $k;
                                        $callback{data}{l} = $l;
                                        $callback{data}{e} = $enc;
                                        my @a = split(/\</, substr($fmt, 1));
                                        my $rv =
                                            $callback{$a[0]}->($values{$l}{$c}{$a[1]});
                                        # NOTE(review): the result is
                                        # stored back, so later encodings
                                        # of this locale see it as input.
                                        $values{$l}{$c}{$k} = $rv;
                                        $fmt = $a[2];
                                        $callback{data} = undef;
                                }

                                my $v = $values{$l}{$c}{$k};
                                $v = "undef" if (!defined $v);

                                if ($fmt eq "i" || $fmt eq "ai") {
                                        $output .= "$v\n";
                                        next;
                                }
                                if ($fmt eq "s") {
                                        $output .=
                                            $expand->($enc, $k, $v, \$okay) .
                                            "\n";
                                        next;
                                }
                                if ($fmt eq "as") {
                                        foreach my $item (split(/;/, $v)) {
                                                $output .=
                                                    $expand->($enc, $k, $item,
                                                        \$okay) . "\n";
                                        }
                                        next;
                                }

                                die("$k is '$fmt'");

                        }

                        $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
                        $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
                        print $fout "$output# EOF\n";
                        close($fout)
                            or die("Cannot write $newname: $!");

                        if ($okay) {
                                rename($newname,
                                    "$TYPE.draft/$file.$enc.src");
                        } else {
                                rename($newname,
                                    "$TYPE.draft/$file.$enc.failed");
                        }
                }
        }
        }
        }
}
804
# Generate $TYPE.draft/Makefile for the locale category selected by $TYPE.
#
# The Makefile consists of:
#   - a header with the .src -> output suffix rule for this category,
#   - SAME+= pairs for locales whose generated output is identical
#     (grouped through %hashtable, which is keyed by the SHA-1 of the output),
#   - LOCALES+= entries for every locale that still owns its own source file,
#   - SAME+= pairs for the legacy nc_link / e_link aliases, and
#   - a footer deriving FILES, SYMLINKS and FILESDIR_* from those lists.
#
# Nothing is generated when @filter is non-empty.  Takes no arguments and
# returns nothing useful; dies if the Makefile cannot be written.
sub make_makefile {
	return if ($#filter > -1);
	print "Creating Makefile for $TYPE\n";

	my $SRCOUT;
	my $SRCOUT2;
	my $SRCOUT3 = "";
	my $MAPLOC;
	# Location of the character maps, shared by colldef and ctypedef.
	my $final_maps = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
			"locale/etc/final-maps\n";

	# Order in which the members of one identical-output group are sorted.
	# The first member after sorting becomes the link source; every other
	# member becomes a SAME+= alias of it.
	my $by_preference;

	if ($TYPE eq "colldef") {
		$SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
			"\t-f \${MAPLOC}/map.UTF-8 " .
			"\${.OBJDIR}/\${.IMPSRC:T:R}";
		$MAPLOC = $final_maps;
		$SRCOUT2 = "LC_COLLATE";

		# For colldef, weight LOCALES to UTF-8
		#     Sort as upper-case and reverse to achieve it
		#     Make en_US, ru_RU, and ca_AD preferred
		my %preferred = map { $_ => 1 }
		    ('en_x_US.UTF-8', 'ru_x_RU.UTF-8', 'ca_x_AD.UTF-8');
		$by_preference = sub {
			return -1 if ($preferred{$a});
			return 1 if ($preferred{$b});
			return uc($b) cmp uc($a);
		};
	}
	elsif ($TYPE eq "ctypedef") {
		$SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
			"\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
			" || true";
		$SRCOUT2 = "LC_CTYPE";
		$MAPLOC = $final_maps;
		$SRCOUT3 = "## SYMPAIRS\n\n" .
			".for s t in \${SYMPAIRS}\n" .
			"\${t:S/src\$/LC_CTYPE/}: " .
			"\$s\n" .
			"\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
			"\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
			"\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
			" || true\n" .
			".endfor\n\n";

		# Preference: en_US.UTF-8, then any en_US, then
		# en_GB.ISO8859-15 / ru_RU, then reverse upper-case order.
		# Both operands are tested with the same anchored patterns.
		$by_preference = sub {
			return -1 if ($a eq 'en_x_US.UTF-8');
			return 1 if ($b eq 'en_x_US.UTF-8');
			return -1 if ($a =~ /^en_x_US/);
			return 1 if ($b =~ /^en_x_US/);
			return -1 if ($a =~ /^en_x_GB\.ISO8859-15/ ||
			    $a =~ /^ru_x_RU/);
			return 1 if ($b =~ /^en_x_GB\.ISO8859-15/ ||
			    $b =~ /^ru_x_RU/);
			return uc($b) cmp uc($a);
		};
	}
	else {
		$SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
		$SRCOUT2 = "out";
		$MAPLOC = "";

		# Names containing _Comm_ sort last, en_US.UTF-8 sorts first,
		# everything else in reverse upper-case order.
		$by_preference = sub {
			return 1 if ($a =~ /_Comm_/ || $b eq 'en_x_US.UTF-8');
			return -1 if ($b =~ /_Comm_/ || $a eq 'en_x_US.UTF-8');
			return uc($b) cmp uc($a);
		};
	}

	my $makefile = "$TYPE.draft/Makefile";
	open(my $fout, '>', $makefile)
	    or die("Cannot open $makefile for writing: $!\n");

	# Tabs that make(1) cares about are spelled \t so that they cannot be
	# lost to editors or whitespace conversion.
	print {$fout} <<"EOF";
# \$FreeBSD\$
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=\t\${SHAREDIR}/locale
FILESNAME=\t$FILESNAMES{$TYPE}
.SUFFIXES:\t.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

	# Collapse locales with identical output into one source plus links.
	foreach my $hash (keys(%hashtable)) {
		my @files = sort $by_preference keys(%{$hashtable{$hash}});
		next if ($#files < 1);		# nothing to share

		my $link = shift(@files);
		$link =~ s/_x_/_/;	# strip family if none there
		foreach my $file (@files) {
			# Name layout is <lang>_<family>_<country>.<encoding>
			my @name = split(/_/, $file);
			my @tail = split(/\./, $name[-1]);
			$file =~ s/_x_/_/;
			print {$fout} "SAME+=\t\t$link $file\n";
			# An undefined entry keeps this locale out of LOCALES
			# below; it is installed as a link instead.
			undef($languages{$name[0]}{$name[1]}{data}{$tail[0]}{$tail[1]});
		}
	}

	foreach my $lang (sort keys(%languages)) {
	foreach my $family (sort keys(%{$languages{$lang}})) {
	foreach my $country (sort keys(%{$languages{$lang}{$family}{data}})) {
		my $entry = $languages{$lang}{$family};

		next if (defined $entry->{definitions}
		    && $entry->{definitions} !~ /$TYPE/);

		# Locale name without encoding; the placeholder family "x"
		# is left out.
		my $file = $lang . "_";
		$file .= $family . "_" if ($family ne "x");
		$file .= $country;

		if (defined $entry->{data}{$country}{$DEFENCODING}
		 && $entry->{data}{$country}{$DEFENCODING} eq "0") {
			print "Skipping $file - not read\n";
			next;
		}

		my @encodings = sort keys(%{$entry->{data}{$country}});

		foreach my $enc (@encodings) {
			# Skip encodings already emitted as SAME links above.
			next if (!defined $entry->{data}{$country}{$enc});
			print {$fout} "LOCALES+=\t$file.$enc\n";
		}

		if (defined $entry->{nc_link}) {
			foreach my $enc (@encodings) {
				print {$fout} "SAME+=\t\t$file.$enc $entry->{nc_link}.$enc\t# legacy (lang/country change)\n";
			}
		}

		if (defined $entry->{e_link}) {
			# e_link is a space separated list of <from>:<to>
			# encoding pairs.
			foreach my $pair (split(" ", $entry->{e_link})) {
				my ($from, $to) = split(/:/, $pair);
				print {$fout} "SAME+=\t\t$file.$from $file.$to\t# legacy (same charset)\n";
			}
		}
	}
	}
	}

	print {$fout} <<"EOF";

FILES=\t\t\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=\t\${FILES}

.for f t in \${SAME}
SYMLINKS+=\t../\$f/\${FILESNAME} \\
    \${LOCALEDIR}/\$t/\${FILESNAME}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

	# Buffered write errors only surface here.
	close($fout) or die("Cannot write $makefile: $!\n");
}