]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - tools/tools/locale/tools/cldr2def.pl
Improve collation string and locales support
[FreeBSD/FreeBSD.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2
3 use strict;
4 use File::Copy;
5 use XML::Parser;
6 use Tie::IxHash;
7 use Data::Dumper;
8 use Getopt::Long;
9 use Digest::SHA qw(sha1_hex);
10 require "charmaps.pm";
11
12
# Usage text, printed by both argument checks below.
my $USAGE = "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";

if ($#ARGV < 2) {
	print $USAGE;
	exit(1);
}

# Encoding of the CLDR source files; every other encoding is derived from it.
my $DEFENCODING = "UTF-8";
# Empty, or the three-element (language, family, country) selection that
# get_languages() derives from --lc.
my @filter = ();

my $CLDRDIR = undef;
my $UNIDATADIR = undef;
my $ETCDIR = undef;
my $TYPE = undef;
my $doonly = undef;

my $result = GetOptions (
		"cldr=s"	=> \$CLDRDIR,
		"unidata=s"	=> \$UNIDATADIR,
		"etc=s"		=> \$ETCDIR,
		"type=s"	=> \$TYPE,
		"lc=s"		=> \$doonly
	    );

# All four directories/type values are interpolated into paths or compared
# below, so refuse to continue when parsing failed or one of them is missing.
if (!$result || !defined $CLDRDIR || !defined $UNIDATADIR
    || !defined $ETCDIR || !defined $TYPE) {
	print $USAGE;
	exit(1);
}

my %convertors = ();		# encoding -> unicode code -> local code

my %ucd = ();			# UnicodeData.txt: code2name / name2code
my %values = ();		# language -> country -> field -> raw value
my %hashtable = ();		# SHA-1 -> set of "<locale>.<encoding>" names
my %languages = ();
my %translations = ();
my %encodings = ();
my %alternativemonths = ();
get_languages();

my %utf8map = ();		# unicode name -> UTF-8 bytes as hex digits
my %utf8aliases = ();		# unicode name -> previous name with same bytes
get_unidata($UNIDATADIR);
get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
get_encodings("$ETCDIR/charmaps");

# Both hashes are tied so that they iterate in insertion order.
my %keys = ();
tie(%keys, "Tie::IxHash");
tie(%hashtable, "Tie::IxHash");
55
# Locale category name for each definition type.
my %FILESNAMES = (
	colldef		=> "LC_COLLATE",
	ctypedef	=> "LC_CTYPE",
	monetdef	=> "LC_MONETARY",
	msgdef		=> "LC_MESSAGES",
	numericdef	=> "LC_NUMERIC",
	timedef		=> "LC_TIME",
);

# Named callbacks referenced from the "<name<source<format" field specs.
# The "data" slot is filled in by print_fields() around each call.
my %callback = (
	altmon	=> \&callback_altmon,
	cformat	=> \&callback_cformat,
	mdorder	=> \&callback_mdorder,
	data	=> undef,
);

# Comment text emitted above each field in the generated files.
my %DESC = (
	# Fields that are described by their own name (all of numericdef and
	# msgdef, most of monetdef, two of timedef).
	(map { $_ => $_ } qw(
		decimal_point thousands_sep grouping
		currency_symbol mon_decimal_point mon_thousands_sep
		mon_grouping positive_sign negative_sign
		int_frac_digits frac_digits
		p_cs_precedes p_sep_by_space
		n_cs_precedes n_sep_by_space
		p_sign_posn n_sign_posn
		yesexpr noexpr yesstr nostr
		c_fmt md_order
	)),

	# monetdef
	int_curr_symbol	=> "int_curr_symbol (last character always SPACE)",

	# timedef
	abmon		=> "Short month names",
	mon		=> "Long month names (as in a date)",
	abday		=> "Short weekday names",
	day		=> "Long weekday names",
	t_fmt		=> "X_fmt",
	d_fmt		=> "x_fmt",
	am_pm		=> "AM/PM",
	d_t_fmt		=> "date_fmt",
	altmon		=> "Long month names (without case ending)",
	t_fmt_ampm	=> "ampm_fmt",
);
117
# Ordered field lists for the definition types that are produced by
# get_fields()/print_fields().  Each value is a format: "s" string, "i"
# integer, "as"/"ai" arrays of those, or "<callback<source<format" to derive
# the field from another one through %callback.
my %FIELDSPEC = (
	numericdef => [
		"decimal_point"		=> "s",
		"thousands_sep"		=> "s",
		"grouping"		=> "ai",
	],
	monetdef => [
		"int_curr_symbol"	=> "s",
		"currency_symbol"	=> "s",
		"mon_decimal_point"	=> "s",
		"mon_thousands_sep"	=> "s",
		"mon_grouping"		=> "ai",
		"positive_sign"		=> "s",
		"negative_sign"		=> "s",
		"int_frac_digits"	=> "i",
		"frac_digits"		=> "i",
		"p_cs_precedes"		=> "i",
		"p_sep_by_space"	=> "i",
		"n_cs_precedes"		=> "i",
		"n_sep_by_space"	=> "i",
		"p_sign_posn"		=> "i",
		"n_sign_posn"		=> "i",
	],
	msgdef => [
		"yesexpr"		=> "s",
		"noexpr"		=> "s",
		"yesstr"		=> "s",
		"nostr"			=> "s",
	],
	timedef => [
		"abmon"			=> "as",
		"mon"			=> "as",
		"abday"			=> "as",
		"day"			=> "as",
		"t_fmt"			=> "s",
		"d_fmt"			=> "s",
		"c_fmt"			=> "<cformat<d_t_fmt<s",
		"am_pm"			=> "as",
		"d_t_fmt"		=> "s",
		"altmon"		=> "<altmon<mon<as",
		"md_order"		=> "<mdorder<d_fmt<s",
		"t_fmt_ampm"		=> "s",
	],
);

# Run the generator that belongs to the requested type; every path finishes
# by (re)creating the Makefile.
if ($TYPE eq "colldef") {
	transform_collation();
	make_makefile();
}
elsif ($TYPE eq "ctypedef") {
	transform_ctypes();
	make_makefile();
}
elsif (exists $FIELDSPEC{$TYPE}) {
	# %keys is tied, so the list order above is the output order.
	%keys = @{$FIELDSPEC{$TYPE}};
	get_fields();
	print_fields();
	make_makefile();
}
194
# Callback for c_fmt: returns its argument (the d_t_fmt value) with the
# first " %Z" and the first " %z" removed.  An undefined argument is passed
# through as undef, the same way callback_mdorder treats it, so a locale
# without the source field does not trigger uninitialized-value warnings.
sub callback_cformat {
	my $s = shift;
	return undef if (!defined $s);
	$s =~ s/ %Z//;
	$s =~ s/ %z//;
	return $s;
}
201
# Callback for md_order: reduces its argument (the d_fmt value) to just its
# "d" and "m" characters, in their original order.  Returns undef when the
# argument is undefined.
sub callback_mdorder {
	my ($fmt) = @_;

	if (defined $fmt) {
		# Delete every character that is not "d" or "m".
		$fmt =~ tr/dm//cd;
		return $fmt;
	}
	return undef;
}
208
# Callback for altmon.  The language and country of the locale being written
# are taken from $callback{data}{l} and $callback{data}{c}.  When
# %alternativemonths has an entry for that pair, it is returned as a
# ";"-joined list with leading and trailing whitespace trimmed from each
# name; otherwise the argument (the "mon" value) is returned unchanged.
sub callback_altmon {
	my $s = shift;

	my $lang = $callback{data}{l};
	my $country = $callback{data}{c};
	my $alt = $alternativemonths{$lang}{$country};

	return $s if (!defined $alt);

	return join(";",
	    map { s/^\s+//; s/\s+$//; $_ } split(";", $alt));
}
228
229 ############################
230
# Load "$directory/UnicodeData.txt" into %ucd.  Each line is split on ";";
# the first field (code) and second field (name) are stored both ways:
#   $ucd{code2name}{<code>} = <name>
#   $ucd{name2code}{<name>} = <code>
# Dies, including the system error, when the file cannot be opened.
sub get_unidata {
	my $directory = shift;
	my $path = "$directory/UnicodeData.txt";

	open(my $fin, '<', $path)
	    or die("Cannot open $path: $!");

	while (my $l = <$fin>) {
		chomp($l);
		my @a = split(/;/, $l);
		# Blank or truncated lines carry no code/name pair.
		next if (@a < 2);

		$ucd{code2name}{"$a[0]"} = $a[1];	# Unicode name
		$ucd{name2code}{"$a[1]"} = $a[0];	# Unicode code
	}
	close($fin);
}
247
# Read the CHARMAP section of the charmap file $file into %utf8map and
# %utf8aliases.
#
# Inside the section every "<NAME> \xNN\xNN..." line is stored as
# $utf8map{<name with "_" turned into " ">} = "NNNN..." (the "\x" markers
# removed).  When a line has the same code as the line before it, the name is
# also recorded in %utf8aliases, pointing at the previous name.
#
# A file that cannot be opened is reported on STDERR and leaves both hashes
# untouched.  Lines inside the section that do not have the "<NAME> value"
# shape are skipped.
sub get_utf8map {
	my $file = shift;

	my $fin;
	if (!open($fin, '<', $file)) {
		print STDERR "Cannot open $file: $!\n";
		return;
	}
	my @lines = <$fin>;
	close($fin);
	chomp(@lines);

	my $prev_k = undef;
	my $prev_v = "";
	my $incharmap = 0;
	foreach my $l (@lines) {
		$l =~ s/\r//;
		next if ($l =~ /^\#/);
		next if ($l eq "");

		if ($l eq "CHARMAP") {
			$incharmap = 1;
			next;
		}

		next if (!$incharmap);
		last if ($l eq "END CHARMAP");

		# Only use the captures of a successful match; otherwise the
		# values of the previous line would be recorded a second time.
		my ($k, $v) = ($l =~ /^<([^\s]+)>\s+(.*)/);
		next if (!defined $k);

		$k =~ s/_/ /g;		# unicode char string
		$v =~ s/\\x//g;		# UTF-8 char code
		$utf8map{$k} = $v;

		$utf8aliases{$k} = $prev_k if ($prev_v eq $v);

		$prev_v = $v;
		$prev_k = $k;
	}
}
285
# For every encoding named in %encodings, read "$dir/<encoding>.TXT" and fill
# $convertors{<encoding>}{<unicode code>} = <local code>.
#
# Each data line is split on whitespace; the first column is the local
# character code and the second the Unicode code.  A leading "0x" is removed
# from both and both are upper-cased.  Comment lines (starting with "#"),
# empty lines and lines with fewer than two columns are ignored.
# An encoding whose file cannot be opened is reported and skipped; one that
# can be opened gets $encodings{<encoding>} set to 1.
sub get_encodings {
	my $dir = shift;

	foreach my $e (sort(keys(%encodings))) {
		my $fin;
		if (!open($fin, '<', "$dir/$e.TXT")) {
			print "Cannot open charmap for $e\n";
			next;
		}
		$encodings{$e} = 1;

		while (my $l = <$fin>) {
			chomp($l);
			$l =~ s/\r//;
			next if ($l =~ /^\#/);
			next if ($l eq "");

			my @a = split(" ", $l);
			next if ($#a < 1);
			$a[0] =~ s/^0[xX]//;	# local char code
			$a[1] =~ s/^0[xX]//;	# unicode char code
			$convertors{$e}{uc($a[1])} = uc($a[0]);
		}
		close($fin);
	}
}
311
# Load the locale description returned by get_xmldata($ETCDIR) into the
# global tables (%languages from key L, %translations from T,
# %alternativemonths from AM, %encodings from E) and, when --lc was given,
# turn its value into the three-element @filter.
#
# The --lc value is split on "_": two parts become (first, "x", second),
# three parts are used as they are.  Any other shape is rejected, because an
# empty @filter would silently select every locale.
sub get_languages {
	my %data = get_xmldata($ETCDIR);
	%languages = %{$data{L}};
	%translations = %{$data{T}};
	%alternativemonths = %{$data{AM}};
	%encodings = %{$data{E}};

	return if (!defined $doonly);

	my @a = split(/_/, $doonly);
	if ($#a == 1) {
		@filter = ($a[0], "x", $a[1]);
	} elsif ($#a == 2) {
		@filter = @a;
	} else {
		die("Invalid --lc value '$doonly': expected two or three " .
		    "'_'-separated components (e.g. la_CC)\n");
	}

	# Show the selection that will be applied.
	print Dumper(@filter);
	return;
}
335
# Generate the LC_CTYPE sources in "$TYPE.draft/".
#
# For every language/family/country in %languages that passes @filter and
# the per-family "definitions" restriction:
#   - "$CLDRDIR/posix/xx_Comm_US.UTF-8.src" is copied unchanged to
#     "<locale>.UTF-8.src";
#   - for every other encoding listed for the locale, the comment_char and
#     escape_char lines plus the LC_CTYPE ... END LC_CTYPE section of the
#     locale's own "$CLDRDIR/posix/<locale>.UTF-8.src" are written, below a
#     "do not edit" header, to "<locale>.<encoding>.src".
# The SHA-1 of each result is stored in %languages and %hashtable.
#
# Unreadable input is reported on STDERR and skipped; an output file that
# cannot be created or written is fatal.
sub transform_ctypes {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		# Locale name; the family is left out when it is "x".
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
		if (! -f $filename) {
			print STDERR "Cannot open $filename\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $outname = "$TYPE.draft/$file.$DEFENCODING.src";
		open(my $fout, '>', $outname)
		    or die("Cannot create $outname: $!");
		print {$fout} @lines;
		close($fout) or die("Cannot write $outname: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
			if (! -f $filename) {
				print STDERR "Cannot open $filename\n";
				next;
			}
			if (!open($fin, '<', $filename)) {
				print STDERR "Cannot open $filename: $!\n";
				next;
			}
			@lines = ();
			while (<$fin>) {
				if ((/^comment_char\s/) || (/^escape_char\s/)){
					push @lines, $_;
				}
				if (/^LC_CTYPE/../^END LC_CTYPE/) {
					push @lines, $_;
				}
			}
			close($fin);

			# The encoding name is part of the digest, so equal
			# text for different encodings hashes differently.
			my $uhex = sha1_hex(join("\n", @lines) . $enc);
			$languages{$l}{$f}{data}{$c}{$enc} = $uhex;
			$hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;

			$outname = "$TYPE.draft/$file.$enc.src";
			open($fout, '>', $outname)
			    or die("Cannot create $outname: $!");
			print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			print {$fout} @lines;
			close($fout) or die("Cannot write $outname: $!");
		}
	}
	}
	}
}
407
408
# Generate the LC_COLLATE sources in "$TYPE.draft/".
#
# For every language/family/country in %languages that passes @filter and
# the per-family "definitions" restriction, the source is looked up in this
# order: "$CLDRDIR/posix/<locale>.UTF-8.src", "$ETCDIR/<locale>.UTF-8.src",
# then "$CLDRDIR/posix/<fallback>.UTF-8.src" when the family names a
# fallback.  From it the comment_char and escape_char lines and the
# LC_COLLATE ... END LC_COLLATE section (runs of spaces squeezed to one) are
# written, below a "do not edit" header, to "<locale>.UTF-8.src"; that file
# is then copied for every other encoding of the locale.  The SHA-1 of the
# extracted text is stored in %languages and %hashtable for all of them.
#
# Missing or unreadable input is reported on STDERR and skipped; failing to
# create, write or copy an output file is fatal.
sub transform_collation {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		# Locale name; the family is left out when it is "x".
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;
		# Output is always named after the locale itself, even when
		# the input comes from the fallback.
		my $actfile = $file;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read

		my @lines;
		while (<$fin>) {
			if ((/^comment_char\s/) || (/^escape_char\s/)){
				push @lines, $_;
			}
			if (/^LC_COLLATE/../^END LC_COLLATE/) {
				$_ =~ s/[ ]+/ /g;
				push @lines, $_;
			}
		}
		close($fin);

		my $shex = sha1_hex(join("\n", @lines));
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
		$hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;

		my $draft = "$TYPE.draft/$actfile.$DEFENCODING.src";
		open(my $fout, '>', $draft)
		    or die("Cannot create $draft: $!");
		print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically extracted from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
		print {$fout} @lines;
		close($fout) or die("Cannot write $draft: $!");

		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			next if ($enc eq $DEFENCODING);
			my $target = "$TYPE.draft/$actfile.$enc.src";
			copy($draft, $target)
			    or die("Cannot copy $draft to $target: $!");
			$languages{$l}{$f}{data}{$c}{$enc} = $shex;
			$hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
		}
	}
	}
	}
}
478
# Collect the raw value of every field named in %keys for every selected
# locale into $values{<language>}{<country>}{<field>}.
#
# The source file is looked up like this: "$CLDRDIR/posix/<locale>.UTF-8.src",
# "$ETCDIR/<locale>.UTF-8.src", then "$CLDRDIR/posix/<fallback>.UTF-8.src"
# when the family names a fallback.  A field starts on the first line that
# begins with its name followed by whitespace and continues over the
# following lines for as long as a line ends in "/".  Inside "<...>" symbols
# every "_" is turned into a space; an "_" anywhere else is fatal.
#
# Missing or unreadable input is reported on STDERR and leaves the locale
# marked as unread (0) in %languages.
#
# NOTE(review): %values is keyed by language and country only, so two
# locales that differ just in the middle (family) component append to the
# same entry -- confirm whether that is intended.
sub get_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);

		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0;	# unread

		# Locale name; the family is left out when it is "x".
		my $file = $l . "_";
		$file .= $f . "_" if ($f ne "x");
		$file .= $c;

		my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		$filename = "$ETCDIR/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename
		 && defined $languages{$l}{$f}{fallback}) {
			$file = $languages{$l}{$f}{fallback};
			$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
		}
		$filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
		    if (! -f $filename);
		if (! -f $filename) {
			print STDERR
			    "Cannot open $file.$DEFENCODING.src or fallback\n";
			next;
		}
		my $fin;
		if (!open($fin, '<', $filename)) {
			print STDERR "Cannot open $filename: $!\n";
			next;
		}
		print "Reading from $filename for ${l}_${f}_${c}\n";
		$languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1;	# read
		my @lines = <$fin>;
		chomp(@lines);
		close($fin);

		foreach my $k (keys(%keys)) {
			# Reset per field, so an unterminated continuation at
			# the end of the file cannot spill into the next field.
			my $continue = 0;
			foreach my $raw (@lines) {
				# Work on a copy; @lines is scanned again for
				# every field.
				my $line = $raw;
				$line =~ s/\r//;
				next if (!$continue && $line !~ /^$k\s/);
				if ($continue) {
					$line =~ s/^\s+//;
				} else {
					$line =~ s/^$k\s+//;
				}

				$values{$l}{$c}{$k} = ""
					if (!defined $values{$l}{$c}{$k});

				$continue = ($line =~ /\/$/);
				$line =~ s/\/$// if ($continue);

				# Replace "_" inside <...> one at a time until
				# nothing more can be replaced.  Looping on the
				# substitution (not on the presence of "_")
				# guarantees termination, so a stray "_"
				# reaches the die below.
				1 while ($line =~
				    s/\<([^>_]+)_([^>]+)\>/<$1 $2>/);
				die "_ in data - $line" if ($line =~ /_/);
				$values{$l}{$c}{$k} .= $line;

				last if (!$continue);
			}
		}
	}
	}
	}
}
546
# Convert the symbolic character name $s into its byte sequence in
# encoding $e.
#
# For "UTF-8" the hex string comes straight from %utf8map.  For any other
# encoding the name is mapped to a Unicode code through %ucd (directly or
# through its %utf8aliases entry) or through a %translations "ucc" entry,
# and that code is mapped through %convertors.  %translations may also
# supply the bytes directly ("hex") or name a substitute character
# ("unicode") when the encoding has no code for the original one.
#
# Returns the bytes packed into a string.  Dies when no conversion exists.
# A hex string that is empty or of odd length is reported on STDERR and the
# string "length = <n>" is returned instead.
sub decodecldr {
	my $e = shift;
	my $s = shift;

	my $v = undef;

	if ($e eq "UTF-8") {
		#
		# Conversion to UTF-8 can be done from the Unicode name to
		# the UTF-8 character code.
		#
		$v = $utf8map{$s};
		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	} else {
		#
		# Conversion to these encodings can be done from the Unicode
		# name to Unicode code to the encodings code.
		#
		my $ucc = undef;
		$ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
		$ucc = $ucd{name2code}{$utf8aliases{$s}}
			if (!defined $ucc
			 && $utf8aliases{$s}
			 && defined $ucd{name2code}{$utf8aliases{$s}});

		if (!defined $ucc) {
			if (defined $translations{$e}{$s}{hex}) {
				$v = $translations{$e}{$s}{hex};
				$ucc = 0;
			} elsif (defined $translations{$e}{$s}{ucc}) {
				$ucc = $translations{$e}{$s}{ucc};
			}
		}

		die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
		$v = $convertors{$e}{$ucc} if (!defined $v);

		$v = $translations{$e}{$s}{hex}
			if (!defined $v && defined $translations{$e}{$s}{hex});

		if (!defined $v && defined $translations{$e}{$s}{unicode}) {
			# Look the substitute name up from scratch; $ucc still
			# holds the code of the original character here.
			my $ucn = $translations{$e}{$s}{unicode};
			my $subst = $ucd{name2code}{$ucn};
			$subst = $ucd{name2code}{$utf8aliases{$ucn}}
				if (!defined $subst
				 && defined $utf8aliases{$ucn}
				 && defined $ucd{name2code}{$utf8aliases{$ucn}});
			$v = $convertors{$e}{$subst} if (defined $subst);
		}

		die "Cannot convert $s in $e (charmap)" if (!defined $v);
	}

	# Two hex digits per byte, for any number of bytes.
	my $len = length($v);
	if ($len >= 2 && $len % 2 == 0) {
		return pack("C*", map { hex($_) } unpack("(a2)*", $v));
	}

	print STDERR "Cannot convert $e $s\n";
	return "length = " . $len;
}
609
# Look up $translations{$enc}{$v}; returns the entry when it is defined and
# undef otherwise.
sub translate {
	my ($enc, $v) = @_;

	my $entry = $translations{$enc}{$v};
	if (!defined $entry) {
		return undef;
	}
	return $entry;
}
617
# Helper for print_fields(): strip one leading and one trailing double quote
# from $text and replace every "<NAME>" in it, left to right, by
# decodecldr($enc, NAME).  Already decoded output is never scanned again.
# Returns (converted text, flag); the flag is 0 when a symbol could not be
# converted, in which case that symbol is left in place and reported on
# STDERR together with the field name $k.
sub _print_fields_expand {
	my ($enc, $k, $text) = @_;
	my $okay = 1;

	$text =~ s/^"//;
	$text =~ s/"$//;
	$text =~ s{<(.*?)>}{
		my $cm = $1;
		my $rv = decodecldr($enc, $cm);
		if (!defined $rv) {
			print STDERR
"Could not convert $k ($cm) from $DEFENCODING to $enc\n";
			$okay = 0;
			$rv = "<$cm>";
		}
		$rv;
	}ge;

	return ($text, $okay);
}

# Write "$TYPE.draft/<locale>.<encoding>.src" for every selected locale and
# every encoding listed for it, from the values collected in %values.
#
# Fields are emitted in the order of %keys, each preceded by its %DESC text
# as a comment.  The format stored in %keys decides how a field is written:
#   "i", "ai"   the raw value on one line;
#   "s"         the value with its symbols converted to the encoding;
#   "as"        the value split on ";", one converted entry per line;
#   ">other"    use the field "other" instead;
#   "<cb<src<fmt"  run callback "cb" on the value of field "src", store the
#               result as this field's value and continue with format "fmt".
# The output is first written to "<...>.new" and then renamed to ".src", or
# to ".failed" when a symbol could not be converted.  Its SHA-1 is stored in
# %languages and %hashtable.
#
# Locales that were not read (marked 0) and encodings without a conversion
# table are reported and skipped.  Failing to create, write or rename the
# output file is fatal.
sub print_fields {
	foreach my $l (sort keys(%languages)) {
	foreach my $f (sort keys(%{$languages{$l}})) {
	foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
		next if ($#filter == 2 && ($filter[0] ne $l
		    || $filter[1] ne $f || $filter[2] ne $c));
		next if (defined $languages{$l}{$f}{definitions}
		    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
		foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
			if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
				print "Skipping ${l}_" .
				    ($f eq "x" ? "" : "${f}_") .
				    "${c} - not read\n";
				next;
			}
			my $file = $l;
			$file .= "_" . $f if ($f ne "x");
			$file .= "_" . $c;
			print "Writing to $file in $enc\n";

			if ($enc ne $DEFENCODING &&
			    !defined $convertors{$enc}) {
				print "Failed! Cannot convert to $enc.\n";
				next;
			}

			my $newname = "$TYPE.draft/$file.$enc.new";
			open(my $fout, '>', $newname)
			    or die("Cannot create $newname: $!");
			my $okay = 1;
			my $output = "";
			print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale. The data is obtained from the
# CLDR project, obtained from http://cldr.unicode.org/
# -----------------------------------------------------------------------------
EOF
			foreach my $k (keys(%keys)) {
				my $fmt = $keys{$k};

				die("Unknown $k in \%DESC")
					if (!defined $DESC{$k});

				$output .= "#\n# $DESC{$k}\n";

				# Replace one row with another
				if ($fmt =~ /^>/) {
					$k = substr($fmt, 1);
					$fmt = $keys{$k};
				}

				# Callback function
				if ($fmt =~ /^\</) {
					$callback{data}{c} = $c;
					$callback{data}{k} = $k;
					$callback{data}{l} = $l;
					$callback{data}{e} = $enc;
					my @a = split(/\</, substr($fmt, 1));
					my $rv =
					    $callback{$a[0]}->($values{$l}{$c}{$a[1]});
					$values{$l}{$c}{$k} = $rv;
					$fmt = $a[2];
					$callback{data} = undef;
				}

				my $v = $values{$l}{$c}{$k};
				$v = "undef" if (!defined $v);

				if ($fmt eq "i" || $fmt eq "ai") {
					$output .= "$v\n";
					next;
				}
				if ($fmt eq "s") {
					my ($text, $ok) =
					    _print_fields_expand($enc, $k, $v);
					$okay = 0 if (!$ok);
					$output .= "$text\n";
					next;
				}
				if ($fmt eq "as") {
					foreach my $item (split(/;/, $v)) {
						my ($text, $ok) =
						    _print_fields_expand($enc,
							$k, $item);
						$okay = 0 if (!$ok);
						$output .= "$text\n";
					}
					next;
				}

				die("$k is '$fmt'");
			}

			my $digest = sha1_hex($output);
			$languages{$l}{$f}{data}{$c}{$enc} = $digest;
			$hashtable{$digest}{"${l}_${f}_${c}.$enc"} = 1;
			print {$fout} "$output# EOF\n";
			close($fout) or die("Cannot write $newname: $!");

			my $final = "$TYPE.draft/$file.$enc." .
			    ($okay ? "src" : "failed");
			rename($newname, $final)
			    or die("Cannot rename $newname to $final: $!");
		}
	}
	}
	}
}
767
# Write "$TYPE.draft/Makefile" for the locale sources of the current $TYPE.
#
# Reads the file-level %hashtable (output hash => set of
# "<lang>_<family>_<country>.<encoding>" names), %languages, %FILESNAMES,
# $TYPE and $DEFENCODING.  Returns immediately, writing nothing, when
# @filter is non-empty.
#
# Side effects:
#   - every locale whose output duplicates another locale's output has its
#     $languages{..}{..}{data}{..}{..} entry set to undef, so that it is
#     emitted as a SAME link instead of a LOCALES entry;
#   - dies if the Makefile cannot be opened or closed.
sub make_makefile {
        return if ($#filter > -1);
        print "Creating Makefile for $TYPE\n";
        my $SRCOUT;
        my $SRCOUT2;
        # Only ctypedef emits a SYMPAIRS section; default to the empty string
        # so the footer here-document never interpolates undef.
        my $SRCOUT3 = "";
        my $MAPLOC;
        # Maps a locale name to a sort rank; lower ranks sort first and so
        # are preferred as the link target among identical locales.
        my $rank;
        if ($TYPE eq "colldef") {
                $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
                        "\t-f \${MAPLOC}/map.UTF-8 " .
                        "\${.OBJDIR}/\${.IMPSRC:T:R}";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
                                "locale/etc/final-maps\n";
                $SRCOUT2 = "LC_COLLATE";
                # For colldef, weight LOCALES to UTF-8
                #     Sort as upper-case and reverse to achieve it
                #     Make en_US, ru_RU, and ca_AD preferred
                $rank = sub {
                        my ($name) = @_;
                        return 0 if ($name eq 'en_x_US.UTF-8' ||
                            $name eq 'ru_x_RU.UTF-8' ||
                            $name eq 'ca_x_AD.UTF-8');
                        return 1;
                };
        }
        elsif ($TYPE eq "ctypedef") {
                $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
                        "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
                        "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
                        " || true";
                $SRCOUT2 = "LC_CTYPE";
                $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
                                "locale/etc/final-maps\n";
                $SRCOUT3 = "## SYMPAIRS\n\n" .
                        ".for PAIR in \${SYMPAIRS}\n" .
                        "\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
                        "\${PAIR:C/:.*//}\n" .
                        "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
                        "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
                        "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
                        " || true\n" .
                        ".endfor\n\n";
                # en_US.UTF-8 first, then any other en_US encoding, then
                # en_GB.ISO8859-15 and ru_RU, then everything else.
                $rank = sub {
                        my ($name) = @_;
                        return 0 if ($name eq 'en_x_US.UTF-8');
                        return 1 if ($name =~ /^en_x_US/);
                        return 2 if ($name =~ /^en_x_GB\.ISO8859-15/ ||
                            $name =~ /^ru_x_RU/);
                        return 3;
                };
        }
        else {
                $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
                $SRCOUT2 = "out";
                $MAPLOC = "";
                # en_US.UTF-8 first, "_Comm_" entries last.
                $rank = sub {
                        my ($name) = @_;
                        return 2 if ($name =~ /_Comm_/);
                        return 0 if ($name eq 'en_x_US.UTF-8');
                        return 1;
                };
        }

        my $makefile = "$TYPE.draft/Makefile";
        open(my $fout, '>', $makefile)
            or die("Cannot create $makefile: $!");
        # Tabs are spelled \t so that the recipe line below keeps the
        # leading tab make requires even if this file is re-indented.
        print {$fout} <<EOF;
# Warning: Do not edit. This file is automatically generated from the
# tools in /usr/src/tools/tools/locale.

LOCALEDIR=\t\${SHAREDIR}/locale
FILESNAME=\t$FILESNAMES{$TYPE}
.SUFFIXES:\t.src .${SRCOUT2}
${MAPLOC}
.src.${SRCOUT2}:
\t$SRCOUT

## PLACEHOLDER

EOF

        # Locales with identical output share one hash.  Keep the best-ranked
        # one as the real locale and turn the others into SAME links.
        foreach my $hash (keys(%hashtable)) {
                # Rank first, then reverse upper-case order as the tie-break;
                # this is a consistent ordering, which sort requires.
                my @files = sort {
                        $rank->($a) <=> $rank->($b) || uc($b) cmp uc($a)
                } keys(%{$hashtable{$hash}});
                next if ($#files < 1);          # nothing to link

                my $link = shift(@files);
                $link =~ s/_x_/_/;      # strip family if none there
                foreach my $name (@files) {
                        # "<lang>_<family>_<country>.<encoding>"
                        my @parts = split(/_/, $name);
                        my @tail = split(/\./, $parts[-1]);
                        my $file = $name;
                        $file =~ s/_x_/_/;
                        print {$fout} "SAME+=\t\t$link:$file\n";
                        # Mark as a duplicate so it is left out of LOCALES.
                        undef($languages{$parts[0]}{$parts[1]}{data}{$tail[0]}{$tail[1]});
                }
        }

        # No @filter check is needed in this loop: the early return above
        # guarantees that @filter is empty here.
        foreach my $l (sort keys(%languages)) {
        foreach my $f (sort keys(%{$languages{$l}})) {
        foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
                next if (defined $languages{$l}{$f}{definitions}
                    && $languages{$l}{$f}{definitions} !~ /$TYPE/);
                if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
                 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
                        print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
                            "${c} - not read\n";
                        next;
                }

                # Locale name without encoding; the family is omitted
                # when it is the placeholder "x".
                my $file = $l . "_";
                $file .= $f . "_" if ($f ne "x");
                $file .= $c;

                foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                        # undef marks a duplicate already emitted as SAME.
                        next if (!defined $languages{$l}{$f}{data}{$c}{$e});
                        print {$fout} "LOCALES+=\t$file.$e\n";
                }

                if (defined $languages{$l}{$f}{nc_link}) {
                        foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
                                print {$fout} "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
                        }
                }

                if (defined $languages{$l}{$f}{e_link}) {
                        # e_link is a space-separated list of "from:to"
                        # encoding pairs.
                        foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
                                my @pair = split(/:/, $el);
                                print {$fout} "SAME+=\t\t$file.$pair[0]:$file.$pair[1]\t# legacy (same charset)\n";
                        }
                }

        }
        }
        }

        print {$fout} <<EOF;

FILES=\t\t\${LOCALES:S/\$/.${SRCOUT2}/}
CLEANFILES=\t\${FILES}

.for f in \${SAME}
SYMLINKS+=\t../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
.endfor

.for f in \${LOCALES}
FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
.endfor

${SRCOUT3}.include <bsd.prog.mk>
EOF

        # Buffered write errors only surface at close time.
        close($fout) or die("Cannot write $makefile: $!");
}