]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - tools/tools/locale/tools/cldr2def.pl
Upgrade to Bzip2 version 1.0.8.
[FreeBSD/FreeBSD.git] / tools / tools / locale / tools / cldr2def.pl
1 #!/usr/local/bin/perl -wC
2 # $FreeBSD$
3
4 use strict;
5 use File::Copy;
6 use XML::Parser;
7 use Tie::IxHash;
8 use Text::Iconv;
9 #use Data::Dumper;
10 use Getopt::Long;
11 use Digest::SHA qw(sha1_hex);
12 require "charmaps.pm";
13
14
15 if ($#ARGV < 2) {
16         print "Usage: $0 --unidir=<unidir> --etc=<etcdir> --type=<type>\n";
17         exit(1);
18 }
19
20 my $DEFENCODING = "UTF-8";
21
22 my $UNIDIR = undef;
23 my $ETCDIR = undef;
24 my $TYPE = undef;
25
26 my $result = GetOptions (
27                 "unidir=s"      => \$UNIDIR,
28                 "etc=s"         => \$ETCDIR,
29                 "type=s"        => \$TYPE,
30             );
31
32 my %convertors = ();
33
34 my %ucd = ();
35 my %values = ();
36 my %hashtable = ();
37 my %languages = ();
38 my %translations = ();
39 my %encodings = ();
40 my %alternativemonths = ();
41 get_languages();
42
43 my %utf8map = ();
44 my %utf8aliases = ();
45 get_unidata($UNIDIR);
46 get_utf8map("$UNIDIR/posix/$DEFENCODING.cm");
47 get_encodings("$ETCDIR/charmaps");
48
49 my %keys = ();
50 tie(%keys, "Tie::IxHash");
51 tie(%hashtable, "Tie::IxHash");
52
53 my %FILESNAMES = (
54         "monetdef"      => "LC_MONETARY",
55         "timedef"       => "LC_TIME",
56         "msgdef"        => "LC_MESSAGES",
57         "numericdef"    => "LC_NUMERIC",
58         "colldef"       => "LC_COLLATE",
59         "ctypedef"      => "LC_CTYPE"
60 );
61
62 my %callback = (
63         mdorder => \&callback_mdorder,
64         altmon => \&callback_altmon,
65         cformat => \&callback_cformat,
66         dformat => \&callback_dformat,
67         dtformat => \&callback_dtformat,
68         cbabmon => \&callback_abmon,
69         cbampm => \&callback_ampm,
70         data => undef,
71 );
72
73 my %DESC = (
74
75         # numericdef
76         "decimal_point" => "decimal_point",
77         "thousands_sep" => "thousands_sep",
78         "grouping"      => "grouping",
79
80         # monetdef
81         "int_curr_symbol"       => "int_curr_symbol (last character always " .
82                                    "SPACE)",
83         "currency_symbol"       => "currency_symbol",
84         "mon_decimal_point"     => "mon_decimal_point",
85         "mon_thousands_sep"     => "mon_thousands_sep",
86         "mon_grouping"          => "mon_grouping",
87         "positive_sign"         => "positive_sign",
88         "negative_sign"         => "negative_sign",
89         "int_frac_digits"       => "int_frac_digits",
90         "frac_digits"           => "frac_digits",
91         "p_cs_precedes"         => "p_cs_precedes",
92         "p_sep_by_space"        => "p_sep_by_space",
93         "n_cs_precedes"         => "n_cs_precedes",
94         "n_sep_by_space"        => "n_sep_by_space",
95         "p_sign_posn"           => "p_sign_posn",
96         "n_sign_posn"           => "n_sign_posn",
97
98         # msgdef
99         "yesexpr"       => "yesexpr",
100         "noexpr"        => "noexpr",
101         "yesstr"        => "yesstr",
102         "nostr"         => "nostr",
103
104         # timedef
105         "abmon"         => "Short month names",
106         "mon"           => "Long month names (as in a date)",
107         "abday"         => "Short weekday names",
108         "day"           => "Long weekday names",
109         "t_fmt"         => "X_fmt",
110         "d_fmt"         => "x_fmt",
111         "c_fmt"         => "c_fmt",
112         "am_pm"         => "AM/PM",
113         "d_t_fmt"       => "date_fmt",
114         "altmon"        => "Long month names (without case ending)",
115         "md_order"      => "md_order",
116         "t_fmt_ampm"    => "ampm_fmt",
117 );
118
119 if ($TYPE eq "colldef") {
120         transform_collation();
121         make_makefile();
122 }
123
124 if ($TYPE eq "ctypedef") {
125         transform_ctypes();
126         make_makefile();
127 }
128
129 if ($TYPE eq "numericdef") {
130         %keys = (
131             "decimal_point"     => "s",
132             "thousands_sep"     => "s",
133             "grouping"          => "ai",
134         );
135         get_fields();
136         print_fields();
137         make_makefile();
138 }
139
140 if ($TYPE eq "monetdef") {
141         %keys = (
142             "int_curr_symbol"   => "s",
143             "currency_symbol"   => "s",
144             "mon_decimal_point" => "s",
145             "mon_thousands_sep" => "s",
146             "mon_grouping"      => "ai",
147             "positive_sign"     => "s",
148             "negative_sign"     => "s",
149             "int_frac_digits"   => "i",
150             "frac_digits"       => "i",
151             "p_cs_precedes"     => "i",
152             "p_sep_by_space"    => "i",
153             "n_cs_precedes"     => "i",
154             "n_sep_by_space"    => "i",
155             "p_sign_posn"       => "i",
156             "n_sign_posn"       => "i"
157         );
158         get_fields();
159         print_fields();
160         make_makefile();
161 }
162
163 if ($TYPE eq "msgdef") {
164         %keys = (
165             "yesexpr"           => "s",
166             "noexpr"            => "s",
167             "yesstr"            => "s",
168             "nostr"             => "s"
169         );
170         get_fields();
171         print_fields();
172         make_makefile();
173 }
174
175 if ($TYPE eq "timedef") {
176         %keys = (
177             "abmon"             => "<cbabmon<abmon<as",
178             "mon"               => "as",
179             "abday"             => "as",
180             "day"               => "as",
181             "t_fmt"             => "s",
182             "d_fmt"             => "<dformat<d_fmt<s",
183             "c_fmt"             => "<cformat<d_t_fmt<s",
184             "am_pm"             => "<cbampm<am_pm<as",
185             "d_t_fmt"           => "<dtformat<d_t_fmt<s",
186             "altmon"            => "<altmon<mon<as",
187             "md_order"          => "<mdorder<d_fmt<s",
188             "t_fmt_ampm"        => "s",
189         );
190         get_fields();
191         print_fields();
192         make_makefile();
193 }
194
195 sub callback_ampm {
196         my $s = shift;
197         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
198         my $enc = $callback{data}{e};
199
200         if ($nl eq 'ru_RU') {
201                 if ($enc eq 'UTF-8') {
202                         $s = 'дп;пп';
203                 } else {
204                         my  $converter = Text::Iconv->new("utf-8", "$enc");
205                         $s = $converter->convert("дп;пп");
206                 }
207         }
208         return $s;
209 }
210
211 sub callback_cformat {
212         my $s = shift;
213         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
214
215         if ($nl eq 'ko_KR') {
216                 $s =~ s/(> )(%p)/$1%A $2/;
217         }
218         $s =~ s/\.,/\./;
219         $s =~ s/ %Z//;
220         $s =~ s/ %z//;
221         $s =~ s/^"%e\./%A %e/;
222         $s =~ s/^"(%B %e, )/"%A, $1/;
223         $s =~ s/^"(%e %B )/"%A $1/;
224         return $s;
225 };
226
227 sub callback_dformat {
228         my $s = shift;
229
230         $s =~ s/(%m(<SOLIDUS>|[-.]))%e/$1%d/;
231         $s =~ s/%e((<SOLIDUS>|[-.])%m)/%d$1/;
232         return $s;
233 };
234
235 sub callback_dtformat {
236         my $s = shift;
237         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
238
239         if ($nl eq 'ja_JP') {
240                 $s =~ s/(> )(%H)/$1%A $2/;
241         } elsif ($nl eq 'ko_KR' || $nl eq 'zh_CN' || $nl eq 'zh_TW') {
242                 if ($nl ne 'ko_KR') {
243                         $s =~ s/%m/%_m/;
244                 }
245                 $s =~ s/(> )(%p)/$1%A $2/;
246         }
247         $s =~ s/\.,/\./;
248         $s =~ s/^"%e\./%A %e/;
249         $s =~ s/^"(%B %e, )/"%A, $1/;
250         $s =~ s/^"(%e %B )/"%A $1/;
251         return $s;
252 };
253
254 sub callback_mdorder {
255         my $s = shift;
256         return undef if (!defined $s);
257         $s =~ s/[^dem]//g;
258         $s =~ s/e/d/g;
259         return $s;
260 };
261
262 sub callback_altmon {
263         # if the language/country is known in %alternative months then
264         # return that, otherwise repeat mon
265         my $s = shift;
266
267         if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
268                 my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
269                 my @cleaned;
270                 foreach (@altnames)
271                 {
272                         $_ =~ s/^\s+//;
273                         $_ =~ s/\s+$//;
274                         push @cleaned, $_;
275                 }
276                 return join(";",@cleaned);
277         }
278
279         return $s;
280 }
281
282 sub callback_abmon {
283         # for specified CJK locales, pad result with a space to enable
284         # columns to line up (style established in FreeBSD in 2001)
285         my $s = shift;
286         my $nl = $callback{data}{l} . "_" . $callback{data}{c};
287
288         if ($nl eq 'ja_JP' || $nl eq 'ko_KR' || $nl eq 'zh_CN' ||
289             $nl eq 'zh_HK' || $nl eq 'zh_TW') {
290                 my @monthnames = split(";", $s);
291                 my @cleaned;
292                 foreach (@monthnames)
293                 {
294                         if ($_ =~ /^"<(two|three|four|five|six|seven|eight|nine)>/ ||
295                            ($_ =~ /^"<one>/ && $_ !~ /^"<one>(<zero>|<one>|<two>)/))
296                         {
297                                 $_ =~ s/^"/"<space>/;
298                         }
299                         push @cleaned, $_;
300                 }
301                 return join(";",@cleaned);
302         }
303         return $s;
304 }
305
306 ############################
307
308 sub get_unidata {
309         my $directory = shift;
310
311         open(FIN, "$directory/UnicodeData.txt")
312             or die("Cannot open $directory/UnicodeData.txt");;
313         my @lines = <FIN>;
314         chomp(@lines);
315         close(FIN);
316
317         foreach my $l (@lines) {
318                 my @a = split(/;/, $l);
319
320                 $ucd{code2name}{"$a[0]"} = $a[1];       # Unicode name
321                 $ucd{name2code}{"$a[1]"} = $a[0];       # Unicode code
322         }
323 }
324
325 sub get_utf8map {
326         my $file = shift;
327
328         open(FIN, $file);
329         my @lines = <FIN>;
330         close(FIN);
331         chomp(@lines);
332
333         my $prev_k = undef;
334         my $prev_v = "";
335         my $incharmap = 0;
336         foreach my $l (@lines) {
337                 $l =~ s/\r//;
338                 next if ($l =~ /^\#/);
339                 next if ($l eq "");
340
341                 if ($l eq "CHARMAP") {
342                         $incharmap = 1;
343                         next;
344                 }
345
346                 next if (!$incharmap);
347                 last if ($l eq "END CHARMAP");
348
349                 $l =~ /^<([^\s]+)>\s+(.*)/;
350                 my $k = $1;
351                 my $v = $2;
352                 $k =~ s/_/ /g;          # unicode char string
353                 $v =~ s/\\x//g;         # UTF-8 char code
354                 $utf8map{$k} = $v;
355
356                 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
357
358                 $prev_v = $v;
359                 $prev_k = $k;
360         }
361 }
362
363 sub get_encodings {
364         my $dir = shift;
365         foreach my $e (sort(keys(%encodings))) {
366                 if (!open(FIN, "$dir/$e.TXT")) {
367                         print "Cannot open charmap for $e\n";
368                         next;
369
370                 }
371                 $encodings{$e} = 1;
372                 my @lines = <FIN>;
373                 close(FIN);
374                 chomp(@lines);
375                 foreach my $l (@lines) {
376                         $l =~ s/\r//;
377                         next if ($l =~ /^\#/);
378                         next if ($l eq "");
379
380                         my @a = split(" ", $l);
381                         next if ($#a < 1);
382                         $a[0] =~ s/^0[xX]//;    # local char code
383                         $a[1] =~ s/^0[xX]//;    # unicode char code
384                         $convertors{$e}{uc($a[1])} = uc($a[0]);
385                 }
386         }
387 }
388
389 sub get_languages {
390         my %data = get_xmldata($ETCDIR);
391         %languages = %{$data{L}}; 
392         %translations = %{$data{T}}; 
393         %alternativemonths = %{$data{AM}}; 
394         %encodings = %{$data{E}}; 
395 }
396
397 sub transform_ctypes {
398         # Add the C.UTF-8
399         $languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef;
400
401         foreach my $l (sort keys(%languages)) {
402         foreach my $f (sort keys(%{$languages{$l}})) {
403         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
404                 next if (defined $languages{$l}{$f}{definitions}
405                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
406                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
407                 my $file = $l;
408                 $file .= "_" . $f if ($f ne "x");
409                 $file .= "_" . $c if ($c ne "x");
410                 my $actfile = $file;
411
412                 my $filename = "$UNIDIR/posix/xx_Comm_C.UTF-8.src";
413                 if (! -f $filename) {
414                         print STDERR "Cannot open $filename\n";
415                         next;
416                 }
417                 open(FIN, "$filename");
418                 print "Reading from $filename for ${l}_${f}_${c}\n";
419                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
420                 my @lines;
421                 my $shex;
422                 my $uhex;
423                 while (<FIN>) {
424                         push @lines, $_;
425                 }
426                 close(FIN);
427                 $shex = sha1_hex(join("\n", @lines));
428                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
429                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
430                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
431                 print FOUT @lines;
432                 close(FOUT);
433                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
434                         next if ($enc eq $DEFENCODING);
435                         $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
436                         if (! -f $filename) {
437                                 print STDERR "Cannot open $filename\n";
438                                 next;
439                         }
440                         @lines = ();
441                         open(FIN, "$filename");
442                         while (<FIN>) {
443                                 if ((/^comment_char\s/) || (/^escape_char\s/)){
444                                         push @lines, $_;
445                                 }
446                                 if (/^LC_CTYPE/../^END LC_CTYPE/) {
447                                         push @lines, $_;
448                                 }
449                         }
450                         close(FIN);
451                         $uhex = sha1_hex(join("\n", @lines) . $enc);
452                         $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
453                         $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
454                         open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
455                         print FOUT <<EOF;
456 # Warning: Do not edit. This file is automatically extracted from the
457 # tools in /usr/src/tools/tools/locale. The data is obtained from the
458 # CLDR project, obtained from http://cldr.unicode.org/
459 # -----------------------------------------------------------------------------
460 EOF
461                         print FOUT @lines;
462                         close(FOUT);
463                 }
464         }
465         }
466         }
467 }
468
469
470 sub transform_collation {
471         foreach my $l (sort keys(%languages)) {
472         foreach my $f (sort keys(%{$languages{$l}})) {
473         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
474                 next if (defined $languages{$l}{$f}{definitions}
475                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
476                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
477                 my $file;
478                 $file = $l . "_";
479                 $file .= $f . "_" if ($f ne "x");
480                 $file .= $c;
481                 my $actfile = $file;
482
483                 my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
484                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
485                     if (! -f $filename);
486                 if (! -f $filename
487                  && defined $languages{$l}{$f}{fallback}) {
488                         $file = $languages{$l}{$f}{fallback};
489                         $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
490                 }
491                 $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
492                     if (! -f $filename);
493                 if (! -f $filename) {
494                         print STDERR
495                             "Cannot open $file.$DEFENCODING.src or fallback\n";
496                         next;
497                 }
498                 open(FIN, "$filename");
499                 print "Reading from $filename for ${l}_${f}_${c}\n";
500                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
501                 my @lines;
502                 my $shex;
503                 while (<FIN>) {
504                         if ((/^comment_char\s/) || (/^escape_char\s/)){
505                                 push @lines, $_;
506                         }
507                         if (/^LC_COLLATE/../^END LC_COLLATE/) {
508                                 $_ =~ s/[ ]+/ /g;
509                                 push @lines, $_;
510                         }
511                 }
512                 close(FIN);
513                 $shex = sha1_hex(join("\n", @lines));
514                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
515                 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
516                 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
517                 print FOUT <<EOF;
518 # Warning: Do not edit. This file is automatically extracted from the
519 # tools in /usr/src/tools/tools/locale. The data is obtained from the
520 # CLDR project, obtained from http://cldr.unicode.org/
521 # -----------------------------------------------------------------------------
522 EOF
523                 print FOUT @lines;
524                 close(FOUT);
525
526                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
527                         next if ($enc eq $DEFENCODING);
528                         copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
529                               "$TYPE.draft/$actfile.$enc.src");
530                         $languages{$l}{$f}{data}{$c}{$enc} = $shex;
531                         $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
532                 }
533         }
534         }
535         }
536 }
537
538 sub get_fields {
539         foreach my $l (sort keys(%languages)) {
540         foreach my $f (sort keys(%{$languages{$l}})) {
541         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
542                 next if (defined $languages{$l}{$f}{definitions}
543                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
544
545                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
546                 my $file;
547                 $file = $l . "_";
548                 $file .= $f . "_" if ($f ne "x");
549                 $file .= $c;
550
551                 my $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
552                 $filename = "$ETCDIR/$file.$DEFENCODING.src"
553                     if (! -f $filename);
554                 if (! -f $filename
555                  && defined $languages{$l}{$f}{fallback}) {
556                         $file = $languages{$l}{$f}{fallback};
557                         $filename = "$UNIDIR/posix/$file.$DEFENCODING.src";
558                 }
559                 $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"
560                     if (! -f $filename);
561                 if (! -f $filename) {
562                         print STDERR
563                             "Cannot open $file.$DEFENCODING.src or fallback\n";
564                         next;
565                 }
566                 open(FIN, "$filename");
567                 print "Reading from $filename for ${l}_${f}_${c}\n";
568                 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
569                 my @lines = <FIN>;
570                 chomp(@lines);
571                 close(FIN);
572                 my $continue = 0;
573                 foreach my $k (keys(%keys)) {
574                         foreach my $line (@lines) {
575                                 $line =~ s/\r//;
576                                 next if (!$continue && $line !~ /^$k\s/);
577                                 if ($continue) {
578                                         $line =~ s/^\s+//;
579                                 } else {
580                                         $line =~ s/^$k\s+//;
581                                 }
582
583                                 $values{$l}{$f}{$c}{$k} = ""
584                                         if (!defined $values{$l}{$f}{$c}{$k});
585
586                                 $continue = ($line =~ /\/$/);
587                                 $line =~ s/\/$// if ($continue);
588
589                                 while ($line =~ /_/) {
590                                         $line =~
591                                             s/\<([^>_]+)_([^>]+)\>/<$1 $2>/;
592                                 }
593                                 die "_ in data - $line" if ($line =~ /_/);
594                                 $values{$l}{$f}{$c}{$k} .= $line;
595
596                                 last if (!$continue);
597                         }
598                 }
599         }
600         }
601         }
602 }
603
604 sub decodecldr {
605         my $e = shift;
606         my $s = shift;
607
608         my $v = undef;
609
610         if ($e eq "UTF-8") {
611                 #
612                 # Conversion to UTF-8 can be done from the Unicode name to
613                 # the UTF-8 character code.
614                 #
615                 $v = $utf8map{$s};
616                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
617         } else {
618                 #
619                 # Conversion to these encodings can be done from the Unicode
620                 # name to Unicode code to the encodings code.
621                 #
622                 my $ucc = undef;
623                 $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
624                 $ucc = $ucd{name2code}{$utf8aliases{$s}}
625                         if (!defined $ucc
626                          && $utf8aliases{$s}
627                          && defined $ucd{name2code}{$utf8aliases{$s}});
628
629                 if (!defined $ucc) {
630                         if (defined $translations{$e}{$s}{hex}) {
631                                 $v = $translations{$e}{$s}{hex};
632                                 $ucc = 0;
633                         } elsif (defined $translations{$e}{$s}{ucc}) {
634                                 $ucc = $translations{$e}{$s}{ucc};
635                         }
636                 }
637
638                 die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
639                 $v = $convertors{$e}{$ucc} if (!defined $v);
640
641                 $v = $translations{$e}{$s}{hex}
642                         if (!defined $v && defined $translations{$e}{$s}{hex});
643
644                 if (!defined $v && defined $translations{$e}{$s}{unicode}) {
645                         my $ucn = $translations{$e}{$s}{unicode};
646                         $ucc = $ucd{name2code}{$ucn}
647                                 if (defined $ucd{name2code}{$ucn});
648                         $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
649                                 if (!defined $ucc
650                                  && defined $ucd{name2code}{$utf8aliases{$ucn}});
651                         $v = $convertors{$e}{$ucc};
652                 }
653
654                 die "Cannot convert $s in $e (charmap)" if (!defined $v);
655         }
656
657         return pack("C", hex($v)) if (length($v) == 2);
658         return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
659                 if (length($v) == 4);
660         return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
661             hex(substr($v, 4, 2))) if (length($v) == 6);
662         print STDERR "Cannot convert $e $s\n";
663         return "length = " . length($v);
664
665 }
666
667 sub translate {
668         my $enc = shift;
669         my $v = shift;
670
671         return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
672         return undef;
673 }
674
675 sub print_fields {
676         foreach my $l (sort keys(%languages)) {
677         foreach my $f (sort keys(%{$languages{$l}})) {
678         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
679                 next if (defined $languages{$l}{$f}{definitions}
680                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
681                 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
682                         if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
683                                 print "Skipping ${l}_" .
684                                     ($f eq "x" ? "" : "${f}_") .
685                                     "${c} - not read\n";
686                                 next;
687                         }
688                         my $file = $l;
689                         $file .= "_" . $f if ($f ne "x");
690                         $file .= "_" . $c;
691                         print "Writing to $file in $enc\n";
692
693                         if ($enc ne $DEFENCODING &&
694                             !defined $convertors{$enc}) {
695                                 print "Failed! Cannot convert to $enc.\n";
696                                 next;
697                         };
698
699                         open(FOUT, ">$TYPE.draft/$file.$enc.new");
700                         my $okay = 1;
701                         my $output = "";
702                         print FOUT <<EOF;
703 # Warning: Do not edit. This file is automatically generated from the
704 # tools in /usr/src/tools/tools/locale. The data is obtained from the
705 # CLDR project, obtained from http://cldr.unicode.org/
706 # -----------------------------------------------------------------------------
707 EOF
708                         foreach my $k (keys(%keys)) {
709                                 my $g = $keys{$k};
710
711                                 die("Unknown $k in \%DESC")
712                                         if (!defined $DESC{$k});
713
714                                 $output .= "#\n# $DESC{$k}\n";
715
716                                 # Replace one row with another
717                                 if ($g =~ /^>/) {
718                                         $k = substr($g, 1);
719                                         $g = $keys{$k};
720                                 }
721
722                                 # Callback function
723                                 if ($g =~ /^\</) {
724                                         $callback{data}{c} = $c;
725                                         $callback{data}{k} = $k;
726                                         $callback{data}{f} = $f;
727                                         $callback{data}{l} = $l;
728                                         $callback{data}{e} = $enc;
729                                         my @a = split(/\</, substr($g, 1));
730                                         my $rv =
731                                             &{$callback{$a[0]}}($values{$l}{$f}{$c}{$a[1]});
732                                         $values{$l}{$f}{$c}{$k} = $rv;
733                                         $g = $a[2];
734                                         $callback{data} = ();
735                                 }
736
737                                 my $v = $values{$l}{$f}{$c}{$k};
738                                 $v = "undef" if (!defined $v);
739
740                                 if ($g eq "i") {
741                                         $output .= "$v\n";
742                                         next;
743                                 }
744                                 if ($g eq "ai") {
745                                         $output .= "$v\n";
746                                         next;
747                                 }
748                                 if ($g eq "s") {
749                                         $v =~ s/^"//;
750                                         $v =~ s/"$//;
751                                         my $cm = "";
752                                         while ($v =~ /^(.*?)<(.*?)>(.*)/) {
753                                                 my $p1 = $1;
754                                                 $cm = $2;
755                                                 my $p3 = $3;
756
757                                                 my $rv = decodecldr($enc, $cm);
758 #                                               $rv = translate($enc, $cm)
759 #                                                       if (!defined $rv);
760                                                 if (!defined $rv) {
761                                                         print STDERR 
762 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
763                                                         $okay = 0;
764                                                         next;
765                                                 }
766
767                                                 $v = $p1 . $rv . $p3;
768                                         }
769                                         $output .= "$v\n";
770                                         next;
771                                 }
772                                 if ($g eq "as") {
773                                         foreach my $v (split(/;/, $v)) {
774                                                 $v =~ s/^"//;
775                                                 $v =~ s/"$//;
776                                                 my $cm = "";
777                                                 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
778                                                         my $p1 = $1;
779                                                         $cm = $2;
780                                                         my $p3 = $3;
781
782                                                         my $rv =
783                                                             decodecldr($enc,
784                                                                 $cm);
785 #                                                       $rv = translate($enc,
786 #                                                           $cm)
787 #                                                           if (!defined $rv);
788                                                         if (!defined $rv) {
789                                                                 print STDERR 
790 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
791                                                                 $okay = 0;
792                                                                 next;
793                                                         }
794
795                                                         $v = $1 . $rv . $3;
796                                                 }
797                                                 $output .= "$v\n";
798                                         }
799                                         next;
800                                 }
801
802                                 die("$k is '$g'");
803
804                         }
805
806                         $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
807                         $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
808                         print FOUT "$output# EOF\n";
809                         close(FOUT);
810
811                         if ($okay) {
812                                 rename("$TYPE.draft/$file.$enc.new",
813                                     "$TYPE.draft/$file.$enc.src");
814                         } else {
815                                 rename("$TYPE.draft/$file.$enc.new",
816                                     "$TYPE.draft/$file.$enc.failed");
817                         }
818                 }
819         }
820         }
821         }
822 }
823
824 sub make_makefile {
825         print "Creating Makefile for $TYPE\n";
826         my $SRCOUT;
827         my $SRCOUT2;
828         my $SRCOUT3 = "";
829         my $SRCOUT4 = "";
830         my $MAPLOC;
831         if ($TYPE eq "colldef") {
832                 $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U " .
833                         "-i \${.IMPSRC} \\\n" .
834                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} " .
835                         "\${.OBJDIR}/\${.IMPSRC:T:R}";
836                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
837                                 "locale/etc/final-maps\n";
838                 $SRCOUT2 = "LC_COLLATE";
839                 $SRCOUT3 = "" .
840                         ".for f t in \${LOCALES_MAPPED}\n" .
841                         "FILES+=\t\$t.LC_COLLATE\n" .
842                         "FILESDIR_\$t.LC_COLLATE=\t\${LOCALEDIR}/\$t\n" .
843                         "\$t.LC_COLLATE: \${.CURDIR}/\$f.src\n" .
844                         "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U " .
845                         "-i \${.ALLSRC} \\\n" .
846                         "\t\t-f \${MAPLOC}/map.\${.TARGET:T:R:E:C/@.*//} \\\n" .
847                         "\t\t\${.OBJDIR}/\${.TARGET:T:R}\n" .
848                         ".endfor\n\n";
849                 $SRCOUT4 = "## LOCALES_MAPPED\n";
850         }
851         elsif ($TYPE eq "ctypedef") {
852                 $SRCOUT = "localedef \${LOCALEDEF_ENDIAN} -D -U -c " .
853                         "-w \${MAPLOC}/widths.txt \\\n" .
854                         "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:E} " .
855                         "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
856                         " || true";
857                 $SRCOUT2 = "LC_CTYPE";
858                 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
859                                 "locale/etc/final-maps\n";
860                 $SRCOUT3 = "## SYMPAIRS\n\n" .
861                         ".for s t in \${SYMPAIRS}\n" .
862                         "\${t:S/src\$/LC_CTYPE/}: " .
863                         "\$s\n" .
864                         "\tlocaledef \${LOCALEDEF_ENDIAN} -D -U -c " .
865                         "-w \${MAPLOC}/widths.txt \\\n" .
866                         "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
867                         "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
868                         " || true\n" .
869                         ".endfor\n\n";
870         }
871         else {
872                 $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
873                 $SRCOUT2 = "out";
874                 $MAPLOC = "";
875         }
876         open(FOUT, ">$TYPE.draft/Makefile");
877         print FOUT <<EOF;
878 # \$FreeBSD\$
879 # Warning: Do not edit. This file is automatically generated from the
880 # tools in /usr/src/tools/tools/locale.
881
882 LOCALEDIR=      \${SHAREDIR}/locale
883 FILESNAME=      $FILESNAMES{$TYPE}
884 .SUFFIXES:      .src .${SRCOUT2}
885 ${MAPLOC}
886 EOF
887
888         if ($TYPE eq "colldef" || $TYPE eq "ctypedef") {
889                 print FOUT <<EOF;
890 .include <bsd.endian.mk>
891
892 EOF
893         }
894
895         print FOUT <<EOF;
896 .src.${SRCOUT2}:
897         $SRCOUT
898
899 ## PLACEHOLDER
900
901 ${SRCOUT4}
902
903 EOF
904
905         foreach my $hash (keys(%hashtable)) {
906                 # For colldef, weight LOCALES to UTF-8
907                 #     Sort as upper-case and reverse to achieve it
908                 #     Make en_US, ru_RU, and ca_AD preferred
909                 my @files;
910                 if ($TYPE eq "colldef") {
911                         @files = sort {
912                                 if ($a eq 'en_x_US.UTF-8' ||
913                                     $a eq 'ru_x_RU.UTF-8' ||
914                                     $a eq 'ca_x_AD.UTF-8') { return -1; }
915                                 elsif ($b eq 'en_x_US.UTF-8' ||
916                                        $b eq 'ru_x_RU.UTF-8' ||
917                                        $b eq 'ca_x_AD.UTF-8') { return 1; }
918                                 else { return uc($b) cmp uc($a); }
919                                 } keys(%{$hashtable{$hash}});
920                 } elsif ($TYPE eq "ctypedef") {
921                         @files = sort {
922                                 if ($a eq 'C_x_x.UTF-8') { return -1; }
923                                 elsif ($b eq 'C_x_x.UTF-8') { return 1; }
924                                 if ($a =~ /^en_x_US/) { return -1; }
925                                 elsif ($b =~ /^en_x_US/) { return 1; }
926
927                                 if ($a =~ /^en_x_GB.ISO8859-15/ ||
928                                     $a =~ /^ru_x_RU/) { return -1; }
929                                 elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
930                                        $b =~ /ru_x_RU/) { return 1; }
931                                 else { return uc($b) cmp uc($a); }
932
933                                 } keys(%{$hashtable{$hash}});
934                 } else {
935                         @files = sort {
936                                 if ($a =~ /_Comm_/ ||
937                                     $b eq 'en_x_US.UTF-8') { return 1; }
938                                 elsif ($b =~ /_Comm_/ ||
939                                        $a eq 'en_x_US.UTF-8') { return -1; }
940                                 else { return uc($b) cmp uc($a); }
941                                 } keys(%{$hashtable{$hash}});
942                 }
943                 if ($#files > 0) {
944                         my $link = shift(@files);
945                         $link =~ s/_x_x//;      # special case for C
946                         $link =~ s/_x_/_/;      # strip family if none there
947                         foreach my $file (@files) {
948                                 my @a = split(/_/, $file);
949                                 my @b = split(/\./, $a[-1]);
950                                 $file =~ s/_x_/_/;
951                                 print FOUT "SAME+=\t\t$link $file\n";
952                                 undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
953                         }
954                 }
955         }
956
957         foreach my $l (sort keys(%languages)) {
958         foreach my $f (sort keys(%{$languages{$l}})) {
959         foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
960                 next if (defined $languages{$l}{$f}{definitions}
961                     && $languages{$l}{$f}{definitions} !~ /$TYPE/);
962                 if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
963                  && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
964                         print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
965                             "${c} - not read\n";
966                         next;
967                 }
968                 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
969                         my $file = $l;
970                         $file .= "_" . $f if ($f ne "x");
971                         $file .= "_" . $c if ($c ne "x");
972                         next if (!defined $languages{$l}{$f}{data}{$c}{$e});
973                         print FOUT "LOCALES+=\t$file.$e\n";
974                 }
975
976                 if (defined $languages{$l}{$f}{nc_link}) {
977                         foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
978                                 my $file = $l . "_";
979                                 $file .= $f . "_" if ($f ne "x");
980                                 $file .= $c;
981                                 print FOUT "SAME+=\t\t$file.$e $languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
982                         }
983                 }
984
985                 if (defined $languages{$l}{$f}{e_link}) {
986                         foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
987                                 my @a = split(/:/, $el);
988                                 my $file = $l . "_";
989                                 $file .= $f . "_" if ($f ne "x");
990                                 $file .= $c;
991                                 print FOUT "SAME+=\t\t$file.$a[0] $file.$a[1]\t# legacy (same charset)\n";
992                         }
993                 }
994
995         }
996         }
997         }
998
999         print FOUT <<EOF;
1000
1001 FILES=          \${LOCALES:S/\$/.${SRCOUT2}/}
1002 CLEANFILES=     \${FILES}
1003
1004 .for f t in \${SAME}
1005 SYMLINKS+=      ../\$f/\${FILESNAME} \\
1006     \${LOCALEDIR}/\$t/\${FILESNAME}
1007 .endfor
1008
1009 .for f in \${LOCALES}
1010 FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
1011 .endfor
1012
1013 ${SRCOUT3}.include <bsd.prog.mk>
1014 EOF
1015
1016         close(FOUT);
1017 }