1 #!/usr/local/bin/perl -wC
9 use Digest::SHA qw(sha1_hex);
10 require "charmaps.pm";
14 print "Usage: $0 --cldr=<cldrdir> --unidata=<unidatadir> --etc=<etcdir> --type=<type> [--lc=<la_CC>]\n";
# $DEFENCODING is the encoding of the locale source files
# (<locale>.$DEFENCODING.src); data for other encodings is converted from it.
18 my $DEFENCODING = "UTF-8";
# $UNIDATADIR is the directory holding UnicodeData.txt; set by --unidata.
22 my $UNIDATADIR = undef;
27 my $result = GetOptions (
28 "cldr=s" => \$CLDRDIR,
29 "unidata=s" => \$UNIDATADIR,
# %translations and %alternativemonths start empty and are later replaced
# by the T and AM parts of the data returned by get_xmldata().
41 my %translations = ();
43 my %alternativemonths = ();
# Load the Unicode names, the UTF-8 charmap and the per-encoding charmaps.
48 get_unidata($UNIDATADIR);
49 get_utf8map("$CLDRDIR/posix/$DEFENCODING.cm");
50 get_encodings("$ETCDIR/charmaps");
# Tie::IxHash makes %keys and %hashtable iterate in insertion order.
53 tie(%keys, "Tie::IxHash");
54 tie(%hashtable, "Tie::IxHash");
# --type value => LC_* category name.  Presumably the body of %FILESNAMES,
# which supplies FILESNAME in the generated Makefile -- confirm against the
# declaration.
57 "monetdef" => "LC_MONETARY",
58 "timedef" => "LC_TIME",
59 "msgdef" => "LC_MESSAGES",
60 "numericdef" => "LC_NUMERIC",
61 "colldef" => "LC_COLLATE",
62 "ctypedef" => "LC_CTYPE"
# Callback dispatch entries: the name used in a "<name<key<type" key spec
# maps to the sub that computes the value.
66 mdorder => \&callback_mdorder,
67 altmon => \&callback_altmon,
68 cformat => \&callback_cformat,
# Key => human-readable description.  Presumably the body of %DESC, whose
# text is written as a "# ..." heading above each key in the generated
# output -- confirm against the declaration.
75 "decimal_point" => "decimal_point",
76 "thousands_sep" => "thousands_sep",
77 "grouping" => "grouping",
80 "int_curr_symbol" => "int_curr_symbol (last character always " .
82 "currency_symbol" => "currency_symbol",
83 "mon_decimal_point" => "mon_decimal_point",
84 "mon_thousands_sep" => "mon_thousands_sep",
85 "mon_grouping" => "mon_grouping",
86 "positive_sign" => "positive_sign",
87 "negative_sign" => "negative_sign",
88 "int_frac_digits" => "int_frac_digits",
89 "frac_digits" => "frac_digits",
90 "p_cs_precedes" => "p_cs_precedes",
91 "p_sep_by_space" => "p_sep_by_space",
92 "n_cs_precedes" => "n_cs_precedes",
93 "n_sep_by_space" => "n_sep_by_space",
94 "p_sign_posn" => "p_sign_posn",
95 "n_sign_posn" => "n_sign_posn",
98 "yesexpr" => "yesexpr",
100 "yesstr" => "yesstr",
104 "abmon" => "Short month names",
105 "mon" => "Long month names (as in a date)",
106 "abday" => "Short weekday names",
107 "day" => "Long weekday names",
112 "d_t_fmt" => "date_fmt",
113 "altmon" => "Long month names (without case ending)",
114 "md_order" => "md_order",
115 "t_fmt_ampm" => "ampm_fmt",
# Per-type processing.  colldef is handled by transform_collation(); the
# *def types below each list the keys to emit together with a format spec.
118 if ($TYPE eq "colldef") {
119 transform_collation();
123 if ($TYPE eq "ctypedef") {
# Format spec codes: presumably "s" = string, "i" = integer and
# "ai" = array of integers -- TODO confirm against the code that consumes
# them.
128 if ($TYPE eq "numericdef") {
130 "decimal_point" => "s",
131 "thousands_sep" => "s",
139 if ($TYPE eq "monetdef") {
141 "int_curr_symbol" => "s",
142 "currency_symbol" => "s",
143 "mon_decimal_point" => "s",
144 "mon_thousands_sep" => "s",
145 "mon_grouping" => "ai",
146 "positive_sign" => "s",
147 "negative_sign" => "s",
148 "int_frac_digits" => "i",
149 "frac_digits" => "i",
150 "p_cs_precedes" => "i",
151 "p_sep_by_space" => "i",
152 "n_cs_precedes" => "i",
153 "n_sep_by_space" => "i",
154 "p_sign_posn" => "i",
162 if ($TYPE eq "msgdef") {
174 if ($TYPE eq "timedef") {
# A spec of the form "<callback<source<type" names a %callback entry that
# is called with the value of the source key; its return value becomes the
# value of this key.
182 "c_fmt" => "<cformat<d_t_fmt<s",
186 "altmon" => "<altmon<mon<as",
187 "md_order" => "<mdorder<d_fmt<s",
# Callback registered under "cformat" in the callback table; the
# "<cformat<d_t_fmt<s" spec uses it to derive c_fmt from the d_t_fmt value.
195 sub callback_cformat {
# Callback registered under "mdorder" in the callback table; the
# "<mdorder<d_fmt<s" spec uses it to derive md_order from the d_fmt value.
202 sub callback_mdorder {
# Nothing to derive from when $s is undefined ($s is presumably the
# callback's argument -- its assignment is not shown here).
204 return undef if (!defined $s);
# Callback registered under "altmon" in the callback table; the
# "<altmon<mon<as" spec uses it to derive altmon from the mon value.
209 sub callback_altmon {
210 # If the language/country pair is known in %alternativemonths then
211 # return that, otherwise repeat mon.
# %alternativemonths is keyed by language ($callback{data}{l}) and then by
# country ($callback{data}{c}).
214 if (defined $alternativemonths{$callback{data}{l}}{$callback{data}{c}}) {
# The stored value is a single ';'-separated list of month names.
215 my @altnames = split(";",$alternativemonths{$callback{data}{l}}{$callback{data}{c}});
# Hand the names back in the same ';'-separated form.
223 return join(";",@cleaned);
229 ############################
# $directory (first argument) is where UnicodeData.txt is looked up.
232 my $directory = shift;
234 open(FIN, "$directory/UnicodeData.txt")
235 or die("Cannot open $directory/UnicodeData.txt");;
# Each line is split on ';': field 0 is stored as the code and field 1 as
# the name, giving %ucd lookups in both directions.
240 foreach my $l (@lines) {
241 my @a = split(/;/, $l);
243 $ucd{code2name}{"$a[0]"} = $a[1]; # Unicode name
244 $ucd{name2code}{"$a[1]"} = $a[0]; # Unicode code
# Walk the charmap lines, using only those between "CHARMAP" and
# "END CHARMAP".
259 foreach my $l (@lines) {
# Lines starting with '#' are comments.
261 next if ($l =~ /^\#/);
264 if ($l eq "CHARMAP") {
269 next if (!$incharmap);
270 last if ($l eq "END CHARMAP");
# An entry is "<symbolic_name>" followed by whitespace and the value.
272 $l =~ /^<([^\s]+)>\s+(.*)/;
275 $k =~ s/_/ /g; # unicode char string
276 $v =~ s/\\x//g; # UTF-8 char code
# When this entry has the same value as the previous one, record it in
# %utf8aliases as an alias pointing at the previous name.
279 $utf8aliases{$k} = $prev_k if ($prev_v eq $v);
# For every known encoding, read its charmap from "$dir/<encoding>.TXT".
288 foreach my $e (sort(keys(%encodings))) {
289 if (!open(FIN, "$dir/$e.TXT")) {
290 print "Cannot open charmap for $e\n";
298 foreach my $l (@lines) {
# Lines starting with '#' are comments.
300 next if ($l =~ /^\#/);
# Whitespace-separated columns: local code first, Unicode code second.
303 my @a = split(" ", $l);
305 $a[0] =~ s/^0[xX]//; # local char code
306 $a[1] =~ s/^0[xX]//; # unicode char code
# $convertors{encoding}{UNICODE CODE} = LOCAL CODE; both are upper-cased
# hex strings with the 0x prefix removed.
307 $convertors{$e}{uc($a[1])} = uc($a[0]);
# Load the configuration data and spread its L, T, AM and E parts over the
# %languages, %translations, %alternativemonths and %encodings globals.
313 my %data = get_xmldata($ETCDIR);
314 %languages = %{$data{L}};
315 %translations = %{$data{T}};
316 %alternativemonths = %{$data{AM}};
317 %encodings = %{$data{E}};
# Without $doonly there is no filter to set up ($doonly presumably carries
# the --lc=<la_CC> option -- confirm against the option parsing).
319 return if (!defined $doonly);
# The requested locale is split into its '_'-separated components.
321 my @a = split(/_/, $doonly);
332 print Dumper(@filter);
# Write the LC_CTYPE draft sources ("$TYPE.draft/<locale>.<encoding>.src")
# for every selected language/family/country, and record a SHA-1 of each
# output in %languages and %hashtable.
336 sub transform_ctypes {
337 foreach my $l (sort keys(%languages)) {
338 foreach my $f (sort keys(%{$languages{$l}})) {
339 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
# When @filter holds three components (language, family, country), only
# the matching locale is processed.
340 next if ($#filter == 2 && ($filter[0] ne $l
341 || $filter[1] ne $f || $filter[2] ne $c));
# Skip families whose "definitions" entry does not mention this type.
342 next if (defined $languages{$l}{$f}{definitions}
343 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
344 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
# The family "x" is left out of file names.
347 $file .= $f . "_" if ($f ne "x");
# The default-encoding pass reads this one fixed source file for every
# locale.
351 my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src";
352 if (! -f $filename) {
353 print STDERR "Cannot open $filename\n";
356 open(FIN, "$filename");
357 print "Reading from $filename for ${l}_${f}_${c}\n";
358 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
# The digest of @lines replaces the read marker and is recorded in
# %hashtable under the locale's name.
366 $shex = sha1_hex(join("\n", @lines));
367 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
368 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
369 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
372 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
# The default encoding was already handled above.
373 next if ($enc eq $DEFENCODING);
374 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
375 if (! -f $filename) {
376 print STDERR "Cannot open $filename\n";
380 open(FIN, "$filename");
382 if ((/^comment_char\s/) || (/^escape_char\s/)){
385 if (/^LC_CTYPE/../^END LC_CTYPE/) {
# The encoding name is appended to the digest input, so each encoding gets
# its own hash even when the lines are the same.
390 $uhex = sha1_hex(join("\n", @lines) . $enc);
391 $languages{$l}{$f}{data}{$c}{$enc} = $uhex;
392 $hashtable{$uhex}{"${l}_${f}_${c}.$enc"} = 1;
393 open(FOUT, ">$TYPE.draft/$actfile.$enc.src");
395 # Warning: Do not edit. This file is automatically extracted from the
396 # tools in /usr/src/tools/tools/locale. The data is obtained from the
397 # CLDR project, obtained from http://cldr.unicode.org/
398 # -----------------------------------------------------------------------------
# Write the LC_COLLATE draft sources ("$TYPE.draft/<locale>.<encoding>.src")
# for every selected language/family/country, and record a SHA-1 of each
# output in %languages and %hashtable.
409 sub transform_collation {
410 foreach my $l (sort keys(%languages)) {
411 foreach my $f (sort keys(%{$languages{$l}})) {
412 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
# When @filter holds three components (language, family, country), only
# the matching locale is processed.
413 next if ($#filter == 2 && ($filter[0] ne $l
414 || $filter[1] ne $f || $filter[2] ne $c));
# Skip families whose "definitions" entry does not mention this type.
415 next if (defined $languages{$l}{$f}{definitions}
416 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
417 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
# The family "x" is left out of file names.
420 $file .= $f . "_" if ($f ne "x");
# Candidate locations for the source file: the CLDR posix directory,
# $ETCDIR, and the family's "fallback" locale.  The conditions choosing
# between them are only partly visible here.
424 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
425 $filename = "$ETCDIR/$file.$DEFENCODING.src"
428 && defined $languages{$l}{$f}{fallback}) {
429 $file = $languages{$l}{$f}{fallback};
430 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
432 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
434 if (! -f $filename) {
436 "Cannot open $file.$DEFENCODING.src or fallback\n";
439 open(FIN, "$filename");
440 print "Reading from $filename for ${l}_${f}_${c}\n";
441 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
445 if ((/^comment_char\s/) || (/^escape_char\s/)){
448 if (/^LC_COLLATE/../^END LC_COLLATE/) {
# The digest of @lines replaces the read marker and is recorded in
# %hashtable under the locale's name.
454 $shex = sha1_hex(join("\n", @lines));
455 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = $shex;
456 $hashtable{$shex}{"${l}_${f}_${c}.$DEFENCODING"} = 1;
457 open(FOUT, ">$TYPE.draft/$actfile.$DEFENCODING.src");
459 # Warning: Do not edit. This file is automatically extracted from the
460 # tools in /usr/src/tools/tools/locale. The data is obtained from the
461 # CLDR project, obtained from http://cldr.unicode.org/
462 # -----------------------------------------------------------------------------
467 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
468 next if ($enc eq $DEFENCODING);
# Every other encoding gets a copy of the default-encoding file and shares
# its hash.
469 copy ("$TYPE.draft/$actfile.$DEFENCODING.src",
470 "$TYPE.draft/$actfile.$enc.src");
471 $languages{$l}{$f}{data}{$c}{$enc} = $shex;
472 $hashtable{$shex}{"${l}_${f}_${c}.$enc"} = 1;
# For every selected language/family/country, read the locale source and
# collect the value of each key in %keys into $values{language}{country}{key}.
480 foreach my $l (sort keys(%languages)) {
481 foreach my $f (sort keys(%{$languages{$l}})) {
482 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
# When @filter holds three components (language, family, country), only
# the matching locale is processed.
483 next if ($#filter == 2 && ($filter[0] ne $l
484 || $filter[1] ne $f || $filter[2] ne $c));
# Skip families whose "definitions" entry does not mention this type.
485 next if (defined $languages{$l}{$f}{definitions}
486 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
488 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread
# The family "x" is left out of file names.
491 $file .= $f . "_" if ($f ne "x");
# Candidate locations for the source file: the CLDR posix directory,
# $ETCDIR, and the family's "fallback" locale.  The conditions choosing
# between them are only partly visible here.
494 my $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
495 $filename = "$ETCDIR/$file.$DEFENCODING.src"
498 && defined $languages{$l}{$f}{fallback}) {
499 $file = $languages{$l}{$f}{fallback};
500 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src";
502 $filename = "$CLDRDIR/posix/$file.$DEFENCODING.src"
504 if (! -f $filename) {
506 "Cannot open $file.$DEFENCODING.src or fallback\n";
509 open(FIN, "$filename");
510 print "Reading from $filename for ${l}_${f}_${c}\n";
511 $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 1; # read
516 foreach my $k (keys(%keys)) {
517 foreach my $line (@lines) {
# Outside a continuation, only lines that begin with the key name followed
# by whitespace are of interest.
519 next if (!$continue && $line !~ /^$k\s/);
526 $values{$l}{$c}{$k} = ""
527 if (!defined $values{$l}{$c}{$k});
# A trailing '/' marks a value continued on the next line; the marker is
# removed before the text is stored.
529 $continue = ($line =~ /\/$/);
530 $line =~ s/\/$// if ($continue);
# Underscores inside <...> names are turned into spaces; any underscore
# still present afterwards is fatal.
532 while ($line =~ /_/) {
534 s/\<([^>_]+)_([^>]+)\>/<$1 $2>/;
536 die "_ in data - $line" if ($line =~ /_/);
537 $values{$l}{$c}{$k} .= $line;
539 last if (!$continue);
# Convert the character name $s to raw bytes in encoding $e.  $v holds the
# resulting character code as a hex string and $ucc the Unicode code.
555 # Conversion to UTF-8 can be done from the Unicode name to
556 # the UTF-8 character code.
559 die "Cannot convert $s in $e (charmap)" if (!defined $v);
562 # Conversion to these encodings can be done from the Unicode
563 # name to Unicode code to the encodings code.
# Look the name up directly in the Unicode data, or through its UTF-8
# charmap alias.
566 $ucc = $ucd{name2code}{$s} if (defined $ucd{name2code}{$s});
567 $ucc = $ucd{name2code}{$utf8aliases{$s}}
570 && defined $ucd{name2code}{$utf8aliases{$s}});
# A per-encoding translation entry can supply the final hex code or the
# Unicode code directly.
573 if (defined $translations{$e}{$s}{hex}) {
574 $v = $translations{$e}{$s}{hex};
576 } elsif (defined $translations{$e}{$s}{ucc}) {
577 $ucc = $translations{$e}{$s}{ucc};
581 die "Cannot convert $s in $e (ucd string)" if (!defined $ucc);
# Map the Unicode code to the encoding's own code unless $v is already set.
582 $v = $convertors{$e}{$ucc} if (!defined $v);
# Fallbacks while $v is still undefined: a translation hex code, then a
# substitute Unicode name taken from the translation entry.
584 $v = $translations{$e}{$s}{hex}
585 if (!defined $v && defined $translations{$e}{$s}{hex});
587 if (!defined $v && defined $translations{$e}{$s}{unicode}) {
588 my $ucn = $translations{$e}{$s}{unicode};
589 $ucc = $ucd{name2code}{$ucn}
590 if (defined $ucd{name2code}{$ucn});
591 $ucc = $ucd{name2code}{$utf8aliases{$ucn}}
593 && defined $ucd{name2code}{$utf8aliases{$ucn}});
594 $v = $convertors{$e}{$ucc};
597 die "Cannot convert $s in $e (charmap)" if (!defined $v);
# Pack the hex string into bytes, one byte per two hex digits; one to
# three bytes are handled.
600 return pack("C", hex($v)) if (length($v) == 2);
601 return pack("CC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)))
602 if (length($v) == 4);
603 return pack("CCC", hex(substr($v, 0, 2)), hex(substr($v, 2, 2)),
604 hex(substr($v, 4, 2))) if (length($v) == 6);
# Any other length is reported on STDERR and a "length = N" string is
# returned in place of the character.
605 print STDERR "Cannot convert $e $s\n";
606 return "length = " . length($v);
# Return the %translations entry for $v in encoding $enc when one is defined.
614 return $translations{$enc}{$v} if (defined $translations{$enc}{$v});
# For every selected locale and each of its encodings, build the output
# text for all keys in %keys and write it to "$TYPE.draft/<file>.<enc>.new",
# which is then renamed to .src or .failed.
619 foreach my $l (sort keys(%languages)) {
620 foreach my $f (sort keys(%{$languages{$l}})) {
621 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
# When @filter holds three components (language, family, country), only
# the matching locale is processed.
622 next if ($#filter == 2 && ($filter[0] ne $l
623 || $filter[1] ne $f || $filter[2] ne $c));
# Skip families whose "definitions" entry does not mention this type.
624 next if (defined $languages{$l}{$f}{definitions}
625 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
626 foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
# A default-encoding entry of "0" is the "unread" marker.
627 if ($languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
628 print "Skipping ${l}_" .
629 ($f eq "x" ? "" : "${f}_") .
# The family "x" is left out of file names.
634 $file .= "_" . $f if ($f ne "x");
636 print "Writing to $file in $enc\n";
# Encodings other than the default need a table in %convertors.
638 if ($enc ne $DEFENCODING &&
639 !defined $convertors{$enc}) {
640 print "Failed! Cannot convert to $enc.\n";
644 open(FOUT, ">$TYPE.draft/$file.$enc.new");
648 # Warning: Do not edit. This file is automatically generated from the
649 # tools in /usr/src/tools/tools/locale. The data is obtained from the
650 # CLDR project, obtained from http://cldr.unicode.org/
651 # -----------------------------------------------------------------------------
653 foreach my $k (keys(%keys)) {
656 die("Unknown $k in \%DESC")
657 if (!defined $DESC{$k});
659 $output .= "#\n# $DESC{$k}\n";
661 # Replace one row with another
# $callback{data} carries the current country, key, language and encoding
# to the callback and is cleared after the call.
669 $callback{data}{c} = $c;
670 $callback{data}{k} = $k;
671 $callback{data}{l} = $l;
672 $callback{data}{e} = $enc;
# Drop the leading '<' and split the rest on '<': $a[0] is the callback
# name and $a[1] the source key ($f presumably holds the key's format spec
# at this point -- its assignment is not shown here).
673 my @a = split(/\</, substr($f, 1));
675 &{$callback{$a[0]}}($values{$l}{$c}{$a[1]});
676 $values{$l}{$c}{$k} = $rv;
678 $callback{data} = ();
681 my $v = $values{$l}{$c}{$k};
682 $v = "undef" if (!defined $v);
# Each <...> name in the value is converted with decodecldr() and the
# result is spliced back between the surrounding text.
696 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
701 my $rv = decodecldr($enc, $cm);
702 # $rv = translate($enc, $cm)
706 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
711 $v = $p1 . $rv . $p3;
# Here the value is split on ';' and each element is handled on its own.
717 foreach my $v (split(/;/, $v)) {
721 while ($v =~ /^(.*?)<(.*?)>(.*)/) {
729 # $rv = translate($enc,
734 "Could not convert $k ($cm) from $DEFENCODING to $enc\n";
# The digest of the generated text is stored for this locale/encoding and
# recorded in %hashtable.
750 $languages{$l}{$f}{data}{$c}{$enc} = sha1_hex($output);
751 $hashtable{sha1_hex($output)}{"${l}_${f}_${c}.$enc"} = 1;
752 print FOUT "$output# EOF\n";
# The .new file becomes .src or .failed; the condition selecting between
# the two renames is not visible here.
756 rename("$TYPE.draft/$file.$enc.new",
757 "$TYPE.draft/$file.$enc.src");
759 rename("$TYPE.draft/$file.$enc.new",
760 "$TYPE.draft/$file.$enc.failed");
# Write "$TYPE.draft/Makefile".  Nothing is written when @filter is
# non-empty.
769 return if ($#filter > -1);
770 print "Creating Makefile for $TYPE\n";
# $SRCOUT is the command that builds a target from a .src file, $SRCOUT2
# the target suffix, and $MAPLOC / $SRCOUT3 extra Makefile text used for
# some types.
775 if ($TYPE eq "colldef") {
776 $SRCOUT = "localedef -D -U -i \${.IMPSRC} \\\n" .
777 "\t-f \${MAPLOC}/map.UTF-8 " .
778 "\${.OBJDIR}/\${.IMPSRC:T:R}";
779 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
780 "locale/etc/final-maps\n";
781 $SRCOUT2 = "LC_COLLATE";
783 elsif ($TYPE eq "ctypedef") {
784 $SRCOUT = "localedef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
785 "\t-f \${MAPLOC}/map.\${.IMPSRC:T:R:C/^.*\\.//} " .
786 "\\\n\t-i \${.IMPSRC} \${.OBJDIR}/\${.IMPSRC:T:R} " .
788 $SRCOUT2 = "LC_CTYPE";
789 $MAPLOC = "MAPLOC=\t\t\${.CURDIR}/../../tools/tools/" .
790 "locale/etc/final-maps\n";
791 $SRCOUT3 = "## SYMPAIRS\n\n" .
792 ".for PAIR in \${SYMPAIRS}\n" .
793 "\${PAIR:C/^.*://:S/src\$/LC_CTYPE/}: " .
794 "\${PAIR:C/:.*//}\n" .
795 "\tlocaledef -D -U -c -w \${MAPLOC}/widths.txt \\\n" .
796 "\t-f \${MAPLOC}/map.\${.TARGET:T:R:C/^.*\\.//} " .
797 "\\\n\t-i \${.ALLSRC} \${.OBJDIR}/\${.TARGET:T:R} " .
802 $SRCOUT = "grep -v -E '^(\#\$\$|\#[ ])' < \${.IMPSRC} > \${.TARGET}";
806 open(FOUT, ">$TYPE.draft/Makefile");
808 # Warning: Do not edit. This file is automatically generated from the
809 # tools in /usr/src/tools/tools/locale.
811 LOCALEDIR= \${SHAREDIR}/locale
812 FILESNAME= $FILESNAMES{$TYPE}
813 .SUFFIXES: .src .${SRCOUT2}
822 foreach my $hash (keys(%hashtable)) {
823 # For colldef, weight LOCALES to UTF-8
824 # Sort as upper-case and reverse to achieve it
825 # Make en_US, ru_RU, and ca_AD preferred
827 if ($TYPE eq "colldef") {
829 if ($a eq 'en_x_US.UTF-8' ||
830 $a eq 'ru_x_RU.UTF-8' ||
831 $a eq 'ca_x_AD.UTF-8') { return -1; }
832 elsif ($b eq 'en_x_US.UTF-8' ||
833 $b eq 'ru_x_RU.UTF-8' ||
834 $b eq 'ca_x_AD.UTF-8') { return 1; }
835 else { return uc($b) cmp uc($a); }
836 } keys(%{$hashtable{$hash}});
837 } elsif ($TYPE eq "ctypedef") {
# For ctypedef, en_x_US.UTF-8 sorts first, then the other en_x_US
# encodings, then the en_x_GB.ISO8859-15 and ru_x_RU patterns; the rest
# are in reverse upper-case order.
839 if ($a eq 'en_x_US.UTF-8') { return -1; }
840 elsif ($b eq 'en_x_US.UTF-8') { return 1; }
841 if ($a =~ /^en_x_US/) { return -1; }
842 elsif ($b =~ /^en_x_US/) { return 1; }
844 if ($a =~ /^en_x_GB.ISO8859-15/ ||
845 $a =~ /^ru_x_RU/) { return -1; }
846 elsif ($b =~ /^en_x_GB.ISO8859-15/ ||
847 $b =~ /ru_x_RU/) { return 1; }
848 else { return uc($b) cmp uc($a); }
850 } keys(%{$hashtable{$hash}});
# For the remaining types, names containing "_Comm_" sort last and
# en_x_US.UTF-8 sorts first.
853 if ($a =~ /_Comm_/ ||
854 $b eq 'en_x_US.UTF-8') { return 1; }
855 elsif ($b =~ /_Comm_/ ||
856 $a eq 'en_x_US.UTF-8') { return -1; }
857 else { return uc($b) cmp uc($a); }
858 } keys(%{$hashtable{$hash}});
# The first name after sorting is the one the others point at.  Every
# other name sharing the hash is written as a "SAME+= link:file" entry and
# its %languages entry is undefined, so the LOCALES loop below skips it.
861 my $link = shift(@files);
862 $link =~ s/_x_/_/; # strip family if none there
863 foreach my $file (@files) {
864 my @a = split(/_/, $file);
865 my @b = split(/\./, $a[-1]);
867 print FOUT "SAME+=\t\t$link:$file\n";
868 undef($languages{$a[0]}{$a[1]}{data}{$b[0]}{$b[1]});
873 foreach my $l (sort keys(%languages)) {
874 foreach my $f (sort keys(%{$languages{$l}})) {
875 foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) {
876 next if ($#filter == 2 && ($filter[0] ne $l
877 || $filter[1] ne $f || $filter[2] ne $c));
# Skip families whose "definitions" entry does not mention this type.
878 next if (defined $languages{$l}{$f}{definitions}
879 && $languages{$l}{$f}{definitions} !~ /$TYPE/);
# A default-encoding entry of "0" is the "unread" marker.
880 if (defined $languages{$l}{$f}{data}{$c}{$DEFENCODING}
881 && $languages{$l}{$f}{data}{$c}{$DEFENCODING} eq "0") {
882 print "Skipping ${l}_" . ($f eq "x" ? "" : "${f}_") .
886 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
888 $file .= $f . "_" if ($f ne "x");
# Entries undefined by the SAME+= pass above are not listed in LOCALES.
890 next if (!defined $languages{$l}{$f}{data}{$c}{$e});
891 print FOUT "LOCALES+=\t$file.$e\n";
# nc_link: the same locale under another language/country name.
894 if (defined $languages{$l}{$f}{nc_link}) {
895 foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) {
897 $file .= $f . "_" if ($f ne "x");
899 print FOUT "SAME+=\t\t$file.$e:$languages{$l}{$f}{nc_link}.$e\t# legacy (lang/country change)\n";
# e_link: a space-separated list of colon-separated encoding pairs.
903 if (defined $languages{$l}{$f}{e_link}) {
904 foreach my $el (split(" ", $languages{$l}{$f}{e_link})) {
905 my @a = split(/:/, $el);
907 $file .= $f . "_" if ($f ne "x");
909 print FOUT "SAME+=\t\t$file.$a[0]:$file.$a[1]\t# legacy (same charset)\n";
919 FILES= \${LOCALES:S/\$/.${SRCOUT2}/}
920 CLEANFILES= \${FILES}
923 SYMLINKS+= ../\${f:C/:.*\$//}/\${FILESNAME} \${LOCALEDIR}/\${f:C/^.*://}
926 .for f in \${LOCALES}
927 FILESDIR_\${f}.${SRCOUT2}= \${LOCALEDIR}/\${f}
930 ${SRCOUT3}.include <bsd.prog.mk>