From a52c0ea420dade81a9426c51a14942d78c02c968 Mon Sep 17 00:00:00 2001 From: Yuri Pankov Date: Thu, 6 Dec 2018 10:41:22 +0000 Subject: [PATCH] MFC r340144: Add hybrid C.UTF-8 locale being identical to default C locale except that it uses the same ctype maps and functions as other UTF-8 locales. Reviewed by: bapt, cem, eadler Differential Revision: https://reviews.freebsd.org/D17833 --- lib/libc/locale/collate.c | 6 +- lib/libc/locale/ldpart.c | 3 +- .../ctypedef/{en_US.UTF-8.src => C.UTF-8.src} | 0 share/ctypedef/Makefile | 155 +++++++++--------- tools/tools/locale/Makefile | 4 +- tools/tools/locale/tools/cldr2def.pl | 23 +-- 6 files changed, 99 insertions(+), 92 deletions(-) rename share/ctypedef/{en_US.UTF-8.src => C.UTF-8.src} (100%) diff --git a/lib/libc/locale/collate.c b/lib/libc/locale/collate.c index 5017433a9d9..15ce3b2f52e 100644 --- a/lib/libc/locale/collate.c +++ b/lib/libc/locale/collate.c @@ -84,7 +84,8 @@ destruct_collate(void *t) void * __collate_load(const char *encoding, __unused locale_t unused) { - if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { + if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || + strncmp(encoding, "C.", 2) == 0) { return &__xlocale_C_collate; } struct xlocale_collate *table = calloc(sizeof(struct xlocale_collate), 1); @@ -122,7 +123,8 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table) table->__collate_load_error = 1; /* 'encoding' must be already checked. */ - if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { + if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0 || + strncmp(encoding, "C.", 2) == 0) { return (_LDP_CACHE); } diff --git a/lib/libc/locale/ldpart.c b/lib/libc/locale/ldpart.c index ed794337e11..889291cf316 100644 --- a/lib/libc/locale/ldpart.c +++ b/lib/libc/locale/ldpart.c @@ -63,7 +63,8 @@ __part_load_locale(const char *name, size_t namesize, bufsize; /* 'name' must be already checked. */ - if (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0) { + if (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0 || + strncmp(name, "C.", 2) == 0) { *using_locale = 0; return (_LDP_CACHE); } diff --git a/share/ctypedef/en_US.UTF-8.src b/share/ctypedef/C.UTF-8.src similarity index 100% rename from share/ctypedef/en_US.UTF-8.src rename to share/ctypedef/C.UTF-8.src diff --git a/share/ctypedef/Makefile b/share/ctypedef/Makefile index 279e7e4492c..ab002fc82c9 100644 --- a/share/ctypedef/Makefile +++ b/share/ctypedef/Makefile @@ -14,6 +14,7 @@ MAPLOC= ${.CURDIR}/../../tools/tools/locale/etc/final-maps -f ${MAPLOC}/map.${.IMPSRC:T:R:E} \ -i ${.IMPSRC} ${.OBJDIR}/${.IMPSRC:T:R} || true +LOCALES+= C.UTF-8 LOCALES+= be_BY.CP1131 LOCALES+= ca_IT.ISO8859-1 LOCALES+= ca_IT.ISO8859-15 @@ -21,7 +22,6 @@ LOCALES+= el_GR.ISO8859-7 LOCALES+= en_US.ISO8859-1 LOCALES+= en_US.ISO8859-15 LOCALES+= en_US.US-ASCII -LOCALES+= en_US.UTF-8 LOCALES+= hi_IN.ISCII-DEV LOCALES+= hy_AM.ARMSCII-8 LOCALES+= ja_JP.SJIS @@ -45,82 +45,83 @@ LOCALES+= zh_TW.Big5 -SAME+= en_US.UTF-8 ru_RU.UTF-8 -SAME+= en_US.UTF-8 zh_TW.UTF-8 -SAME+= en_US.UTF-8 zh_HK.UTF-8 -SAME+= en_US.UTF-8 zh_CN.UTF-8 -SAME+= en_US.UTF-8 uk_UA.UTF-8 -SAME+= en_US.UTF-8 tr_TR.UTF-8 -SAME+= en_US.UTF-8 sv_SE.UTF-8 -SAME+= en_US.UTF-8 sv_FI.UTF-8 -SAME+= en_US.UTF-8 sr_RS.UTF-8@latin -SAME+= en_US.UTF-8 sr_RS.UTF-8 -SAME+= en_US.UTF-8 sl_SI.UTF-8 -SAME+= en_US.UTF-8 sk_SK.UTF-8 -SAME+= en_US.UTF-8 se_NO.UTF-8 -SAME+= en_US.UTF-8 se_FI.UTF-8 -SAME+= en_US.UTF-8 ro_RO.UTF-8 -SAME+= en_US.UTF-8 pt_PT.UTF-8 -SAME+= en_US.UTF-8 pt_BR.UTF-8 -SAME+= en_US.UTF-8 pl_PL.UTF-8 -SAME+= en_US.UTF-8 nn_NO.UTF-8 -SAME+= en_US.UTF-8 nl_NL.UTF-8 -SAME+= en_US.UTF-8 nl_BE.UTF-8 -SAME+= en_US.UTF-8 nb_NO.UTF-8 -SAME+= en_US.UTF-8 mn_MN.UTF-8 -SAME+= en_US.UTF-8 lv_LV.UTF-8 -SAME+= en_US.UTF-8 lt_LT.UTF-8 -SAME+= en_US.UTF-8 ko_KR.UTF-8 -SAME+= en_US.UTF-8 kk_KZ.UTF-8 -SAME+= en_US.UTF-8 ja_JP.UTF-8 -SAME+= en_US.UTF-8 it_IT.UTF-8 -SAME+= en_US.UTF-8 it_CH.UTF-8 -SAME+= en_US.UTF-8 is_IS.UTF-8 -SAME+= en_US.UTF-8 hy_AM.UTF-8 -SAME+= en_US.UTF-8 hu_HU.UTF-8 -SAME+= en_US.UTF-8 hr_HR.UTF-8 -SAME+= en_US.UTF-8 hi_IN.UTF-8 -SAME+= en_US.UTF-8 he_IL.UTF-8 -SAME+= en_US.UTF-8 fr_FR.UTF-8 -SAME+= en_US.UTF-8 fr_CH.UTF-8 -SAME+= en_US.UTF-8 fr_CA.UTF-8 -SAME+= en_US.UTF-8 fr_BE.UTF-8 -SAME+= en_US.UTF-8 fi_FI.UTF-8 -SAME+= en_US.UTF-8 eu_ES.UTF-8 -SAME+= en_US.UTF-8 et_EE.UTF-8 -SAME+= en_US.UTF-8 es_MX.UTF-8 -SAME+= en_US.UTF-8 es_ES.UTF-8 -SAME+= en_US.UTF-8 es_CR.UTF-8 -SAME+= en_US.UTF-8 es_AR.UTF-8 -SAME+= en_US.UTF-8 en_ZA.UTF-8 -SAME+= en_US.UTF-8 en_SG.UTF-8 -SAME+= en_US.UTF-8 en_PH.UTF-8 -SAME+= en_US.UTF-8 en_NZ.UTF-8 -SAME+= en_US.UTF-8 en_IE.UTF-8 -SAME+= en_US.UTF-8 en_HK.UTF-8 -SAME+= en_US.UTF-8 en_GB.UTF-8 -SAME+= en_US.UTF-8 en_CA.UTF-8 -SAME+= en_US.UTF-8 en_AU.UTF-8 -SAME+= en_US.UTF-8 el_GR.UTF-8 -SAME+= en_US.UTF-8 de_DE.UTF-8 -SAME+= en_US.UTF-8 de_CH.UTF-8 -SAME+= en_US.UTF-8 de_AT.UTF-8 -SAME+= en_US.UTF-8 da_DK.UTF-8 -SAME+= en_US.UTF-8 cs_CZ.UTF-8 -SAME+= en_US.UTF-8 ca_IT.UTF-8 -SAME+= en_US.UTF-8 ca_FR.UTF-8 -SAME+= en_US.UTF-8 ca_ES.UTF-8 -SAME+= en_US.UTF-8 ca_AD.UTF-8 -SAME+= en_US.UTF-8 bg_BG.UTF-8 -SAME+= en_US.UTF-8 be_BY.UTF-8 -SAME+= en_US.UTF-8 ar_SA.UTF-8 -SAME+= en_US.UTF-8 ar_QA.UTF-8 -SAME+= en_US.UTF-8 ar_MA.UTF-8 -SAME+= en_US.UTF-8 ar_JO.UTF-8 -SAME+= en_US.UTF-8 ar_EG.UTF-8 -SAME+= en_US.UTF-8 ar_AE.UTF-8 -SAME+= en_US.UTF-8 am_ET.UTF-8 -SAME+= en_US.UTF-8 af_ZA.UTF-8 +SAME+= C.UTF-8 en_US.UTF-8 +SAME+= C.UTF-8 ru_RU.UTF-8 +SAME+= C.UTF-8 zh_TW.UTF-8 +SAME+= C.UTF-8 zh_HK.UTF-8 +SAME+= C.UTF-8 zh_CN.UTF-8 +SAME+= C.UTF-8 uk_UA.UTF-8 +SAME+= C.UTF-8 tr_TR.UTF-8 +SAME+= C.UTF-8 sv_SE.UTF-8 +SAME+= C.UTF-8 sv_FI.UTF-8 +SAME+= C.UTF-8 sr_RS.UTF-8@latin +SAME+= C.UTF-8 sr_RS.UTF-8 +SAME+= C.UTF-8 sl_SI.UTF-8 +SAME+= C.UTF-8 sk_SK.UTF-8 +SAME+= C.UTF-8 se_NO.UTF-8 +SAME+= C.UTF-8 se_FI.UTF-8 +SAME+= C.UTF-8 ro_RO.UTF-8 +SAME+= C.UTF-8 pt_PT.UTF-8 +SAME+= C.UTF-8 pt_BR.UTF-8 +SAME+= C.UTF-8 pl_PL.UTF-8 +SAME+= C.UTF-8 nn_NO.UTF-8 +SAME+= C.UTF-8 nl_NL.UTF-8 +SAME+= C.UTF-8 nl_BE.UTF-8 +SAME+= C.UTF-8 nb_NO.UTF-8 +SAME+= C.UTF-8 mn_MN.UTF-8 +SAME+= C.UTF-8 lv_LV.UTF-8 +SAME+= C.UTF-8 lt_LT.UTF-8 +SAME+= C.UTF-8 ko_KR.UTF-8 +SAME+= C.UTF-8 kk_KZ.UTF-8 +SAME+= C.UTF-8 ja_JP.UTF-8 +SAME+= C.UTF-8 it_IT.UTF-8 +SAME+= C.UTF-8 it_CH.UTF-8 +SAME+= C.UTF-8 is_IS.UTF-8 +SAME+= C.UTF-8 hy_AM.UTF-8 +SAME+= C.UTF-8 hu_HU.UTF-8 +SAME+= C.UTF-8 hr_HR.UTF-8 +SAME+= C.UTF-8 hi_IN.UTF-8 +SAME+= C.UTF-8 he_IL.UTF-8 +SAME+= C.UTF-8 fr_FR.UTF-8 +SAME+= C.UTF-8 fr_CH.UTF-8 +SAME+= C.UTF-8 fr_CA.UTF-8 +SAME+= C.UTF-8 fr_BE.UTF-8 +SAME+= C.UTF-8 fi_FI.UTF-8 +SAME+= C.UTF-8 eu_ES.UTF-8 +SAME+= C.UTF-8 et_EE.UTF-8 +SAME+= C.UTF-8 es_MX.UTF-8 +SAME+= C.UTF-8 es_ES.UTF-8 +SAME+= C.UTF-8 es_CR.UTF-8 +SAME+= C.UTF-8 es_AR.UTF-8 +SAME+= C.UTF-8 en_ZA.UTF-8 +SAME+= C.UTF-8 en_SG.UTF-8 +SAME+= C.UTF-8 en_PH.UTF-8 +SAME+= C.UTF-8 en_NZ.UTF-8 +SAME+= C.UTF-8 en_IE.UTF-8 +SAME+= C.UTF-8 en_HK.UTF-8 +SAME+= C.UTF-8 en_GB.UTF-8 +SAME+= C.UTF-8 en_CA.UTF-8 +SAME+= C.UTF-8 en_AU.UTF-8 +SAME+= C.UTF-8 el_GR.UTF-8 +SAME+= C.UTF-8 de_DE.UTF-8 +SAME+= C.UTF-8 de_CH.UTF-8 +SAME+= C.UTF-8 de_AT.UTF-8 +SAME+= C.UTF-8 da_DK.UTF-8 +SAME+= C.UTF-8 cs_CZ.UTF-8 +SAME+= C.UTF-8 ca_IT.UTF-8 +SAME+= C.UTF-8 ca_FR.UTF-8 +SAME+= C.UTF-8 ca_ES.UTF-8 +SAME+= C.UTF-8 ca_AD.UTF-8 +SAME+= C.UTF-8 bg_BG.UTF-8 +SAME+= C.UTF-8 be_BY.UTF-8 +SAME+= C.UTF-8 ar_SA.UTF-8 +SAME+= C.UTF-8 ar_QA.UTF-8 +SAME+= C.UTF-8 ar_MA.UTF-8 +SAME+= C.UTF-8 ar_JO.UTF-8 +SAME+= C.UTF-8 ar_EG.UTF-8 +SAME+= C.UTF-8 ar_AE.UTF-8 +SAME+= C.UTF-8 am_ET.UTF-8 +SAME+= C.UTF-8 af_ZA.UTF-8 SAME+= en_US.ISO8859-1 sv_SE.ISO8859-1 SAME+= en_US.ISO8859-1 sv_FI.ISO8859-1 SAME+= en_US.ISO8859-1 pt_PT.ISO8859-1 diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile index 9e8483dc445..79b44749233 100644 --- a/tools/tools/locale/Makefile +++ b/tools/tools/locale/Makefile @@ -54,7 +54,7 @@ LC:= --lc=${LC} .endif all: - cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src + cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src .for t in ${TYPES} . if ${KNOWN:M${t}} test -d ${t} || mkdir ${t} @@ -105,7 +105,7 @@ static-colldef: .endfor transfer-rollup: - cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_US.UTF-8.src + cp ${ETCDIR}/common.UTF-8.src ${CLDRDIR}/posix/xx_Comm_C.UTF-8.src rollup: perl -I tools tools/utf8-rollup.pl \ diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl index a34c5477670..b52899fa4f3 100755 --- a/tools/tools/locale/tools/cldr2def.pl +++ b/tools/tools/locale/tools/cldr2def.pl @@ -416,6 +416,9 @@ sub get_languages { } sub transform_ctypes { + # Add the C.UTF-8 + $languages{"C"}{"x"}{data}{"x"}{$DEFENCODING} = undef; + foreach my $l (sort keys(%languages)) { foreach my $f (sort keys(%{$languages{$l}})) { foreach my $c (sort keys(%{$languages{$l}{$f}{data}})) { @@ -424,13 +427,12 @@ sub transform_ctypes { next if (defined $languages{$l}{$f}{definitions} && $languages{$l}{$f}{definitions} !~ /$TYPE/); $languages{$l}{$f}{data}{$c}{$DEFENCODING} = 0; # unread - my $file; - $file = $l . "_"; - $file .= $f . "_" if ($f ne "x"); - $file .= $c; + my $file = $l; + $file .= "_" . $f if ($f ne "x"); + $file .= "_" . $c if ($c ne "x"); my $actfile = $file; - my $filename = "$CLDRDIR/posix/xx_Comm_US.UTF-8.src"; + my $filename = "$CLDRDIR/posix/xx_Comm_C.UTF-8.src"; if (! -f $filename) { print STDERR "Cannot open $filename\n"; next; @@ -939,8 +941,8 @@ EOF } keys(%{$hashtable{$hash}}); } elsif ($TYPE eq "ctypedef") { @files = sort { - if ($a eq 'en_x_US.UTF-8') { return -1; } - elsif ($b eq 'en_x_US.UTF-8') { return 1; } + if ($a eq 'C_x_x.UTF-8') { return -1; } + elsif ($b eq 'C_x_x.UTF-8') { return 1; } if ($a =~ /^en_x_US/) { return -1; } elsif ($b =~ /^en_x_US/) { return 1; } @@ -962,6 +964,7 @@ EOF } if ($#files > 0) { my $link = shift(@files); + $link =~ s/_x_x//; # special case for C $link =~ s/_x_/_/; # strip family if none there foreach my $file (@files) { my @a = split(/_/, $file); @@ -987,9 +990,9 @@ EOF next; } foreach my $e (sort keys(%{$languages{$l}{$f}{data}{$c}})) { - my $file = $l . "_"; - $file .= $f . "_" if ($f ne "x"); - $file .= $c; + my $file = $l; + $file .= "_" . $f if ($f ne "x"); + $file .= "_" . $c if ($c ne "x"); next if (!defined $languages{$l}{$f}{data}{$c}{$e}); print FOUT "LOCALES+=\t$file.$e\n"; } -- 2.45.0