From 91e0bf6a77d8c15b682d5122ce1532e5bd31c917 Mon Sep 17 00:00:00 2001 From: "Andrey A. Chernov" Date: Mon, 21 Jan 2008 23:48:12 +0000 Subject: [PATCH] Introduce new encoding: "ASCII" It differs from default C/POSIX "NONE" mainly by stricter 8bit check for mb*towc*/wc*tomb* family, returning EILSEQ --- lib/libc/locale/Makefile.inc | 2 +- lib/libc/locale/ascii.c | 187 ++++++++++++++++++++++++++++++++ lib/libc/locale/mblocal.h | 1 + lib/libc/locale/setrunelocale.c | 2 + 4 files changed, 191 insertions(+), 1 deletion(-) create mode 100644 lib/libc/locale/ascii.c diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index 24ac9040500..074c4d9ded2 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -4,7 +4,7 @@ # locale sources .PATH: ${.CURDIR}/${MACHINE_ARCH}/locale ${.CURDIR}/locale -SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ +SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \ gb18030.c gb2312.c gbk.c isctype.c iswctype.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ diff --git a/lib/libc/locale/ascii.c b/lib/libc/locale/ascii.c new file mode 100644 index 00000000000..493bf42f395 --- /dev/null +++ b/lib/libc/locale/ascii.c @@ -0,0 +1,187 @@ +/*- + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Paul Borman at Krystal Technologies. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include "mblocal.h" + +static size_t _ascii_mbrtowc(wchar_t * __restrict, const char * __restrict, + size_t, mbstate_t * __restrict); +static int _ascii_mbsinit(const mbstate_t *); +static size_t _ascii_mbsnrtowcs(wchar_t * __restrict dst, + const char ** __restrict src, size_t nms, size_t len, + mbstate_t * __restrict ps __unused); +static size_t _ascii_wcrtomb(char * __restrict, wchar_t, + mbstate_t * __restrict); +static size_t _ascii_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, + size_t, size_t, mbstate_t * __restrict); + +int +_ascii_init(_RuneLocale *rl) +{ + + __mbrtowc = _ascii_mbrtowc; + __mbsinit = _ascii_mbsinit; + __mbsnrtowcs = _ascii_mbsnrtowcs; + __wcrtomb = _ascii_wcrtomb; + __wcsnrtombs = _ascii_wcsnrtombs; + _CurrentRuneLocale = rl; + __mb_cur_max = 1; + __mb_sb_limit = 128; + return(0); +} + +static int +_ascii_mbsinit(const mbstate_t *ps __unused) +{ + + /* + * Encoding is not state dependent - we are always in the + * initial state. + */ + return (1); +} + +static size_t +_ascii_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, + mbstate_t * __restrict ps __unused) +{ + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (0); + if (n == 0) + /* Incomplete multibyte sequence */ + return ((size_t)-2); + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + if (pwc != NULL) + *pwc = (unsigned char)*s; + return (*s == '\0' ? 0 : 1); +} + +static size_t +_ascii_wcrtomb(char * __restrict s, wchar_t wc, + mbstate_t * __restrict ps __unused) +{ + + if (s == NULL) + /* Reset to initial shift state (no-op) */ + return (1); + if (wc < 0 || wc > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + *s = (unsigned char)wc; + return (1); +} + +static size_t +_ascii_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, + size_t nms, size_t len, mbstate_t * __restrict ps __unused) +{ + const char *s; + size_t nchr; + + if (dst == NULL) { + for (s = *src; nms > 0 && *s != '\0'; s++, nms--) { + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + } + return (s - *src); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nms-- > 0) { + if (*s & 0x80) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = (unsigned char)*s++) == L'\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} + +static size_t +_ascii_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, + size_t nwc, size_t len, mbstate_t * __restrict ps __unused) +{ + const wchar_t *s; + size_t nchr; + + if (dst == NULL) { + for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) { + if (*s < 0 || *s > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + } + return (s - *src); + } + + s = *src; + nchr = 0; + while (len-- > 0 && nwc-- > 0) { + if (*s < 0 || *s > 127) { + errno = EILSEQ; + return ((size_t)-1); + } + if ((*dst++ = *s++) == '\0') { + *src = NULL; + return (nchr); + } + nchr++; + } + *src = s; + return (nchr); +} + diff --git a/lib/libc/locale/mblocal.h b/lib/libc/locale/mblocal.h index b4e10985f67..0faebe502c6 100644 --- a/lib/libc/locale/mblocal.h +++ b/lib/libc/locale/mblocal.h @@ -35,6 +35,7 @@ * Rune initialization function prototypes. */ int _none_init(_RuneLocale *); +int _ascii_init(_RuneLocale *); int _UTF8_init(_RuneLocale *); int _EUC_init(_RuneLocale *); int _GB18030_init(_RuneLocale *); diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index e723ea627af..e619e0b6878 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -126,6 +126,8 @@ __setrunelocale(const char *encoding) rl->__sgetrune = NULL; if (strcmp(rl->__encoding, "NONE") == 0) ret = _none_init(rl); + else if (strcmp(rl->__encoding, "ASCII") == 0) + ret = _ascii_init(rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(rl); else if (strcmp(rl->__encoding, "EUC") == 0) -- 2.45.2