From c5eb47ee8e5888893fc7c4e5f773a60a5eef1d0c Mon Sep 17 00:00:00 2001 From: brooks Date: Tue, 16 Apr 2013 19:27:09 +0000 Subject: [PATCH] MFC r248302: Update to the latest (un)vis(3) sources from NetBSD. This adds multibyte support[0] and the new functions strenvisx and strsenvisx. Add MLINKS for vis(3) functions add by this and the initial import from NetBSD[1]. PR: bin/166364, bin/175418 Submitted by: "J.R. Oldroyd" [0] stefanf[1] Obtained from: NetBSD git-svn-id: svn://svn.freebsd.org/base/stable/9@249560 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- contrib/libc-vis/unvis.3 | 37 +- contrib/libc-vis/unvis.c | 6 +- contrib/libc-vis/vis.3 | 144 +++++-- contrib/libc-vis/vis.c | 800 +++++++++++++++++++++----------------- contrib/libc-vis/vis.h | 5 +- lib/libc/gen/Makefile.inc | 19 +- lib/libc/gen/Symbol.map | 2 + 7 files changed, 607 insertions(+), 406 deletions(-) diff --git a/contrib/libc-vis/unvis.3 b/contrib/libc-vis/unvis.3 index 472c49d9b..34ebde674 100644 --- a/contrib/libc-vis/unvis.3 +++ b/contrib/libc-vis/unvis.3 @@ -1,4 +1,4 @@ -.\" $NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $ +.\" $NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $ .\" $FreeBSD$ .\" .\" Copyright (c) 1989, 1991, 1993 @@ -126,15 +126,17 @@ The function has several return codes that must be handled properly. They are: .Bl -tag -width UNVIS_VALIDPUSH -.It Li \&0 (zero) +.It Li \&0 No (zero) Another character is necessary; nothing has been recognized yet. .It Dv UNVIS_VALID A valid character has been recognized and is available at the location -pointed to by cp. +pointed to by +.Fa cp . .It Dv UNVIS_VALIDPUSH A valid character has been recognized and is available at the location -pointed to by cp; however, the character currently passed in should -be passed in again. +pointed to by +.Fa cp ; +however, the character currently passed in should be passed in again. .It Dv UNVIS_NOCHAR A valid sequence was detected, but no character was produced. This return code is necessary to indicate a logical break between characters. @@ -150,7 +152,7 @@ one more time with flag set to to extract any remaining character (the character passed in is ignored). .Pp The -.Ar flag +.Fa flag argument is also used to specify the encoding style of the source. If set to .Dv VIS_HTTPSTYLE @@ -161,7 +163,8 @@ will decode URI strings as specified in RFC 1808. If set to .Dv VIS_HTTP1866 , .Fn unvis -will decode URI strings as specified in RFC 1866. +will decode entity references and numeric character references +as specified in RFC 1866. If set to .Dv VIS_MIMESTYLE , .Fn unvis @@ -169,7 +172,9 @@ will decode MIME Quoted-Printable strings as specified in RFC 2045. If set to .Dv VIS_NOESCAPE , .Fn unvis -will not decode \e quoted characters. +will not decode +.Ql \e +quoted characters. .Pp The following code fragment illustrates a proper use of .Fn unvis . @@ -204,7 +209,7 @@ The functions and .Fn strnunvisx will return \-1 on error and set -.Va errno +.Va errno to: .Bl -tag -width Er .It Bq Er EINVAL @@ -212,7 +217,7 @@ An invalid escape sequence was detected, or the decoder is in an unknown state. .El .Pp In addition the functions -.Fn strnunvis +.Fn strnunvis and .Fn strnunvisx will can also set @@ -244,4 +249,14 @@ and functions appeared in .Nx 6.0 and -.Fx 10.0 . +.Fx 9.2 . +.Sh BUGS +The names +.Dv VIS_HTTP1808 +and +.Dv VIS_HTTP1866 +are wrong. +Percent-encoding was defined in RFC 1738, the original RFC for URL. +RFC 1866 defines HTML 2.0, an application of SGML, from which it +inherits concepts of numeric character references and entity +references. diff --git a/contrib/libc-vis/unvis.c b/contrib/libc-vis/unvis.c index d87812acd..9cf112c0f 100644 --- a/contrib/libc-vis/unvis.c +++ b/contrib/libc-vis/unvis.c @@ -1,4 +1,4 @@ -/* $NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $ */ +/* $NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -34,7 +34,7 @@ #if 0 static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93"; #else -__RCSID("$NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $"); +__RCSID("$NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $"); #endif #endif /* LIBC_SCCS and not lint */ __FBSDID("$FreeBSD$"); @@ -90,7 +90,7 @@ __weak_alias(strnunvisx,_strnunvisx) * RFC 1866 */ static const struct nv { - const char name[7]; + char name[7]; uint8_t value; } nv[] = { { "AElig", 198 }, /* capital AE diphthong (ligature) */ diff --git a/contrib/libc-vis/vis.3 b/contrib/libc-vis/vis.3 index 6f5b15f8c..9d2cb317f 100644 --- a/contrib/libc-vis/vis.3 +++ b/contrib/libc-vis/vis.3 @@ -1,4 +1,4 @@ -.\" $NetBSD: vis.3,v 1.29 2012/12/14 22:55:59 christos Exp $ +.\" $NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $ .\" $FreeBSD$ .\" .\" Copyright (c) 1989, 1991, 1993 @@ -30,7 +30,7 @@ .\" .\" @(#)vis.3 8.1 (Berkeley) 6/9/93 .\" -.Dd December 14, 2012 +.Dd February 19, 2013 .Dt VIS 3 .Os .Sh NAME @@ -40,12 +40,14 @@ .Nm strnvis , .Nm strvisx , .Nm strnvisx , +.Nm strenvisx , .Nm svis , .Nm snvis , .Nm strsvis , .Nm strsnvis , -.Nm strsvisx -.Nm strsnvisx +.Nm strsvisx , +.Nm strsnvisx , +.Nm strsenvisx .Nd visually encode characters .Sh LIBRARY .Lb libc @@ -63,6 +65,8 @@ .Fn strvisx "char *dst" "const char *src" "size_t len" "int flag" .Ft int .Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" +.Ft int +.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr" .Ft char * .Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra" .Ft char * @@ -75,6 +79,8 @@ .Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra" .Ft int .Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" +.Ft int +.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr" .Sh DESCRIPTION The .Fn vis @@ -89,11 +95,11 @@ needs no encoding, it is copied in unaltered. The string is null terminated, and a pointer to the end of the string is returned. The maximum length of any encoding is four -characters (not including the trailing +bytes (not including the trailing .Dv NUL ) ; thus, when encoding a set of characters into a buffer, the size of the buffer should -be four times the number of characters encoded, plus one for the trailing +be four times the number of bytes encoded, plus one for the trailing .Dv NUL . The flag parameter is used for altering the default range of characters considered for encoding and for altering the visual @@ -142,16 +148,17 @@ terminate The size of .Fa dst must be four times the number -of characters encoded from +of bytes encoded from .Fa src (plus one for the .Dv NUL ) . Both -forms return the number of characters in dst (not including -the trailing +forms return the number of characters in +.Fa dst +(not including the trailing .Dv NUL ) . The -.Dq n +.Dq Nm n versions of the functions also take an additional argument .Fa dlen that indicates the length of the @@ -159,7 +166,7 @@ that indicates the length of the buffer. If .Fa dlen -is not large enough to fix the converted string then the +is not large enough to fit the converted string then the .Fn strnvis and .Fn strnvisx @@ -167,6 +174,14 @@ functions return \-1 and set .Va errno to .Dv ENOSPC . +The +.Fn strenvisx +function takes an additional argument, +.Fa cerr_ptr , +that is used to pass in and out a multibyte conversion error flag. +This is useful when processing single characters at a time when +it is possible that the locale may be set to something other +than the locale of the characters in the input data. .Pp The functions .Fn svis , @@ -174,16 +189,18 @@ The functions .Fn strsvis , .Fn strsnvis , .Fn strsvisx , +.Fn strsnvisx , and -.Fn strsnvisx +.Fn strsenvisx correspond to .Fn vis , .Fn nvis , .Fn strvis , .Fn strnvis , .Fn strvisx , +.Fn strnvisx , and -.Fn strnvisx +.Fn strenvisx but have an additional argument .Fa extra , pointing to a @@ -214,14 +231,13 @@ and .Fn strnvisx ) , and the type of representation used. By default, all non-graphic characters, -except space, tab, and newline are encoded. -(See -.Xr isgraph 3 . ) +except space, tab, and newline are encoded (see +.Xr isgraph 3 ) . The following flags alter this: .Bl -tag -width VIS_WHITEX .It Dv VIS_GLOB -Also encode magic characters +Also encode the magic characters .Ql ( * , .Ql \&? , .Ql \&[ @@ -243,11 +259,13 @@ Synonym for \&| .Dv VIS_NL . .It Dv VIS_SAFE -Only encode "unsafe" characters. +Only encode +.Dq unsafe +characters. Unsafe means control characters which may cause common terminals to perform unexpected functions. Currently this form allows space, tab, newline, backspace, bell, and -return - in addition to all graphic characters - unencoded. +return \(em in addition to all graphic characters \(em unencoded. .El .Pp (The above flags have no effect for @@ -287,8 +305,8 @@ Use an to represent meta characters (characters with the 8th bit set), and use caret .Ql ^ -to represent control characters see -.Pf ( Xr iscntrl 3 ) . +to represent control characters (see +.Xr iscntrl 3 ) . The following formats are used: .Bl -tag -width xxxxx .It Dv \e^C @@ -335,19 +353,20 @@ Use C-style backslash sequences to represent standard non-printable characters. The following sequences are used to represent the indicated characters: .Bd -unfilled -offset indent -.Li \ea Tn - BEL No (007) -.Li \eb Tn - BS No (010) -.Li \ef Tn - NP No (014) -.Li \en Tn - NL No (012) -.Li \er Tn - CR No (015) -.Li \es Tn - SP No (040) -.Li \et Tn - HT No (011) -.Li \ev Tn - VT No (013) -.Li \e0 Tn - NUL No (000) +.Li \ea Tn \(em BEL No (007) +.Li \eb Tn \(em BS No (010) +.Li \ef Tn \(em NP No (014) +.Li \en Tn \(em NL No (012) +.Li \er Tn \(em CR No (015) +.Li \es Tn \(em SP No (040) +.Li \et Tn \(em HT No (011) +.Li \ev Tn \(em VT No (013) +.Li \e0 Tn \(em NUL No (000) .Ed .Pp -When using this format, the nextc parameter is looked at to determine -if a +When using this format, the +.Fa nextc +parameter is looked at to determine if a .Dv NUL character can be encoded as .Ql \e0 @@ -374,8 +393,8 @@ represents a lower case hexadecimal digit. .It Dv VIS_MIMESTYLE Use MIME Quoted-Printable encoding as described in RFC 2045, only don't break lines and don't handle CRLF. -The form is: -.Ql %XX +The form is +.Ql =XX where .Em X represents an upper case hexadecimal digit. @@ -392,6 +411,41 @@ meta characters as .Ql M-C ) . With this flag set, the encoding is ambiguous and non-invertible. +.Sh MULTIBYTE CHARACTER SUPPORT +These functions support multibyte character input. +The encoding conversion is influenced by the setting of the +.Ev LC_CTYPE +environment variable which defines the set of characters +that can be copied without encoding. +.Pp +When 8-bit data is present in the input, +.Ev LC_CTYPE +must be set to the correct locale or to the C locale. +If the locales of the data and the conversion are mismatched, +multibyte character recognition may fail and encoding will be performed +byte-by-byte instead. +.Pp +As noted above, +.Fa dst +must be four times the number of bytes processed from +.Fa src . +But note that each multibyte character can be up to +.Dv MB_LEN_MAX +bytes +.\" (see +.\" .Xr multibyte 3 ) +so in terms of multibyte characters, +.Fa dst +must be four times +.Dv MB_LEN_MAX +times the number of characters processed from +.Fa src . +.Sh ENVIRONMENT +.Bl -tag -width ".Ev LC_CTYPE" +.It Ev LC_CTYPE +Specify the locale of the input data. +Set to C if the input data locale is unknown. +.El .Sh ERRORS The functions .Fn nvis @@ -407,11 +461,11 @@ and .Fn strsnvisx , will return \-1 when the .Fa dlen -destination buffer length size is not enough to perform the conversion while +destination buffer size is not enough to perform the conversion while setting .Va errno to: -.Bl -tag -width Er +.Bl -tag -width ".Bq Er ENOSPC" .It Bq Er ENOSPC The destination buffer size is not large enough to perform the conversion. .El @@ -419,18 +473,23 @@ The destination buffer size is not large enough to perform the conversion. .Xr unvis 1 , .Xr vis 1 , .Xr glob 3 , +.\" .Xr multibyte 3 , .Xr unvis 3 .Rs .%A T. Berners-Lee .%T Uniform Resource Locators (URL) -.%O RFC1738 +.%O "RFC 1738" +.Re +.Rs +.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies" +.%O "RFC 2045" .Re .Sh HISTORY The .Fn vis , .Fn strvis , and -.Fa strvisx +.Fn strvisx functions first appeared in .Bx 4.4 . The @@ -441,7 +500,7 @@ and functions appeared in .Nx 1.5 and -.Fx 10.0 . +.Fx 9.2 . The buffer size limited versions of the functions .Po Fn nvis , .Fn strnvis , @@ -451,6 +510,9 @@ The buffer size limited versions of the functions and .Fn strsnvisx Pc appeared in -.Nx 6.0 and -.Fx 10.0 . +.Fx 9.2 . +Myltibyte character support was added in +.Nx 7.0 +and +.Fx 9.2 . diff --git a/contrib/libc-vis/vis.c b/contrib/libc-vis/vis.c index 040c28c25..2ba6b5b23 100644 --- a/contrib/libc-vis/vis.c +++ b/contrib/libc-vis/vis.c @@ -1,4 +1,4 @@ -/* $NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $ */ +/* $NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $ */ /*- * Copyright (c) 1989, 1993 @@ -57,19 +57,23 @@ #include #if defined(LIBC_SCCS) && !defined(lint) -__RCSID("$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $"); +__RCSID("$NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $"); #endif /* LIBC_SCCS and not lint */ +#ifdef __FBSDID __FBSDID("$FreeBSD$"); +#define _DIAGASSERT(x) assert(x) +#endif #include "namespace.h" #include +#include #include #include #include #include - -#define _DIAGASSERT(x) assert(x) +#include +#include #ifdef __weak_alias __weak_alias(strvisx,_strvisx) @@ -81,65 +85,66 @@ __weak_alias(strvisx,_strvisx) #include #include -static char *do_svis(char *, size_t *, int, int, int, const char *); +/* + * The reason for going through the trouble to deal with character encodings + * in vis(3), is that we use this to safe encode output of commands. This + * safe encoding varies depending on the character set. For example if we + * display ps output in French, we don't want to display French characters + * as M-foo. + */ + +static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *); #undef BELL -#define BELL '\a' - -#define isoctal(c) (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7') -#define iswhite(c) (c == ' ' || c == '\t' || c == '\n') -#define issafe(c) (c == '\b' || c == BELL || c == '\r') -#define xtoa(c) "0123456789abcdef"[c] -#define XTOA(c) "0123456789ABCDEF"[c] - -#define MAXEXTRAS 9 - -#define MAKEEXTRALIST(flag, extra, orig_str) \ -do { \ - const char *orig = orig_str; \ - const char *o = orig; \ - char *e; \ - while (*o++) \ - continue; \ - extra = malloc((size_t)((o - orig) + MAXEXTRAS)); \ - if (!extra) break; \ - for (o = orig, e = extra; (*e++ = *o++) != '\0';) \ - continue; \ - e--; \ - if (flag & VIS_GLOB) { \ - *e++ = '*'; \ - *e++ = '?'; \ - *e++ = '['; \ - *e++ = '#'; \ - } \ - if (flag & VIS_SP) *e++ = ' '; \ - if (flag & VIS_TAB) *e++ = '\t'; \ - if (flag & VIS_NL) *e++ = '\n'; \ - if ((flag & VIS_NOSLASH) == 0) *e++ = '\\'; \ - *e = '\0'; \ -} while (/*CONSTCOND*/0) +#define BELL L'\a' + +#define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7') +#define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n') +#define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r') +#define xtoa(c) L"0123456789abcdef"[c] +#define XTOA(c) L"0123456789ABCDEF"[c] + +#define MAXEXTRAS 10 + +#if !HAVE_NBTOOL_CONFIG_H +#ifndef __NetBSD__ +/* + * On NetBSD MB_LEN_MAX is currently 32 which does not fit on any integer + * integral type and it is probably wrong, since currently the maximum + * number of bytes and character needs is 6. Until this is fixed, the + * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and + * the assertion is commented out. + */ +#ifdef __FreeBSD__ +/* + * On FreeBSD including for CTASSERT only works in kernel + * mode. + */ +#ifndef CTASSERT +#define CTASSERT(x) _CTASSERT(x, __LINE__) +#define _CTASSERT(x, y) __CTASSERT(x, y) +#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] +#endif +#endif /* __FreeBSD__ */ +CTASSERT(MB_LEN_MAX <= sizeof(uint64_t)); +#endif /* !__NetBSD__ */ +#endif /* * This is do_hvis, for HTTP style (RFC 1808) */ -static char * -do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra) +static wchar_t * +do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) { - - if ((isascii(c) && isalnum(c)) + if (iswalnum(c) /* safe */ - || c == '$' || c == '-' || c == '_' || c == '.' || c == '+' + || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+' /* extra */ - || c == '!' || c == '*' || c == '\'' || c == '(' || c == ')' - || c == ',') { - dst = do_svis(dst, dlen, c, flag, nextc, extra); - } else { - if (dlen) { - if (*dlen < 3) - return NULL; - *dlen -= 3; - } - *dst++ = '%'; + || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')' + || c == L',') + dst = do_svis(dst, c, flags, nextc, extra); + else { + *dst++ = L'%'; *dst++ = xtoa(((unsigned int)c >> 4) & 0xf); *dst++ = xtoa((unsigned int)c & 0xf); } @@ -151,312 +156,448 @@ do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra) * This is do_mvis, for Quoted-Printable MIME (RFC 2045) * NB: No handling of long lines or CRLF. */ -static char * -do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra) +static wchar_t * +do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) { - if ((c != '\n') && + if ((c != L'\n') && /* Space at the end of the line */ - ((isspace(c) && (nextc == '\r' || nextc == '\n')) || + ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) || /* Out of range */ - (!isspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || - /* Specific char to be escaped */ - strchr("#$@[\\]^`{|}~", c) != NULL)) { - if (dlen) { - if (*dlen < 3) - return NULL; - *dlen -= 3; - } - *dst++ = '='; + (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || + /* Specific char to be escaped */ + wcschr(L"#$@[\\]^`{|}~", c) != NULL)) { + *dst++ = L'='; *dst++ = XTOA(((unsigned int)c >> 4) & 0xf); *dst++ = XTOA((unsigned int)c & 0xf); - } else { - dst = do_svis(dst, dlen, c, flag, nextc, extra); - } + } else + dst = do_svis(dst, c, flags, nextc, extra); return dst; } /* - * This is do_vis, the central code of vis. - * dst: Pointer to the destination buffer - * c: Character to encode - * flag: Flag word - * nextc: The character following 'c' - * extra: Pointer to the list of extra characters to be - * backslash-protected. + * Output single byte of multibyte character. */ -static char * -do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra) +static wchar_t * +do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra) { - int isextra; - size_t odlen = dlen ? *dlen : 0; - - isextra = strchr(extra, c) != NULL; -#define HAVE(x) \ - do { \ - if (dlen) { \ - if (*dlen < (x)) \ - goto out; \ - *dlen -= (x); \ - } \ - } while (/*CONSTCOND*/0) - if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) || - ((flag & VIS_SAFE) && issafe(c)))) { - HAVE(1); - *dst++ = c; - return dst; - } - if (flag & VIS_CSTYLE) { - HAVE(2); + if (flags & VIS_CSTYLE) { switch (c) { - case '\n': - *dst++ = '\\'; *dst++ = 'n'; + case L'\n': + *dst++ = L'\\'; *dst++ = L'n'; return dst; - case '\r': - *dst++ = '\\'; *dst++ = 'r'; + case L'\r': + *dst++ = L'\\'; *dst++ = L'r'; return dst; - case '\b': - *dst++ = '\\'; *dst++ = 'b'; + case L'\b': + *dst++ = L'\\'; *dst++ = L'b'; return dst; case BELL: - *dst++ = '\\'; *dst++ = 'a'; + *dst++ = L'\\'; *dst++ = L'a'; return dst; - case '\v': - *dst++ = '\\'; *dst++ = 'v'; + case L'\v': + *dst++ = L'\\'; *dst++ = L'v'; return dst; - case '\t': - *dst++ = '\\'; *dst++ = 't'; + case L'\t': + *dst++ = L'\\'; *dst++ = L't'; return dst; - case '\f': - *dst++ = '\\'; *dst++ = 'f'; + case L'\f': + *dst++ = L'\\'; *dst++ = L'f'; return dst; - case ' ': - *dst++ = '\\'; *dst++ = 's'; + case L' ': + *dst++ = L'\\'; *dst++ = L's'; return dst; - case '\0': - *dst++ = '\\'; *dst++ = '0'; - if (isoctal(nextc)) { - HAVE(2); - *dst++ = '0'; - *dst++ = '0'; + case L'\0': + *dst++ = L'\\'; *dst++ = L'0'; + if (iswoctal(nextc)) { + *dst++ = L'0'; + *dst++ = L'0'; } return dst; default: - if (isgraph(c)) { - *dst++ = '\\'; *dst++ = c; + if (iswgraph(c)) { + *dst++ = L'\\'; + *dst++ = c; return dst; } - if (dlen) - *dlen = odlen; } } - if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) { - HAVE(4); - *dst++ = '\\'; - *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0'; - *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0'; - *dst++ = (c & 07) + '0'; + if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) { + *dst++ = L'\\'; + *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0'; + *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0'; + *dst++ = (c & 07) + L'0'; } else { - if ((flag & VIS_NOSLASH) == 0) { - HAVE(1); - *dst++ = '\\'; - } + if ((flags & VIS_NOSLASH) == 0) + *dst++ = L'\\'; if (c & 0200) { - HAVE(1); - c &= 0177; *dst++ = 'M'; + c &= 0177; + *dst++ = L'M'; } - if (iscntrl(c)) { - HAVE(2); - *dst++ = '^'; + if (iswcntrl(c)) { + *dst++ = L'^'; if (c == 0177) - *dst++ = '?'; + *dst++ = L'?'; else - *dst++ = c + '@'; + *dst++ = c + L'@'; } else { - HAVE(2); - *dst++ = '-'; *dst++ = c; + *dst++ = L'-'; + *dst++ = c; } } + return dst; -out: - *dlen = odlen; - return NULL; } -typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *); +/* + * This is do_vis, the central code of vis. + * dst: Pointer to the destination buffer + * c: Character to encode + * flags: Flags word + * nextc: The character following 'c' + * extra: Pointer to the list of extra characters to be + * backslash-protected. + */ +static wchar_t * +do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra) +{ + int iswextra, i, shft; + uint64_t bmsk, wmsk; + + iswextra = wcschr(extra, c) != NULL; + if (!iswextra && (iswgraph(c) || iswwhite(c) || + ((flags & VIS_SAFE) && iswsafe(c)))) { + *dst++ = c; + return dst; + } + + /* See comment in istrsenvisx() output loop, below. */ + wmsk = 0; + for (i = sizeof(wmsk) - 1; i >= 0; i--) { + shft = i * NBBY; + bmsk = (uint64_t)0xffLL << shft; + wmsk |= bmsk; + if ((c & wmsk) || i == 0) + dst = do_mbyte(dst, (wint_t)( + (uint64_t)(c & bmsk) >> shft), + flags, nextc, iswextra); + } + + return dst; +} + +typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *); /* * Return the appropriate encoding function depending on the flags given. */ static visfun_t -getvisfun(int flag) +getvisfun(int flags) { - if (flag & VIS_HTTPSTYLE) + if (flags & VIS_HTTPSTYLE) return do_hvis; - if (flag & VIS_MIMESTYLE) + if (flags & VIS_MIMESTYLE) return do_mvis; return do_svis; } /* - * isnvis - visually encode characters, also encoding the characters - * pointed to by `extra' + * Expand list of extra characters to not visually encode. */ -static char * -isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra) +static wchar_t * +makeextralist(int flags, const char *src) { - char *nextra = NULL; - visfun_t f; + wchar_t *dst, *d; + size_t len; - _DIAGASSERT(dst != NULL); - _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); - if (!nextra) { - if (dlen && *dlen == 0) { - errno = ENOSPC; - return NULL; - } - *dst = '\0'; /* can't create nextra, return "" */ - return dst; - } - f = getvisfun(flag); - dst = (*f)(dst, dlen, c, flag, nextc, nextra); - free(nextra); - if (dst == NULL || (dlen && *dlen == 0)) { - errno = ENOSPC; + len = strlen(src); + if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL) return NULL; + + if (mbstowcs(dst, src, len) == (size_t)-1) { + size_t i; + for (i = 0; i < len; i++) + dst[i] = (wint_t)(u_char)src[i]; + d = dst + len; + } else + d = dst + wcslen(dst); + + if (flags & VIS_GLOB) { + *d++ = L'*'; + *d++ = L'?'; + *d++ = L'['; + *d++ = L'#'; } - *dst = '\0'; - return dst; -} -char * -svis(char *dst, int c, int flag, int nextc, const char *extra) -{ - return isnvis(dst, NULL, c, flag, nextc, extra); -} + if (flags & VIS_SP) *d++ = L' '; + if (flags & VIS_TAB) *d++ = L'\t'; + if (flags & VIS_NL) *d++ = L'\n'; + if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\'; + *d = L'\0'; -char * -snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra) -{ - return isnvis(dst, &dlen, c, flag, nextc, extra); + return dst; } - /* - * strsvis, strsvisx - visually encode characters from src into dst - * - * Extra is a pointer to a \0-terminated list of characters to - * be encoded, too. These functions are useful e. g. to - * encode strings in such a way so that they are not interpreted - * by a shell. - * - * Dst must be 4 times the size of src to account for possible - * expansion. The length of dst, not including the trailing NULL, - * is returned. - * - * Strsvisx encodes exactly len bytes from src into dst. - * This is useful for encoding a block of data. + * istrsenvisx() + * The main internal function. + * All user-visible functions call this one. */ static int -istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char *extra) +istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength, + int flags, const char *mbextra, int *cerr_ptr) { - int c; - char *start; - char *nextra = NULL; - const unsigned char *src = (const unsigned char *)csrc; + wchar_t *dst, *src, *pdst, *psrc, *start, *extra; + size_t len, olen; + uint64_t bmsk, wmsk; + wint_t c; visfun_t f; - - _DIAGASSERT(dst != NULL); - _DIAGASSERT(src != NULL); - _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); - if (!nextra) { - *dst = '\0'; /* can't create nextra, return "" */ - return 0; + int clen = 0, cerr = 0, error = -1, i, shft; + ssize_t mbslength, maxolen; + + _DIAGASSERT(mbdst != NULL); + _DIAGASSERT(mbsrc != NULL); + _DIAGASSERT(mbextra != NULL); + + /* + * Input (mbsrc) is a char string considered to be multibyte + * characters. The input loop will read this string pulling + * one character, possibly multiple bytes, from mbsrc and + * converting each to wchar_t in src. + * + * The vis conversion will be done using the wide char + * wchar_t string. + * + * This will then be converted back to a multibyte string to + * return to the caller. + */ + + /* Allocate space for the wide char strings */ + psrc = pdst = extra = NULL; + if (!mblength) + mblength = strlen(mbsrc); + if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL) + return -1; + if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL) + goto out; + dst = pdst; + src = psrc; + + /* Use caller's multibyte conversion error flag. */ + if (cerr_ptr) + cerr = *cerr_ptr; + + /* + * Input loop. + * Handle up to mblength characters (not bytes). We do not + * stop at NULs because we may be processing a block of data + * that includes NULs. + */ + mbslength = (ssize_t)mblength; + /* + * When inputing a single character, must also read in the + * next character for nextc, the look-ahead character. + */ + if (mbslength == 1) + mbslength++; + while (mbslength > 0) { + /* Convert one multibyte character to wchar_t. */ + if (!cerr) + clen = mbtowc(src, mbsrc, MB_LEN_MAX); + if (cerr || clen < 0) { + /* Conversion error, process as a byte instead. */ + *src = (wint_t)(u_char)*mbsrc; + clen = 1; + cerr = 1; + } + if (clen == 0) + /* + * NUL in input gives 0 return value. process + * as single NUL byte and keep going. + */ + clen = 1; + /* Advance buffer character pointer. */ + src++; + /* Advance input pointer by number of bytes read. */ + mbsrc += clen; + /* Decrement input byte count. */ + mbslength -= clen; + } + len = src - psrc; + src = psrc; + /* + * In the single character input case, we will have actually + * processed two characters, c and nextc. Reset len back to + * just a single character. + */ + if (mblength < len) + len = mblength; + + /* Convert extra argument to list of characters for this mode. */ + extra = makeextralist(flags, mbextra); + if (!extra) { + if (dlen && *dlen == 0) { + errno = ENOSPC; + goto out; + } + *mbdst = '\0'; /* can't create extra, return "" */ + error = 0; + goto out; } - f = getvisfun(flag); - for (start = dst; (c = *src++) != '\0'; /* empty */) { - dst = (*f)(dst, dlen, c, flag, *src, nextra); + + /* Look up which processing function to call. */ + f = getvisfun(flags); + + /* + * Main processing loop. + * Call do_Xvis processing function one character at a time + * with next character available for look-ahead. + */ + for (start = dst; len > 0; len--) { + c = *src++; + dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra); if (dst == NULL) { errno = ENOSPC; - return -1; + goto out; } } - free(nextra); - if (dlen && *dlen == 0) { - errno = ENOSPC; - return -1; + + /* Terminate the string in the buffer. */ + *dst = L'\0'; + + /* + * Output loop. + * Convert wchar_t string back to multibyte output string. + * If we have hit a multi-byte conversion error on input, + * output byte-by-byte here. Else use wctomb(). + */ + len = wcslen(start); + maxolen = dlen ? *dlen : (wcslen(start) * MB_LEN_MAX + 1); + olen = 0; + for (dst = start; len > 0; len--) { + if (!cerr) + clen = wctomb(mbdst, *dst); + if (cerr || clen < 0) { + /* + * Conversion error, process as a byte(s) instead. + * Examine each byte and higher-order bytes for + * data. E.g., + * 0x000000000000a264 -> a2 64 + * 0x000000001f00a264 -> 1f 00 a2 64 + */ + clen = 0; + wmsk = 0; + for (i = sizeof(wmsk) - 1; i >= 0; i--) { + shft = i * NBBY; + bmsk = (uint64_t)0xffLL << shft; + wmsk |= bmsk; + if ((*dst & wmsk) || i == 0) + mbdst[clen++] = (char)( + (uint64_t)(*dst & bmsk) >> + shft); + } + cerr = 1; + } + /* If this character would exceed our output limit, stop. */ + if (olen + clen > (size_t)maxolen) + break; + /* Advance output pointer by number of bytes written. */ + mbdst += clen; + /* Advance buffer character pointer. */ + dst++; + /* Incrment output character count. */ + olen += clen; } - *dst = '\0'; - return (int)(dst - start); + + /* Terminate the output string. */ + *mbdst = '\0'; + + /* Pass conversion error flag out. */ + if (cerr_ptr) + *cerr_ptr = cerr; + + free(extra); + free(pdst); + free(psrc); + + return (int)olen; +out: + free(extra); + free(pdst); + free(psrc); + return error; } +#endif -int -strsvis(char *dst, const char *csrc, int flag, const char *extra) +#if !HAVE_SVIS +/* + * The "svis" variants all take an "extra" arg that is a pointer + * to a NUL-terminated list of characters to be encoded, too. + * These functions are useful e. g. to encode strings in such a + * way so that they are not interpreted by a shell. + */ + +char * +svis(char *mbdst, int c, int flags, int nextc, const char *mbextra) { - return istrsnvis(dst, NULL, csrc, flag, extra); + char cc[2]; + int ret; + + cc[0] = c; + cc[1] = nextc; + + ret = istrsenvisx(mbdst, NULL, cc, 1, flags, mbextra, NULL); + if (ret < 0) + return NULL; + return mbdst + ret; } -int -strsnvis(char *dst, size_t dlen, const char *csrc, int flag, const char *extra) +char * +snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra) { - return istrsnvis(dst, &dlen, csrc, flag, extra); + char cc[2]; + int ret; + + cc[0] = c; + cc[1] = nextc; + + ret = istrsenvisx(mbdst, &dlen, cc, 1, flags, mbextra, NULL); + if (ret < 0) + return NULL; + return mbdst + ret; } -static int -istrsnvisx(char *dst, size_t *dlen, const char *csrc, size_t len, int flag, - const char *extra) +int +strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra) { - unsigned char c; - char *start; - char *nextra = NULL; - const unsigned char *src = (const unsigned char *)csrc; - visfun_t f; + return istrsenvisx(mbdst, NULL, mbsrc, 0, flags, mbextra, NULL); +} - _DIAGASSERT(dst != NULL); - _DIAGASSERT(src != NULL); - _DIAGASSERT(extra != NULL); - MAKEEXTRALIST(flag, nextra, extra); - if (! nextra) { - if (dlen && *dlen == 0) { - errno = ENOSPC; - return -1; - } - *dst = '\0'; /* can't create nextra, return "" */ - return 0; - } +int +strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra) +{ + return istrsenvisx(mbdst, &dlen, mbsrc, 0, flags, mbextra, NULL); +} - f = getvisfun(flag); - for (start = dst; len > 0; len--) { - c = *src++; - dst = (*f)(dst, dlen, c, flag, len > 1 ? *src : '\0', nextra); - if (dst == NULL) { - errno = ENOSPC; - return -1; - } - } - free(nextra); - if (dlen && *dlen == 0) { - errno = ENOSPC; - return -1; - } - *dst = '\0'; - return (int)(dst - start); +int +strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra) +{ + return istrsenvisx(mbdst, NULL, mbsrc, len, flags, mbextra, NULL); } int -strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra) +strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, + const char *mbextra) { - return istrsnvisx(dst, NULL, csrc, len, flag, extra); + return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, NULL); } int -strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag, - const char *extra) +strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, + const char *mbextra, int *cerr_ptr) { - return istrsnvisx(dst, &dlen, csrc, len, flag, extra); + return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, mbextra, cerr_ptr); } #endif @@ -464,120 +605,83 @@ strsnvisx(char *dst, size_t dlen, const char *csrc, size_t len, int flag, /* * vis - visually encode characters */ -static char * -invis(char *dst, size_t *dlen, int c, int flag, int nextc) +char * +vis(char *mbdst, int c, int flags, int nextc) { - char *extra = NULL; - unsigned char uc = (unsigned char)c; - visfun_t f; + char cc[2]; + int ret; - _DIAGASSERT(dst != NULL); + cc[0] = c; + cc[1] = nextc; - MAKEEXTRALIST(flag, extra, ""); - if (! extra) { - if (dlen && *dlen == 0) { - errno = ENOSPC; - return NULL; - } - *dst = '\0'; /* can't create extra, return "" */ - return dst; - } - f = getvisfun(flag); - dst = (*f)(dst, dlen, uc, flag, nextc, extra); - free(extra); - if (dst == NULL || (dlen && *dlen == 0)) { - errno = ENOSPC; + ret = istrsenvisx(mbdst, NULL, cc, 1, flags, "", NULL); + if (ret < 0) return NULL; - } - *dst = '\0'; - return dst; + return mbdst + ret; } char * -vis(char *dst, int c, int flag, int nextc) +nvis(char *mbdst, size_t dlen, int c, int flags, int nextc) { - return invis(dst, NULL, c, flag, nextc); -} + char cc[2]; + int ret; -char * -nvis(char *dst, size_t dlen, int c, int flag, int nextc) -{ - return invis(dst, &dlen, c, flag, nextc); -} + cc[0] = c; + cc[1] = nextc; + ret = istrsenvisx(mbdst, &dlen, cc, 1, flags, "", NULL); + if (ret < 0) + return NULL; + return mbdst + ret; +} /* - * strvis, strvisx - visually encode characters from src into dst + * strvis - visually encode characters from src into dst * * Dst must be 4 times the size of src to account for possible * expansion. The length of dst, not including the trailing NULL, * is returned. - * - * Strvisx encodes exactly len bytes from src into dst. - * This is useful for encoding a block of data. */ -static int -istrnvis(char *dst, size_t *dlen, const char *src, int flag) -{ - char *extra = NULL; - int rv; - - MAKEEXTRALIST(flag, extra, ""); - if (!extra) { - if (dlen && *dlen == 0) { - errno = ENOSPC; - return -1; - } - *dst = '\0'; /* can't create extra, return "" */ - return 0; - } - rv = istrsnvis(dst, dlen, src, flag, extra); - free(extra); - return rv; -} int -strvis(char *dst, const char *src, int flag) +strvis(char *mbdst, const char *mbsrc, int flags) { - return istrnvis(dst, NULL, src, flag); + return istrsenvisx(mbdst, NULL, mbsrc, 0, flags, "", NULL); } int -strnvis(char *dst, size_t dlen, const char *src, int flag) +strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags) { - return istrnvis(dst, &dlen, src, flag); + return istrsenvisx(mbdst, &dlen, mbsrc, 0, flags, "", NULL); } -static int -istrnvisx(char *dst, size_t *dlen, const char *src, size_t len, int flag) -{ - char *extra = NULL; - int rv; +/* + * strvisx - visually encode characters from src into dst + * + * Dst must be 4 times the size of src to account for possible + * expansion. The length of dst, not including the trailing NULL, + * is returned. + * + * Strvisx encodes exactly len characters from src into dst. + * This is useful for encoding a block of data. + */ - MAKEEXTRALIST(flag, extra, ""); - if (!extra) { - if (dlen && *dlen == 0) { - errno = ENOSPC; - return -1; - } - *dst = '\0'; /* can't create extra, return "" */ - return 0; - } - rv = istrsnvisx(dst, dlen, src, len, flag, extra); - free(extra); - return rv; +int +strvisx(char *mbdst, const char *mbsrc, size_t len, int flags) +{ + return istrsenvisx(mbdst, NULL, mbsrc, len, flags, "", NULL); } int -strvisx(char *dst, const char *src, size_t len, int flag) +strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags) { - return istrnvisx(dst, NULL, src, len, flag); + return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", NULL); } int -strnvisx(char *dst, size_t dlen, const char *src, size_t len, int flag) +strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags, + int *cerr_ptr) { - return istrnvisx(dst, &dlen, src, len, flag); + return istrsenvisx(mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr); } - #endif diff --git a/contrib/libc-vis/vis.h b/contrib/libc-vis/vis.h index cd7680e1d..beb029c27 100644 --- a/contrib/libc-vis/vis.h +++ b/contrib/libc-vis/vis.h @@ -1,4 +1,4 @@ -/* $NetBSD: vis.h,v 1.20 2012/12/14 21:36:59 christos Exp $ */ +/* $NetBSD: vis.h,v 1.21 2013/02/20 17:01:15 christos Exp $ */ /* $FreeBSD$ */ /*- @@ -96,9 +96,12 @@ int strsnvis(char *, size_t, const char *, int, const char *); int strvisx(char *, const char *, size_t, int); int strnvisx(char *, size_t, const char *, size_t, int); +int strenvisx(char *, size_t, const char *, size_t, int, int *); int strsvisx(char *, const char *, size_t, int, const char *); int strsnvisx(char *, size_t, const char *, size_t, int, const char *); +int strsenvisx(char *, size_t, const char *, size_t , int, const char *, + int *); int strunvis(char *, const char *); int strnunvis(char *, size_t, const char *); diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index 252f6ec5d..b91ba5ad3 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -193,6 +193,21 @@ MLINKS+=tcsetattr.3 cfgetispeed.3 tcsetattr.3 cfgetospeed.3 \ tcsetattr.3 cfsetspeed.3 tcsetattr.3 tcgetattr.3 MLINKS+=ttyname.3 isatty.3 ttyname.3 ttyname_r.3 MLINKS+=tzset.3 tzsetwall.3 -MLINKS+=unvis.3 strunvis.3 unvis.3 strunvisx.3 -MLINKS+=vis.3 strvis.3 vis.3 strvisx.3 +MLINKS+=unvis.3 strunvis.3 \ + unvis.3 strunvisx.3 +MLINKS+=vis.3 nvis.3 \ + vis.3 snvis.3 \ + vis.3 strenvisx.3 \ + vis.3 strnunvis.3 \ + vis.3 strnunvisx.3 \ + vis.3 strnvis.3 \ + vis.3 strnvisx.3 \ + vis.3 strsenvisx.3 \ + vis.3 strsnvis.3 \ + vis.3 strsnvisx.3 \ + vis.3 strsvis.3 \ + vis.3 strsvisx.3 \ + vis.3 strvis.3 \ + vis.3 strvisx.3 \ + vis.3 svis.3 MLINKS+=wordexp.3 wordfree.3 diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index e734f900a..6ef01a08f 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -388,10 +388,12 @@ FBSD_1.3 { pwcache_userdb; pwcache_groupdb; snvis; + strenvisx; strnunvis; strnunvisx; strnvis; strnvisx; + strsenvisx; strsnvis; strsnvisx; strsvis; -- 2.45.0