From 1ea4739cfbb843bd9ea08a25b7a1e507f6d34b0c Mon Sep 17 00:00:00 2001 From: pfg Date: Mon, 5 May 2014 14:50:53 +0000 Subject: [PATCH] MFC r265095, r265167; citrus: Avoid invalid code points. The UTF-8 decoder should not accept byte sequences which decode to Unicode code positions U+D800 to U+DFFF (UTF-16 surrogates).[1] Contrary to the original OpenBSD patch, we do pass U+FFFE and U+FFFF; both values are valid "non-characters" [2] and must be mapped through UTFs. [1] http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 [2] http://www.unicode.org/faq/private_use.html Reported by: Stefan Sperling [1] Thanks to: jilles [2] Obtained from: OpenBSD git-svn-id: svn://svn.freebsd.org/base/stable/10@265361 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f --- lib/libc/locale/utf8.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/libc/locale/utf8.c b/lib/libc/locale/utf8.c index 40f0e1701..cffa24113 100644 --- a/lib/libc/locale/utf8.c +++ b/lib/libc/locale/utf8.c @@ -203,6 +203,13 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, errno = EILSEQ; return ((size_t)-1); } + if (wch >= 0xd800 && wch <= 0xdfff) { + /* + * Malformed input; invalid code points. + */ + errno = EILSEQ; + return ((size_t)-1); + } if (pwc != NULL) *pwc = wch; us->want = 0; -- 2.45.0