From 45916e7c91ce4e8b6dd5ae9ade8349f51c97d417 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sun, 27 Mar 2022 22:19:28 -0700 Subject: [PATCH] Vendor import of expat 2.4.7. --- Changes | 34 +++++++++++ Makefile.am | 4 +- Makefile.in | 4 +- README.md | 2 +- configure.ac | 2 +- doc/reference.html | 20 ++++-- doc/xmlwf.1 | 2 +- doc/xmlwf.xml | 2 +- lib/expat.h | 22 +++++-- lib/xmlparse.c | 147 ++++++++++++++++++++++++++++++++++++++++++--- tests/runtests.c | 25 +++++--- 11 files changed, 232 insertions(+), 32 deletions(-) diff --git a/Changes b/Changes index 40127e1b76f..95f697b39a4 100644 --- a/Changes +++ b/Changes @@ -2,6 +2,40 @@ NOTE: We are looking for help with a few things: https://github.com/libexpat/libexpat/labels/help%20wanted If you can help, please get in touch. Thanks! +Release 2.4.7 Fri March 4 2022 + Bug fixes: + #572 #577 Relax fix to CVE-2022-25236 (introduced with release 2.4.5) + with regard to all valid URI characters (RFC 3986), + i.e. the following set (excluding whitespace): + ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz + 0123456789 % -._~ :/?#[]@ !$&'()*+,;= + + Other changes: + #555 #570 #581 CMake|Windows: Store Expat version in the DLL + #577 Document consequences of namespace separator choices not just + in doc/reference.html but also in header + #577 Document Expat's lack of validation of namespace URIs against + RFC 3986, and that the XML 1.0r4 specification doesn't + require Expat to validate namespace URIs, and that Expat + may do more in that regard in future releases. + If you find need for strict RFC 3986 URI validation on + application level today, https://uriparser.github.io/ may + be of interest. 
+ #579 Fix documentation of XML_EndDoctypeDeclHandler in expat.h + #575 Document that a call to XML_FreeContentModel can be done at + a later time from outside the element declaration handler + #574 Make hardcoded namespace URIs easier to find in code + #573 Update documentation on use of XML_POOR_ENTROPY on Solaris + #569 #571 tests: Resolve use of macros NAN and INFINITY for GNU G++ + 4.8.2 on Solaris. + #578 #580 Version info bumped from 9:6:8 to 9:7:8; + see https://verbump.de/ for what these numbers do + + Special thanks to: + Jeffrey Walton + Johnny Jazeix + Thijs Schreijer + Release 2.4.6 Sun February 20 2022 Bug fixes: #566 Fix a regression introduced by the fix for CVE-2022-25313 diff --git a/Makefile.am b/Makefile.am index e6e7971ec8b..37ae3738edd 100644 --- a/Makefile.am +++ b/Makefile.am @@ -8,6 +8,7 @@ # # Copyright (c) 2017-2021 Sebastian Pipping # Copyright (c) 2018 KangLin +# Copyright (c) 2022 Johnny Jazeix # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -82,7 +83,8 @@ _EXTRA_DIST_WINDOWS = \ win32/build_expat_iss.bat \ win32/expat.iss \ win32/MANIFEST.txt \ - win32/README.txt + win32/README.txt \ + win32/version.rc EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ diff --git a/Makefile.in b/Makefile.in index 7c6551fca2c..ea8c72e80ea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,6 +24,7 @@ # # Copyright (c) 2017-2021 Sebastian Pipping # Copyright (c) 2018 KangLin +# Copyright (c) 2022 Johnny Jazeix # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -475,7 +476,8 @@ _EXTRA_DIST_WINDOWS = \ win32/build_expat_iss.bat \ win32/expat.iss \ win32/MANIFEST.txt \ - win32/README.txt + win32/README.txt \ + win32/version.rc EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ diff --git a/README.md b/README.md index 959c4a6e94a..6bfbf130dbf 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Downloads
GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases) -# Expat, Release 2.4.6 +# Expat, Release 2.4.7 This is Expat, a C library for parsing XML, started by [James Clark](https://en.wikipedia.org/wiki/James_Clark_%28programmer%29) in 1997. diff --git a/configure.ac b/configure.ac index 5175487bb4b..7a7f013febc 100644 --- a/configure.ac +++ b/configure.ac @@ -82,7 +82,7 @@ dnl If the API changes incompatibly set LIBAGE back to 0 dnl LIBCURRENT=9 # sync -LIBREVISION=6 # with +LIBREVISION=7 # with LIBAGE=8 # CMakeLists.txt! AC_CONFIG_HEADERS([expat_config.h]) diff --git a/doc/reference.html b/doc/reference.html index 26db5a63547..87ace02d456 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -18,6 +18,7 @@ Copyright (c) 2017 Jakub Wilk Copyright (c) 2021 Tomas Korbar Copyright (c) 2021 Nicolas Cavallari + Copyright (c) 2022 Thijs Schreijer Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -49,7 +50,7 @@

The Expat XML Parser - Release 2.4.6 + Release 2.4.7

@@ -974,6 +975,14 @@ the local part will be concatenated without any separator - this is intended to support RDF processors. It is a programming error to use the null separator with namespace triplets.
+

Note: +Expat does not validate namespace URIs (beyond encoding) +against RFC 3986 today (and is not required to do so with regard to +the XML 1.0 namespaces specification) but it may start doing that +in future releases. Before that, an application using Expat must +be ready to receive namespace URIs containing non-URI characters. +

+

XML_ParserCreate_MM

 XML_Parser XMLCALL
@@ -1808,10 +1817,11 @@ struct XML_cp {
 

Sets a handler for element declarations in a DTD. The handler gets called with the name of the element in the declaration and a pointer -to a structure that contains the element model. It is the -application's responsibility to free this data structure using -XML_FreeContentModel.

+to a structure that contains the element model. It's the user code's +responsibility to free model when finished with it. See +XML_FreeContentModel. +There is no need to free the model from the handler, it can be kept +around and freed at a later stage.

The model argument is the root of a tree of XML_Content nodes. If type equals diff --git a/doc/xmlwf.1 b/doc/xmlwf.1 index f931d63d4e1..aa024e0abed 100644 --- a/doc/xmlwf.1 +++ b/doc/xmlwf.1 @@ -5,7 +5,7 @@ \\$2 \(la\\$1\(ra\\$3 .. .if \n(.g .mso www.tmac -.TH XMLWF 1 "February 20, 2022" "" "" +.TH XMLWF 1 "March 4, 2022" "" "" .SH NAME xmlwf \- Determines if an XML document is well-formed .SH SYNOPSIS diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml index 79ed58569ea..8b43a11ef3a 100644 --- a/doc/xmlwf.xml +++ b/doc/xmlwf.xml @@ -21,7 +21,7 @@ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ Scott"> Bronson"> - February 20, 2022"> + March 4, 2022"> 1"> bronson@rinspin.com"> diff --git a/lib/expat.h b/lib/expat.h index 46a0e1bcd22..c9214f64070 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -15,6 +15,7 @@ Copyright (c) 2016 Cristian Rodríguez Copyright (c) 2016 Thomas Beutlich Copyright (c) 2017 Rhodri James + Copyright (c) 2022 Thijs Schreijer Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -174,8 +175,10 @@ struct XML_cp { }; /* This is called for an element declaration. See above for - description of the model argument. It's the caller's responsibility - to free model when finished with it. + description of the model argument. It's the user code's responsibility + to free model when finished with it. See XML_FreeContentModel. + There is no need to free the model from the handler, it can be kept + around and freed at a later stage. */ typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData, const XML_Char *name, @@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding); and the local part will be concatenated without any separator. It is a programming error to use the separator '\0' with namespace triplets (see XML_SetReturnNSTriplet). 
+ If a namespace separator is chosen that can be part of a URI or + part of an XML name, splitting an expanded name back into its + 1, 2 or 3 original parts on application level in the element handler + may end up vulnerable, so these are advised against; sane choices for + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). + + Note that Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to + the XML 1.0 namespaces specification) but it may start doing that + in future releases. Before that, an application using Expat must + be ready to receive namespace URIs containing non-URI characters. */ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); @@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, const XML_Char *pubid, int has_internal_subset); -/* This is called for the start of the DOCTYPE declaration when the +/* This is called for the end of the DOCTYPE declaration when the closing > is encountered, but after processing any external subset. 
*/ @@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 4 -#define XML_MICRO_VERSION 6 +#define XML_MICRO_VERSION 7 #ifdef __cplusplus } diff --git a/lib/xmlparse.c b/lib/xmlparse.c index 7db28d07acb..05216d997b0 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -1,4 +1,4 @@ -/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+) +/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -34,6 +34,7 @@ Copyright (c) 2019 Vadim Zeitlin Copyright (c) 2021 Dong-hee Na Copyright (c) 2022 Samanta Navarro + Copyright (c) 2022 Jeffrey Walton Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -133,7 +134,7 @@ * BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \ * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ - * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \ + * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. 
\ \ If insist on not using any of these, bypass this error by defining \ @@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { return XML_ParserCreate_MM(encodingName, NULL, tmp); } +// "xml=http://www.w3.org/XML/1998/namespace" static const XML_Char implicitContext[] = {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, @@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ABNF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: +
return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). */ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) { + // "http://www.w3.org/XML/1998/namespace" static const XML_Char xmlNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, @@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'}; static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1; + // "http://www.w3.org/2000/xmlns/" static const XML_Char xmlnsNamespace[] = {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, @@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; - // NOTE: While Expat does not validate namespace URIs against RFC 3986, - // we have to at least make sure that the XML processor on top of - // Expat (that is splitting tag names by namespace separator into - // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused - // by an attacker putting additional namespace separator characters - // into namespace declarations. That would be ambiguous and not to - // be expected. 
- if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespace URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! is_rfc3986_uri_char(uri[len])) { return XML_ERROR_SYNTAX; } } diff --git a/tests/runtests.c b/tests/runtests.c index 6d6f66909a1..3309bbaa076 100644 --- a/tests/runtests.c +++ b/tests/runtests.c @@ -54,7 +54,6 @@ #include #include #include /* intptr_t uint64_t */ -#include /* NAN, INFINITY, isnan */ #if !
defined(__cplusplus) # include @@ -7407,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) { struct test_case { enum XML_Status expectedStatus; const char *doc; + XML_Char namesep; }; struct test_case cases[] = { - {XML_STATUS_OK, ""}, - {XML_STATUS_ERROR, ""}, + {XML_STATUS_OK, "", XCS('\n')}, + {XML_STATUS_ERROR, "", XCS('\n')}, + {XML_STATUS_OK, "", XCS(':')}, }; size_t i = 0; size_t failCount = 0; for (; i < sizeof(cases) / sizeof(cases[0]); i++) { - XML_Parser parser = XML_ParserCreateNS(NULL, '\n'); + XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep); XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), /*isFinal*/ XML_TRUE) @@ -7588,7 +7589,7 @@ START_TEST(test_misc_version) { fail("Version mismatch"); #if ! defined(XML_UNICODE) || defined(XML_UNICODE_WCHAR_T) - if (xcstrcmp(version_text, XCS("expat_2.4.6"))) /* needs bump on releases */ + if (xcstrcmp(version_text, XCS("expat_2.4.7"))) /* needs bump on releases */ fail("XML_*_VERSION in expat.h out of sync?\n"); #else /* If we have XML_UNICODE defined but not XML_UNICODE_WCHAR_T @@ -11762,6 +11763,16 @@ START_TEST(test_accounting_precision) { } END_TEST +static float +portableNAN() { + return strtof("nan", NULL); +} + +static float +portableINFINITY() { + return strtof("infinity", NULL); +} + START_TEST(test_billion_laughs_attack_protection_api) { XML_Parser parserWithoutParent = XML_ParserCreate(NULL); XML_Parser parserWithParent @@ -11780,7 +11791,7 @@ START_TEST(test_billion_laughs_attack_protection_api) { == XML_TRUE) fail("Call with non-root parser is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( - parserWithoutParent, NAN) + parserWithoutParent, portableNAN()) == XML_TRUE) fail("Call with NaN limit is NOT supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( @@ -11802,7 +11813,7 @@ START_TEST(test_billion_laughs_attack_protection_api) 
{ == XML_FALSE) fail("Call with positive limit >=1.0 is supposed to succeed"); if (XML_SetBillionLaughsAttackProtectionMaximumAmplification( - parserWithoutParent, INFINITY) + parserWithoutParent, portableINFINITY()) == XML_FALSE) fail("Call with positive limit >=1.0 is supposed to succeed"); -- 2.45.0