1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* escape/unescape functions.
19 * These functions perform various escaping operations, and are provided in
20 * pairs, a function to query the length of and escape existing buffers, as
21 * well as companion functions to perform the same process to memory
22 * allocated from a pool.
24 * The API is designed to have the smallest possible RAM footprint, and so
25 * will only allocate the exact amount of RAM needed for each conversion.
28 #include "apr_escape.h"
29 #include "apr_escape_test_char.h"
30 #include "apr_encode_private.h"
32 #include "apr_strings.h"
34 /* we assume the folks using this ensure 0 <= c < 256... which means
35 * you need a cast to (unsigned char) first, you can't just plug a
36 * char in here and get it to work, because if char is signed then it
37 * will first be sign extended.
39 #define TEST_CHAR(c, f) (test_char_table[(unsigned)(c)] & (f))
/*
 * apr_escape_shell: scan str for bytes flagged T_ESCAPE_SHELL_CMD in
 * test_char_table.
 *
 * escaped - destination buffer, addressed through d as unsigned char
 * str     - input string, read through s as unsigned char so TEST_CHAR is
 *           never handed a sign-extended index
 * slen    - length budget; each loop runs while the current byte is not
 *           NUL and slen is non-zero, decrementing slen once per byte
 * len     - apr_size_t out-parameter; the store to it is not visible in
 *           this listing (NOTE(review): presumably the escaped length
 *           including the terminator - confirm)
 *
 * Two scanning loops with the same test are visible. NOTE(review):
 * presumably one writes into escaped and the other only measures, which
 * would match the two-pass use in apr_pescape_shell - confirm.
 */
41 APR_DECLARE(apr_status_t) apr_escape_shell(char *escaped, const char *str,
42 apr_ssize_t slen, apr_size_t *len)
45 const unsigned char *s;
49 d = (unsigned char *) escaped;
50 s = (const unsigned char *) str;
54 for (; *s && slen; ++s, slen--) {
/* The CR/LF special case below is compiled only for OS/2 and Windows. */
55 #if defined(OS2) || defined(WIN32)
57 * Newlines to Win32/OS2 CreateProcess() are ill advised.
58 * Convert them to spaces since they are effectively white
59 * space to most applications
61 if (*s == '\r' || *s == '\n') {
/* Bytes flagged as shell metacharacters take this branch. */
69 if (TEST_CHAR(*s, T_ESCAPE_SHELL_CMD)) {
/* Second loop: same termination rule and same table test as the first. */
80 for (; *s && slen; ++s, slen--) {
81 if (TEST_CHAR(*s, T_ESCAPE_SHELL_CMD)) {
/*
 * apr_pescape_shell: pool-allocating companion of apr_escape_shell.
 *
 * A first call with a NULL destination obtains the required length in len.
 * A buffer of len bytes is then taken from pool p and filled by a second
 * call that passes NULL for the length pointer. APR_ESCAPE_STRING is
 * passed as slen both times.
 *
 * NOTE(review): the case labels of the switch and the return statements
 * are not visible in this listing - presumably str itself is returned when
 * nothing needed escaping; confirm.
 */
100 APR_DECLARE(const char *) apr_pescape_shell(apr_pool_t *p, const char *str)
104 switch (apr_escape_shell(NULL, str, APR_ESCAPE_STRING, &len)) {
106 char *cmd = apr_palloc(p, len);
107 apr_escape_shell(cmd, str, APR_ESCAPE_STRING, NULL);
/*
 * x2c: combine the two hexadecimal digit characters what[0] and what[1]
 * into one char.
 *
 * The visible callers check both characters with apr_isxdigit before
 * calling; no validation is visible here.
 */
118 static char x2c(const char *what)
122 #if !APR_CHARSET_EBCDIC
/* A character at or above the letter A is treated as a hex letter: the
 * 0xdf mask folds lower case onto upper case before the offset from A,
 * plus 10, is taken. Anything below A is treated as a decimal digit. */
124 ((what[0] >= 'A') ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0'));
/* The second digit is added using the same mapping. NOTE(review): the
 * step that scales the first digit by 16 is not visible in this listing. */
126 digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0'));
127 #else /*APR_CHARSET_EBCDIC*/
/* EBCDIC builds parse xstr with strtol in base 16 and translate the low
 * byte of the result through ENCODE_TO_NATIVE. */
134 digit = ENCODE_TO_NATIVE[0xFF & strtol(xstr, NULL, 16)];
135 #endif /*APR_CHARSET_EBCDIC*/
/*
 * apr_unescape_url: decode percent-escapes found in url.
 *
 * escaped  - destination buffer, walked through d; only the first loop
 *            advances d
 * url      - input, walked through s
 * slen     - length budget; both loops run while the current byte is not
 *            NUL and slen is non-zero, decrementing slen once per step
 * forbid   - optional character set; a decoded byte found in it takes the
 *            same branch as a decoded NUL byte
 * reserved - optional character set; a decoded byte found in it takes its
 *            own branch
 * plus     - when non-zero, a plus sign in the input takes its own branch
 *
 * NOTE(review): the branch bodies, the updates of badesc and badpath, the
 * remaining parameter(s) and the return statements are not visible in this
 * listing. Presumably the first loop writes decoded output and the second
 * only measures, matching the two-pass use in apr_punescape_url - confirm.
 */
139 APR_DECLARE(apr_status_t) apr_unescape_url(char *escaped, const char *url,
140 apr_ssize_t slen, const char *forbid, const char *reserved, int plus,
145 const char *s = (const char *) url;
146 char *d = (char *) escaped;
147 register int badesc, badpath;
157 for (; *s && slen; ++s, d++, slen--) {
158 if (plus && *s == '+') {
162 else if (*s != '%') {
/* A percent sign must be followed by two hexadecimal digits. */
166 if (!apr_isxdigit(*(s + 1)) || !apr_isxdigit(*(s + 2))) {
172 decoded = x2c(s + 1);
/* An escape that decodes to NUL or to a forbidden character. */
173 if ((decoded == '\0')
174 || (forbid && strchr(forbid, decoded))) {
/* An escape that decodes to a reserved character. */
180 else if (reserved && strchr(reserved, decoded)) {
/* Second loop: same classification of the input, without advancing d. */
199 for (; *s && slen; ++s, slen--) {
200 if (plus && *s == '+') {
203 else if (*s != '%') {
204 /* character unchanged */
207 if (!apr_isxdigit(*(s + 1)) || !apr_isxdigit(*(s + 2))) {
212 decoded = x2c(s + 1);
213 if ((decoded == '\0')
214 || (forbid && strchr(forbid, decoded))) {
219 else if (reserved && strchr(reserved, decoded)) {
/*
 * apr_punescape_url: pool-allocating companion of apr_unescape_url.
 *
 * A first call with a NULL destination computes the required length; a
 * buffer of len bytes is then taken from pool p and filled by a second
 * call with the same forbid, reserved and plus arguments.
 * APR_ESCAPE_STRING is passed as slen both times.
 *
 * NOTE(review): the case labels of the switch, the trailing arguments of
 * both calls and the return statements are not visible in this listing.
 */
252 APR_DECLARE(const char *) apr_punescape_url(apr_pool_t *p, const char *url,
253 const char *forbid, const char *reserved, int plus)
257 switch (apr_unescape_url(NULL, url, APR_ESCAPE_STRING, forbid, reserved,
260 char *buf = apr_palloc(p, len);
261 apr_unescape_url(buf, url, APR_ESCAPE_STRING, forbid, reserved, plus,
277 /* c2x takes an unsigned, and expects the caller has guaranteed that
278 * 0 <= what < 256... which usually means that you have to cast to
279 * unsigned char first, because (unsigned)(char)(x) first goes through
280 * signed extension to an int before the unsigned cast.
282 * The reason for this assumption is to assist gcc code generation --
283 * the unsigned char -> unsigned extension is already done earlier in
284 * both uses of this code, so there's no need to waste time doing it again.
/* Lookup table mapping a nibble value (0 to 15) to its lower-case
 * hexadecimal digit. */
287 static const char c2x_table[] = "0123456789abcdef";
/*
 * c2x: write the two hexadecimal digits of what at where, high nibble
 * first, advancing where past each digit.
 *
 * what   - value to encode; indexes c2x_table after a shift by 4 and a
 *          mask with 0xf, so it must stay below 256
 * prefix - NOTE(review): its use is not visible in this listing;
 *          presumably it is emitted before the two digits - confirm
 * where  - output cursor
 *
 * NOTE(review): the return statement is not visible; the caller in
 * apr_escape_ldap assigns the result back to its output cursor.
 */
289 static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix,
290 unsigned char *where)
292 #if APR_CHARSET_EBCDIC
/* On EBCDIC builds the byte is first translated through convert_e2a. */
293 what = convert_e2a[(unsigned char)what];
294 #endif /*APR_CHARSET_EBCDIC*/
296 *where++ = c2x_table[what >> 4];
297 *where++ = c2x_table[what & 0xf];
/*
 * apr_escape_path_segment: scan str for bytes flagged
 * T_ESCAPE_PATH_SEGMENT in test_char_table.
 *
 * escaped - destination buffer, addressed through d
 * str     - input, read as unsigned char through s
 * slen    - length budget; both loops run while the current byte c is not
 *           NUL and slen is non-zero
 * len     - apr_size_t out-parameter; the store to it is not visible here
 *
 * NOTE(review): the loop bodies and return statements are not visible in
 * this listing. Presumably the first loop writes escaped output and the
 * second only measures, matching apr_pescape_path_segment - confirm.
 */
301 APR_DECLARE(apr_status_t) apr_escape_path_segment(char *escaped,
302 const char *str, apr_ssize_t slen, apr_size_t *len)
306 const unsigned char *s = (const unsigned char *) str;
307 unsigned char *d = (unsigned char *) escaped;
312 while ((c = *s) && slen) {
313 if (TEST_CHAR(c, T_ESCAPE_PATH_SEGMENT)) {
328 while ((c = *s) && slen) {
329 if (TEST_CHAR(c, T_ESCAPE_PATH_SEGMENT)) {
/*
 * apr_pescape_path_segment: pool-allocating companion of
 * apr_escape_path_segment.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * that passes NULL for the length pointer.
 *
 * NOTE(review): the rest of the parameter list, the case labels and the
 * return statements are not visible in this listing.
 */
350 APR_DECLARE(const char *) apr_pescape_path_segment(apr_pool_t *p,
355 switch (apr_escape_path_segment(NULL, str, APR_ESCAPE_STRING, &len)) {
357 char *cmd = apr_palloc(p, len);
358 apr_escape_path_segment(cmd, str, APR_ESCAPE_STRING, NULL);
/*
 * apr_escape_path: scan path for bytes flagged T_OS_ESCAPE_PATH in
 * test_char_table.
 *
 * escaped - destination buffer, addressed through d
 * path    - input, read as unsigned char through s
 * slen    - length budget; both loops run while the current byte c is not
 *           NUL and slen is non-zero
 * partial - NOTE(review): its use is not visible in this listing;
 *           presumably it controls the colon check below - confirm
 * len     - apr_size_t out-parameter; the store to it is not visible here
 *
 * NOTE(review): the loop bodies and return statements are not visible.
 * Presumably the first loop writes escaped output and the second only
 * measures, matching apr_pescape_path - confirm.
 */
369 APR_DECLARE(apr_status_t) apr_escape_path(char *escaped, const char *path,
370 apr_ssize_t slen, int partial, apr_size_t *len)
374 const unsigned char *s = (const unsigned char *) path;
375 unsigned char *d = (unsigned char *) escaped;
/* Locate the first colon and the first slash of the whole path. */
383 const char *colon = strchr(path, ':');
384 const char *slash = strchr(path, '/');
/* True when a colon occurs and either no slash exists or the colon comes
 * before the first slash. The action taken is not visible here. */
386 if (colon && (!slash || colon < slash)) {
396 while ((c = *s) && slen) {
397 if (TEST_CHAR(c, T_OS_ESCAPE_PATH)) {
412 while ((c = *s) && slen) {
413 if (TEST_CHAR(c, T_OS_ESCAPE_PATH)) {
/*
 * apr_pescape_path: pool-allocating companion of apr_escape_path.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * with the same partial flag and NULL for the length pointer.
 *
 * NOTE(review): the rest of the parameter list, the case labels and the
 * return statements are not visible in this listing.
 */
433 APR_DECLARE(const char *) apr_pescape_path(apr_pool_t *p, const char *str,
438 switch (apr_escape_path(NULL, str, APR_ESCAPE_STRING, partial, &len)) {
440 char *path = apr_palloc(p, len);
441 apr_escape_path(path, str, APR_ESCAPE_STRING, partial, NULL);
/*
 * apr_escape_urlencoded: scan str for bytes flagged T_ESCAPE_URLENCODED
 * in test_char_table.
 *
 * escaped - destination buffer, addressed through d
 * str     - input, read as unsigned char through s
 * slen    - length budget; both loops run while the current byte c is not
 *           NUL and slen is non-zero
 * len     - apr_size_t out-parameter; the store to it is not visible here
 *
 * NOTE(review): the loop bodies and return statements are not visible in
 * this listing. Presumably the first loop writes escaped output and the
 * second only measures, matching apr_pescape_urlencoded - confirm.
 */
452 APR_DECLARE(apr_status_t) apr_escape_urlencoded(char *escaped, const char *str,
453 apr_ssize_t slen, apr_size_t *len)
457 const unsigned char *s = (const unsigned char *) str;
458 unsigned char *d = (unsigned char *) escaped;
463 while ((c = *s) && slen) {
464 if (TEST_CHAR(c, T_ESCAPE_URLENCODED)) {
483 while ((c = *s) && slen) {
484 if (TEST_CHAR(c, T_ESCAPE_URLENCODED)) {
/*
 * apr_pescape_urlencoded: pool-allocating companion of
 * apr_escape_urlencoded.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * that passes NULL for the length pointer.
 *
 * NOTE(review): the case labels and return statements are not visible in
 * this listing.
 */
508 APR_DECLARE(const char *) apr_pescape_urlencoded(apr_pool_t *p, const char *str)
512 switch (apr_escape_urlencoded(NULL, str, APR_ESCAPE_STRING, &len)) {
514 char *encoded = apr_palloc(p, len);
515 apr_escape_urlencoded(encoded, str, APR_ESCAPE_STRING, NULL);
/*
 * apr_escape_entity: replace bytes flagged T_ESCAPE_XML in test_char_table
 * with entity text and, when toasc is non-zero, replace non-ASCII bytes
 * with a three-digit decimal numeric reference.
 *
 * escaped - destination buffer, addressed through d
 * str     - input, read as unsigned char through s
 * slen    - length budget; both loops run while the current byte c is not
 *           NUL and slen is non-zero
 * toasc   - enables the numeric-reference branch for bytes that fail
 *           apr_isascii
 * len     - apr_size_t out-parameter; the store to it is not visible here
 *
 * The first loop copies replacement text into d; the second loop only
 * accumulates size, formatting the numeric reference into a scratch buffer
 * to learn its length.
 *
 * NOTE(review): the string literals in the memcpy calls are one character
 * long while 4, 5 or 6 bytes are copied from them. The listing appears to
 * have had its HTML entities decoded; presumably the literals are the gt,
 * lt, amp, quot and apos entity spellings, whose lengths match the copy
 * counts. Restore them before compiling.
 *
 * NOTE(review): the numeric format yields six characters for a
 * three-digit value, which needs seven bytes including the terminator,
 * yet both apr_snprintf calls pass a size of 6. Presumably the trailing
 * semicolon is truncated and the returned length is 5 - confirm against
 * the apr_snprintf contract and consider passing 7.
 *
 * NOTE(review): the case labels, the cursor and size updates and the
 * return statements are not visible in this listing.
 */
526 APR_DECLARE(apr_status_t) apr_escape_entity(char *escaped, const char *str,
527 apr_ssize_t slen, int toasc, apr_size_t *len)
531 const unsigned char *s = (const unsigned char *) str;
532 unsigned char *d = (unsigned char *) escaped;
537 while ((c = *s) && slen) {
538 if (TEST_CHAR(c, T_ESCAPE_XML)) {
541 memcpy(d, ">", 4);
547 memcpy(d, "<", 4);
553 memcpy(d, "&", 5);
559 memcpy(d, """, 6);
565 memcpy(d, "'", 6);
573 else if (toasc && !apr_isascii(c)) {
574 int offset = apr_snprintf((char *) d, 6, "&#%3.3d;", c);
589 while ((c = *s) && slen) {
590 if (TEST_CHAR(c, T_ESCAPE_XML)) {
615 else if (toasc && !apr_isascii(c)) {
617 size += apr_snprintf(buf, 6, "&#%3.3d;", c);
/*
 * apr_pescape_entity: pool-allocating companion of apr_escape_entity.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * with the same toasc flag and NULL for the length pointer.
 *
 * NOTE(review): the rest of the parameter list, the case labels and the
 * return statements are not visible in this listing.
 */
639 APR_DECLARE(const char *) apr_pescape_entity(apr_pool_t *p, const char *str,
644 switch (apr_escape_entity(NULL, str, APR_ESCAPE_STRING, toasc, &len)) {
646 char *cmd = apr_palloc(p, len);
647 apr_escape_entity(cmd, str, APR_ESCAPE_STRING, toasc, NULL);
658 /* maximum length of any ISO-LATIN-1 HTML entity name. */
659 #define MAXENTLEN (6)
/*
 * apr_unescape_entity: decode numeric and named entities found in str.
 *
 * unescaped - destination buffer; the first loop writes through d
 * str       - input, walked through s
 * slen      - length budget; both loops run while the current byte is not
 *             NUL and slen is non-zero, and the inner entity scan also
 *             stops when slen - i reaches zero
 * len       - apr_size_t out-parameter; the store to it is not visible
 *
 * entlist is indexed by entity-name length. Each entry is a packed string
 * of records, a name of that length followed by one byte holding the
 * character code, so records are matched with strncmp over j characters
 * and the code is read from position j of the record.
 *
 * The first loop advances d and size together with s; the second loop
 * advances only size, so it measures without writing.
 *
 * NOTE(review): the test for the ampersand, the assignments of j in the
 * named-entity path, the increments of s past a decoded entity and the
 * return statements are not visible in this listing.
 */
661 APR_DECLARE(apr_status_t) apr_unescape_entity(char *unescaped, const char *str,
662 apr_ssize_t slen, apr_size_t *len)
670 static const char * const entlist[MAXENTLEN + 1] =
674 "lt\074gt\076", /* 2 */
675 "amp\046ETH\320eth\360", /* 3 */
676 "quot\042Auml\304Euml\313Iuml\317Ouml\326Uuml\334auml\344euml"
677 "\353iuml\357ouml\366uuml\374yuml\377", /* 4 */
678 "Acirc\302Aring\305AElig\306Ecirc\312Icirc\316Ocirc\324Ucirc"
679 "\333THORN\336szlig\337acirc\342aring\345aelig\346ecirc\352"
680 "icirc\356ocirc\364ucirc\373thorn\376", /* 5 */
681 "Agrave\300Aacute\301Atilde\303Ccedil\307Egrave\310Eacute\311"
682 "Igrave\314Iacute\315Ntilde\321Ograve\322Oacute\323Otilde"
683 "\325Oslash\330Ugrave\331Uacute\332Yacute\335agrave\340"
684 "aacute\341atilde\343ccedil\347egrave\350eacute\351igrave"
685 "\354iacute\355ntilde\361ograve\362oacute\363otilde\365"
686 "oslash\370ugrave\371uacute\372yacute\375" /* 6 */
691 for (; *s != '\0' && slen; s++, d++, size++, slen--) {
696 /* find end of entity */
697 for (i = 1; s[i] != ';' && s[i] != '\0' && (slen - i) != 0;
/* No terminating semicolon was found before the end of the input. */
702 if (s[i] == '\0' || (slen - i) == 0) { /* treat as normal data */
707 /* is it numeric ? */
/* Accumulate the decimal digits that start at s[2]. */
709 for (j = 2, val = 0; j < i && apr_isdigit(s[j]); j++) {
710 val = val * 10 + s[j] - '0';
/* Reject a reference that contains a non-digit before the terminator, or
 * whose value is at most 8, in 11 to 31, in 127 to 160, or at least 256. */
713 if (j < i || val <= 8 || (val >= 11 && val <= 31)
714 || (val >= 127 && val <= 160) || val >= 256) {
/* Step d back so the d++ of the loop header leaves it unchanged. */
715 d--; /* no data to output */
719 *d = ENCODE_TO_ASCII(val);
/* Names longer than MAXENTLEN, or of a length with no table entry, cannot
 * match anything. */
725 if (j > MAXENTLEN || entlist[j] == NULL) {
728 continue; /* skip it */
/* Walk the packed records for this name length, i bytes at a time. */
730 for (ents = entlist[j]; *ents != '\0'; ents += i) {
731 if (strncmp(s + 1, ents, j) == 0) {
737 *d = '&'; /* unknown */
/* The byte right after the matched name is the character code. */
740 *d = ENCODE_TO_ASCII(((const unsigned char *) ents)[j]);
/* Second loop: same parsing, counting output bytes only. */
750 for (; *s != '\0' && slen; s++, size++, slen--) {
754 /* find end of entity */
755 for (i = 1; s[i] != ';' && s[i] != '\0' && (slen - i) != 0;
760 if (s[i] == '\0' || (slen - i) == 0) { /* treat as normal data */
764 /* is it numeric ? */
766 for (j = 2, val = 0; j < i && apr_isdigit(s[j]); j++) {
767 val = val * 10 + s[j] - '0';
770 if (j < i || val <= 8 || (val >= 11 && val <= 31)
771 || (val >= 127 && val <= 160) || val >= 256) {
772 /* no data to output */
781 if (j > MAXENTLEN || entlist[j] == NULL) {
783 continue; /* skip it */
785 for (ents = entlist[j]; *ents != '\0'; ents += i) {
786 if (strncmp(s + 1, ents, j) == 0) {
/*
 * apr_punescape_entity: pool-allocating companion of apr_unescape_entity.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * that passes NULL for the length pointer.
 *
 * NOTE(review): the case labels and return statements are not visible in
 * this listing.
 */
814 APR_DECLARE(const char *) apr_punescape_entity(apr_pool_t *p, const char *str)
818 switch (apr_unescape_entity(NULL, str, APR_ESCAPE_STRING, &len)) {
820 char *cmd = apr_palloc(p, len);
821 apr_unescape_entity(cmd, str, APR_ESCAPE_STRING, NULL);
/*
 * apr_escape_echo: scan str for bytes flagged T_ESCAPE_ECHO in
 * test_char_table.
 *
 * escaped - destination buffer, addressed through d
 * str     - input, read as unsigned char through s
 * slen    - length budget; both loops run while the current byte c is not
 *           NUL and slen is non-zero
 * quote   - NOTE(review): its use is not visible in this listing;
 *           presumably it selects whether quote characters are escaped
 * len     - apr_size_t out-parameter; the store to it is not visible here
 *
 * NOTE(review): the loop bodies and return statements are not visible.
 * Presumably the first loop writes escaped output and the second only
 * measures, matching apr_pescape_echo - confirm.
 */
832 APR_DECLARE(apr_status_t) apr_escape_echo(char *escaped, const char *str,
833 apr_ssize_t slen, int quote, apr_size_t *len)
837 const unsigned char *s = (const unsigned char *) str;
838 unsigned char *d = (unsigned char *) escaped;
843 while ((c = *s) && slen) {
844 if (TEST_CHAR(c, T_ESCAPE_ECHO)) {
916 while ((c = *s) && slen) {
917 if (TEST_CHAR(c, T_ESCAPE_ECHO)) {
/*
 * apr_pescape_echo: pool-allocating companion of apr_escape_echo.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * with the same quote flag and NULL for the length pointer.
 *
 * NOTE(review): the rest of the parameter list, the case labels and the
 * return statements are not visible in this listing.
 */
962 APR_DECLARE(const char *) apr_pescape_echo(apr_pool_t *p, const char *str,
967 switch (apr_escape_echo(NULL, str, APR_ESCAPE_STRING, quote, &len)) {
969 char *cmd = apr_palloc(p, len);
970 apr_escape_echo(cmd, str, APR_ESCAPE_STRING, quote, NULL);
/*
 * apr_escape_hex: write each of the srclen bytes at src as two lower-case
 * hexadecimal digits into dest.
 *
 * dest   - output cursor; two characters are stored per input byte
 * src    - input bytes, read through in as unsigned char
 * srclen - number of input bytes
 * colon  - when non-zero and srclen is non-zero, a different length
 *          computation applies (NOTE(review): presumably a colon is
 *          placed between byte pairs; that code is not visible here)
 * len    - receives the output length
 *
 * NOTE(review): the NULL checks on dest and src, the terminator store and
 * the return statements are not visible in this listing.
 */
981 APR_DECLARE(apr_status_t) apr_escape_hex(char *dest, const void *src,
982 apr_size_t srclen, int colon, apr_size_t *len)
984 const unsigned char *in = src;
992 for (size = 0; size < srclen; size++) {
/* High nibble first, then low nibble. */
996 *dest++ = c2x_table[in[size] >> 4];
997 *dest++ = c2x_table[in[size] & 0xf];
1003 if (colon && srclen) {
/* Two characters per byte plus one. NOTE(review): presumably the extra
 * byte is the terminator and this is the branch taken without colons. */
1007 *len = srclen * 2 + 1;
/*
 * apr_pescape_hex: pool-allocating companion of apr_escape_hex.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * with the same src, srclen and colon and NULL for the length pointer.
 * The switch also has an APR_NOTFOUND case.
 *
 * NOTE(review): the other case labels and the return statements are not
 * visible in this listing.
 */
1014 APR_DECLARE(const char *) apr_pescape_hex(apr_pool_t *p, const void *src,
1015 apr_size_t srclen, int colon)
1019 switch (apr_escape_hex(NULL, src, srclen, colon, &len)) {
1021 char *cmd = apr_palloc(p, len);
1022 apr_escape_hex(cmd, src, srclen, colon, NULL);
1025 case APR_NOTFOUND: {
/*
 * apr_unescape_hex: parse hexadecimal digit characters from str.
 *
 * dest  - output buffer, addressed through d as unsigned char
 * str   - input, read as unsigned char through s
 * slen  - length budget; both loops run while the current byte c is not
 *         NUL and slen is non-zero
 * colon - when non-zero, a colon seen while flip is zero takes its own
 *         branch
 * len   - apr_size_t out-parameter; the store to it is not visible here
 *
 * Accepted digit classes are decimal digits, upper-case letters up to F
 * and lower-case letters up to f. One path returns APR_NOTFOUND.
 *
 * NOTE(review): the handling of flip, the stores through d, the branch
 * for invalid characters and the other return statements are not visible
 * in this listing. Presumably the first loop decodes into dest and the
 * second only validates and measures - confirm.
 */
1033 APR_DECLARE(apr_status_t) apr_unescape_hex(void *dest, const char *str,
1034 apr_ssize_t slen, int colon, apr_size_t *len)
1036 apr_size_t size = 0;
1038 const unsigned char *s = (const unsigned char *) str;
1039 unsigned char *d = (unsigned char *) dest;
1041 unsigned char u = 0;
1045 while ((c = *s) && slen) {
1051 if (colon && c == ':' && !flip) {
1056 else if (apr_isdigit(c)) {
1059 else if (apr_isupper(c) && c <= 'F') {
/* Maps A to F onto 10 to 15 and merges the value into u. */
1060 u |= c - ('A' - 10);
1062 else if (apr_islower(c) && c <= 'f') {
/* Maps a to f onto 10 to 15 and merges the value into u. */
1063 u |= c - ('a' - 10);
/* Second loop: same character classification as the first. */
1084 while ((c = *s) && slen) {
1086 if (colon && c == ':' && !flip) {
1091 else if (apr_isdigit(c)) {
1094 else if (apr_isupper(c) && c <= 'F') {
1097 else if (apr_islower(c) && c <= 'f') {
1119 return APR_NOTFOUND;
/*
 * apr_punescape_hex: pool-allocating companion of apr_unescape_hex.
 *
 * A first call with a NULL destination obtains the required size in the
 * local size; a buffer of size bytes from pool p is then filled by a
 * second call that forwards the len pointer supplied by the caller. The
 * switch also has an APR_NOTFOUND case.
 *
 * NOTE(review): the other case labels and the return statements are not
 * visible in this listing.
 */
1125 APR_DECLARE(const void *) apr_punescape_hex(apr_pool_t *p, const char *str,
1126 int colon, apr_size_t *len)
1130 switch (apr_unescape_hex(NULL, str, APR_ESCAPE_STRING, colon, &size)) {
1132 void *cmd = apr_palloc(p, size);
1133 apr_unescape_hex(cmd, str, APR_ESCAPE_STRING, colon, len);
1137 case APR_NOTFOUND: {
/*
 * apr_escape_ldap: escape bytes of str that are flagged for LDAP
 * distinguished names and/or LDAP filters.
 *
 * escaped - destination buffer, addressed through d
 * str     - input bytes, read as unsigned char through s
 * slen    - length budget; see the loop condition below
 * flags   - bit set; APR_ESCAPE_LDAP_DN enables the T_ESCAPE_LDAP_DN test
 *           and APR_ESCAPE_LDAP_FILTER enables the T_ESCAPE_LDAP_FILTER
 *           test
 * len     - apr_size_t out-parameter; the store to it is not visible here
 *
 * size starts at 1. One path returns APR_NOTFOUND.
 *
 * NOTE(review): the branch for unflagged bytes, the size updates and the
 * other return statements are not visible in this listing. Presumably the
 * first loop writes escaped output and the second only measures, matching
 * apr_pescape_ldap - confirm.
 */
1145 APR_DECLARE(apr_status_t) apr_escape_ldap(char *escaped, const void *str,
1146 apr_ssize_t slen, int flags, apr_size_t *len)
1148 apr_size_t size = 1;
1150 const unsigned char *s = (const unsigned char *) str;
1151 unsigned char *d = (unsigned char *) escaped;
/* Continue on a non-NUL byte while slen is non-zero, and also on any byte
 * while slen is positive, so a NUL byte inside an explicitly sized input
 * does not end the loop. */
1156 while (((c = *s) && slen) || (slen > 0)) {
1157 if (((flags & APR_ESCAPE_LDAP_DN) && TEST_CHAR(c, T_ESCAPE_LDAP_DN))
1158 || ((flags & APR_ESCAPE_LDAP_FILTER) && TEST_CHAR(c, T_ESCAPE_LDAP_FILTER))) {
/* Emit the byte through c2x with a backslash as the prefix argument. */
1159 d = c2x(c, '\\', d);
/* Second loop: same condition and same flag tests as the first. */
1173 while (((c = *s) && slen) || (slen > 0)) {
1174 if (((flags & APR_ESCAPE_LDAP_DN) && TEST_CHAR(c, T_ESCAPE_LDAP_DN))
1175 || ((flags & APR_ESCAPE_LDAP_FILTER) && TEST_CHAR(c, T_ESCAPE_LDAP_FILTER))) {
1190 return APR_NOTFOUND;
/*
 * apr_pescape_ldap: pool-allocating companion of apr_escape_ldap.
 *
 * A first call with a NULL destination obtains the required length in
 * len; a buffer of len bytes from pool p is then filled by a second call
 * with the same src, srclen and flags and NULL for the length pointer.
 * The switch also has an APR_NOTFOUND case.
 *
 * NOTE(review): the other case labels and the return statements are not
 * visible in this listing, which ends inside this function.
 */
1196 APR_DECLARE(const char *) apr_pescape_ldap(apr_pool_t *p, const void *src,
1197 apr_ssize_t srclen, int flags)
1201 switch (apr_escape_ldap(NULL, src, srclen, flags, &len)) {
1203 char *encoded = apr_palloc(p, len);
1204 apr_escape_ldap(encoded, src, srclen, flags, NULL);
1207 case APR_NOTFOUND: {