]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - contrib/apr-util/uri/apr_uri.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / contrib / apr-util / uri / apr_uri.c
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2  * contributor license agreements.  See the NOTICE file distributed with
3  * this work for additional information regarding copyright ownership.
4  * The ASF licenses this file to You under the Apache License, Version 2.0
5  * (the "License"); you may not use this file except in compliance with
6  * the License.  You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * apr_uri.c: URI related utility things
19  * 
20  */
21
22 #include <stdlib.h>
23
24 #include "apu.h"
25 #include "apr.h"
26 #include "apr_general.h"
27 #include "apr_strings.h"
28
29 #define APR_WANT_STRFUNC
30 #include "apr_want.h"
31
32 #include "apr_uri.h"
33
34 typedef struct schemes_t schemes_t;
35
36 /** Structure to store various schemes and their default ports */
37 struct schemes_t {
38     /** The name of the scheme */
39     const char *name;
40     /** The default port for the scheme */
41     apr_port_t default_port;
42 };
43
44 /* Some WWW schemes and their default ports; this is basically /etc/services */
45 /* This will become global when the protocol abstraction comes */
46 /* As the schemes are searched by a linear search, */
47 /* they are sorted by their expected frequency */
48 static schemes_t schemes[] =
49 {
50     {"http",     APR_URI_HTTP_DEFAULT_PORT},
51     {"ftp",      APR_URI_FTP_DEFAULT_PORT},
52     {"https",    APR_URI_HTTPS_DEFAULT_PORT},
53     {"gopher",   APR_URI_GOPHER_DEFAULT_PORT},
54     {"ldap",     APR_URI_LDAP_DEFAULT_PORT},
55     {"nntp",     APR_URI_NNTP_DEFAULT_PORT},
56     {"snews",    APR_URI_SNEWS_DEFAULT_PORT},
57     {"imap",     APR_URI_IMAP_DEFAULT_PORT},
58     {"pop",      APR_URI_POP_DEFAULT_PORT},
59     {"sip",      APR_URI_SIP_DEFAULT_PORT},
60     {"rtsp",     APR_URI_RTSP_DEFAULT_PORT},
61     {"wais",     APR_URI_WAIS_DEFAULT_PORT},
62     {"z39.50r",  APR_URI_WAIS_DEFAULT_PORT},
63     {"z39.50s",  APR_URI_WAIS_DEFAULT_PORT},
64     {"prospero", APR_URI_PROSPERO_DEFAULT_PORT},
65     {"nfs",      APR_URI_NFS_DEFAULT_PORT},
66     {"tip",      APR_URI_TIP_DEFAULT_PORT},
67     {"acap",     APR_URI_ACAP_DEFAULT_PORT},
68     {"telnet",   APR_URI_TELNET_DEFAULT_PORT},
69     {"ssh",      APR_URI_SSH_DEFAULT_PORT},
70     { NULL, 0xFFFF }     /* unknown port */
71 };
72
73 APU_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str)
74 {
75     schemes_t *scheme;
76
77     if (scheme_str) {
78         for (scheme = schemes; scheme->name != NULL; ++scheme) {
79             if (strcasecmp(scheme_str, scheme->name) == 0) {
80                 return scheme->default_port;
81             }
82         }
83     }
84     return 0;
85 }
86
87 /* Unparse a apr_uri_t structure to an URI string.
88  * Optionally suppress the password for security reasons.
89  */
90 APU_DECLARE(char *) apr_uri_unparse(apr_pool_t *p, 
91                                     const apr_uri_t *uptr, 
92                                     unsigned flags)
93 {
94     char *ret = "";
95
96     /* If suppressing the site part, omit both user name & scheme://hostname */
97     if (!(flags & APR_URI_UNP_OMITSITEPART)) {
98
99         /* Construct a "user:password@" string, honoring the passed
100          * APR_URI_UNP_ flags: */
101         if (uptr->user || uptr->password) {
102             ret = apr_pstrcat(p,
103                       (uptr->user     && !(flags & APR_URI_UNP_OMITUSER))
104                           ? uptr->user : "",
105                       (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
106                           ? ":" : "",
107                       (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
108                           ? ((flags & APR_URI_UNP_REVEALPASSWORD)
109                               ? uptr->password : "XXXXXXXX")
110                           : "",
111                       ((uptr->user     && !(flags & APR_URI_UNP_OMITUSER)) ||
112                        (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)))
113                           ? "@" : "", 
114                       NULL);
115         }
116
117         /* Construct scheme://site string */
118         if (uptr->hostname) {
119             int is_default_port;
120             const char *lbrk = "", *rbrk = "";
121
122             if (strchr(uptr->hostname, ':')) { /* v6 literal */
123                 lbrk = "[";
124                 rbrk = "]";
125             }
126
127             is_default_port =
128                 (uptr->port_str == NULL ||
129                  uptr->port == 0 ||
130                  uptr->port == apr_uri_port_of_scheme(uptr->scheme));
131
132             ret = apr_pstrcat(p, "//", ret, lbrk, uptr->hostname, rbrk,
133                         is_default_port ? "" : ":",
134                         is_default_port ? "" : uptr->port_str,
135                         NULL);
136         }
137         if (uptr->scheme) {
138             ret = apr_pstrcat(p, uptr->scheme, ":", ret, NULL);
139         }
140     }
141     
142     /* Should we suppress all path info? */
143     if (!(flags & APR_URI_UNP_OMITPATHINFO)) {
144         /* Append path, query and fragment strings: */
145         ret = apr_pstrcat(p,
146                           ret,
147                           (uptr->path)
148                               ? uptr->path : "",
149                           (uptr->query    && !(flags & APR_URI_UNP_OMITQUERY))
150                               ? "?" : "",
151                           (uptr->query    && !(flags & APR_URI_UNP_OMITQUERY))
152                               ? uptr->query : "",
153                           (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
154                               ? "#" : NULL,
155                           (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
156                               ? uptr->fragment : NULL,
157                           NULL);
158     }
159     return ret;
160 }
161
162 /* Here is the hand-optimized parse_uri_components().  There are some wild
163  * tricks we could pull in assembly language that we don't pull here... like we
164  * can do word-at-time scans for delimiter characters using the same technique
165  * that fast memchr()s use.  But that would be way non-portable. -djg
166  */
167
/* We have a table that we can index by character and it tells us if the
 * character is one of the interesting delimiters.  Note that we even get
 * compares for NUL for free -- it's just another delimiter.
 */
172
/* Classification bits stored in the uri_delims[] lookup table. */
#define T_SLASH     0x01    /* '/' */
#define T_QUESTION  0x02    /* '?' */
#define T_HASH      0x04    /* '#' */
#define T_ALPHA     0x08    /* letters: 'A' ... 'Z' and 'a' ... 'z' */
#define T_SCHEME    0x10    /* '0' ... '9', '-', '+', '.': allowed in a
                             * scheme, but not as its first character */
#define T_NUL       0x80    /* '\0' */
181
182 #if APR_CHARSET_EBCDIC
183 /* Delimiter table for the EBCDIC character set */
184 static const unsigned char uri_delims[256] = {
185     T_NUL,                      /* 0x00     */
186     0,                          /* 0x01     */
187     0,                          /* 0x02     */
188     0,                          /* 0x03     */
189     0,                          /* 0x04     */
190     0,                          /* 0x05     */
191     0,                          /* 0x06     */
192     0,                          /* 0x07     */
193     0,                          /* 0x08     */
194     0,                          /* 0x09     */
195     0,                          /* 0x0a     */
196     0,                          /* 0x0b     */
197     0,                          /* 0x0c     */
198     0,                          /* 0x0d     */
199     0,                          /* 0x0e     */
200     0,                          /* 0x0f     */
201     0,                          /* 0x10     */
202     0,                          /* 0x11     */
203     0,                          /* 0x12     */
204     0,                          /* 0x13     */
205     0,                          /* 0x14     */
206     0,                          /* 0x15     */
207     0,                          /* 0x16     */
208     0,                          /* 0x17     */
209     0,                          /* 0x18     */
210     0,                          /* 0x19     */
211     0,                          /* 0x1a     */
212     0,                          /* 0x1b     */
213     0,                          /* 0x1c     */
214     0,                          /* 0x1d     */
215     0,                          /* 0x1e     */
216     0,                          /* 0x1f     */
217     0,                          /* 0x20     */
218     0,                          /* 0x21     */
219     0,                          /* 0x22     */
220     0,                          /* 0x23     */
221     0,                          /* 0x24     */
222     0,                          /* 0x25     */
223     0,                          /* 0x26     */
224     0,                          /* 0x27     */
225     0,                          /* 0x28     */
226     0,                          /* 0x29     */
227     0,                          /* 0x2a     */
228     0,                          /* 0x2b     */
229     0,                          /* 0x2c     */
230     0,                          /* 0x2d     */
231     0,                          /* 0x2e     */
232     0,                          /* 0x2f     */
233     0,                          /* 0x30     */
234     0,                          /* 0x31     */
235     0,                          /* 0x32     */
236     0,                          /* 0x33     */
237     0,                          /* 0x34     */
238     0,                          /* 0x35     */
239     0,                          /* 0x36     */
240     0,                          /* 0x37     */
241     0,                          /* 0x38     */
242     0,                          /* 0x39     */
243     0,                          /* 0x3a     */
244     0,                          /* 0x3b     */
245     0,                          /* 0x3c     */
246     0,                          /* 0x3d     */
247     0,                          /* 0x3e     */
248     0,                          /* 0x3f     */
249     0,                          /* 0x40 ' ' */
250     0,                          /* 0x41     */
251     0,                          /* 0x42     */
252     0,                          /* 0x43     */
253     0,                          /* 0x44     */
254     0,                          /* 0x45     */
255     0,                          /* 0x46     */
256     0,                          /* 0x47     */
257     0,                          /* 0x48     */
258     0,                          /* 0x49     */
259     0,                          /* 0x4a '[' */
260     T_SCHEME,                   /* 0x4b '.' */
261     0,                          /* 0x4c '<' */
262     0,                          /* 0x4d '(' */
263     T_SCHEME,                   /* 0x4e '+' */
264     0,                          /* 0x4f '!' */
265     0,                          /* 0x50 '&' */
266     0,                          /* 0x51     */
267     0,                          /* 0x52     */
268     0,                          /* 0x53     */
269     0,                          /* 0x54     */
270     0,                          /* 0x55     */
271     0,                          /* 0x56     */
272     0,                          /* 0x57     */
273     0,                          /* 0x58     */
274     0,                          /* 0x59     */
275     0,                          /* 0x5a ']' */
276     0,                          /* 0x5b '$' */
277     0,                          /* 0x5c '*' */
278     0,                          /* 0x5d ')' */
279     0,                          /* 0x5e ';' */
280     0,                          /* 0x5f '^' */
281     T_SCHEME,                   /* 0x60 '-' */
282     T_SLASH,                    /* 0x61 '/' */
283     0,                          /* 0x62     */
284     0,                          /* 0x63     */
285     0,                          /* 0x64     */
286     0,                          /* 0x65     */
287     0,                          /* 0x66     */
288     0,                          /* 0x67     */
289     0,                          /* 0x68     */
290     0,                          /* 0x69     */
291     0,                          /* 0x6a '|' */
292     0,                          /* 0x6b ',' */
293     0,                          /* 0x6c '%' */
294     0,                          /* 0x6d '_' */
295     0,                          /* 0x6e '>' */
296     T_QUESTION,                 /* 0x6f '?' */
297     0,                          /* 0x70     */
298     0,                          /* 0x71     */
299     0,                          /* 0x72     */
300     0,                          /* 0x73     */
301     0,                          /* 0x74     */
302     0,                          /* 0x75     */
303     0,                          /* 0x76     */
304     0,                          /* 0x77     */
305     0,                          /* 0x78     */
306     0,                          /* 0x79 '`' */
307     0,                          /* 0x7a ':' */
308     T_HASH,                     /* 0x7b '#' */
309     0,                          /* 0x7c '@' */
310     0,                          /* 0x7d ''' */
311     0,                          /* 0x7e '=' */
312     0,                          /* 0x7f '"' */
313     0,                          /* 0x80     */
314     T_ALPHA,                    /* 0x81 'a' */
315     T_ALPHA,                    /* 0x82 'b' */
316     T_ALPHA,                    /* 0x83 'c' */
317     T_ALPHA,                    /* 0x84 'd' */
318     T_ALPHA,                    /* 0x85 'e' */
319     T_ALPHA,                    /* 0x86 'f' */
320     T_ALPHA,                    /* 0x87 'g' */
321     T_ALPHA,                    /* 0x88 'h' */
322     T_ALPHA,                    /* 0x89 'i' */
323     0,                          /* 0x8a     */
324     0,                          /* 0x8b     */
325     0,                          /* 0x8c     */
326     0,                          /* 0x8d     */
327     0,                          /* 0x8e     */
328     0,                          /* 0x8f     */
329     0,                          /* 0x90     */
330     T_ALPHA,                    /* 0x91 'j' */
331     T_ALPHA,                    /* 0x92 'k' */
332     T_ALPHA,                    /* 0x93 'l' */
333     T_ALPHA,                    /* 0x94 'm' */
334     T_ALPHA,                    /* 0x95 'n' */
335     T_ALPHA,                    /* 0x96 'o' */
336     T_ALPHA,                    /* 0x97 'p' */
337     T_ALPHA,                    /* 0x98 'q' */
338     T_ALPHA,                    /* 0x99 'r' */
339     0,                          /* 0x9a     */
340     0,                          /* 0x9b     */
341     0,                          /* 0x9c     */
342     0,                          /* 0x9d     */
343     0,                          /* 0x9e     */
344     0,                          /* 0x9f     */
345     0,                          /* 0xa0     */
346     0,                          /* 0xa1 '~' */
347     T_ALPHA,                    /* 0xa2 's' */
348     T_ALPHA,                    /* 0xa3 't' */
349     T_ALPHA,                    /* 0xa4 'u' */
350     T_ALPHA,                    /* 0xa5 'v' */
351     T_ALPHA,                    /* 0xa6 'w' */
352     T_ALPHA,                    /* 0xa7 'x' */
353     T_ALPHA,                    /* 0xa8 'y' */
354     T_ALPHA,                    /* 0xa9 'z' */
355     0,                          /* 0xaa     */
356     0,                          /* 0xab     */
357     0,                          /* 0xac     */
358     0,                          /* 0xad     */
359     0,                          /* 0xae     */
360     0,                          /* 0xaf     */
361     0,                          /* 0xb0     */
362     0,                          /* 0xb1     */
363     0,                          /* 0xb2     */
364     0,                          /* 0xb3     */
365     0,                          /* 0xb4     */
366     0,                          /* 0xb5     */
367     0,                          /* 0xb6     */
368     0,                          /* 0xb7     */
369     0,                          /* 0xb8     */
370     0,                          /* 0xb9     */
371     0,                          /* 0xba     */
372     0,                          /* 0xbb     */
373     0,                          /* 0xbc     */
374     0,                          /* 0xbd     */
375     0,                          /* 0xbe     */
376     0,                          /* 0xbf     */
377     0,                          /* 0xc0 '{' */
378     T_ALPHA,                    /* 0xc1 'A' */
379     T_ALPHA,                    /* 0xc2 'B' */
380     T_ALPHA,                    /* 0xc3 'C' */
381     T_ALPHA,                    /* 0xc4 'D' */
382     T_ALPHA,                    /* 0xc5 'E' */
383     T_ALPHA,                    /* 0xc6 'F' */
384     T_ALPHA,                    /* 0xc7 'G' */
385     T_ALPHA,                    /* 0xc8 'H' */
386     T_ALPHA,                    /* 0xc9 'I' */
387     0,                          /* 0xca     */
388     0,                          /* 0xcb     */
389     0,                          /* 0xcc     */
390     0,                          /* 0xcd     */
391     0,                          /* 0xce     */
392     0,                          /* 0xcf     */
393     0,                          /* 0xd0 '}' */
394     T_ALPHA,                    /* 0xd1 'J' */
395     T_ALPHA,                    /* 0xd2 'K' */
396     T_ALPHA,                    /* 0xd3 'L' */
397     T_ALPHA,                    /* 0xd4 'M' */
398     T_ALPHA,                    /* 0xd5 'N' */
399     T_ALPHA,                    /* 0xd6 'O' */
400     T_ALPHA,                    /* 0xd7 'P' */
401     T_ALPHA,                    /* 0xd8 'Q' */
402     T_ALPHA,                    /* 0xd9 'R' */
403     0,                          /* 0xda     */
404     0,                          /* 0xdb     */
405     0,                          /* 0xdc     */
406     0,                          /* 0xdd     */
407     0,                          /* 0xde     */
408     0,                          /* 0xdf     */
409     0,                          /* 0xe0 '\' */
410     0,                          /* 0xe1     */
411     T_ALPHA,                    /* 0xe2 'S' */
412     T_ALPHA,                    /* 0xe3 'T' */
413     T_ALPHA,                    /* 0xe4 'U' */
414     T_ALPHA,                    /* 0xe5 'V' */
415     T_ALPHA,                    /* 0xe6 'W' */
416     T_ALPHA,                    /* 0xe7 'X' */
417     T_ALPHA,                    /* 0xe8 'Y' */
418     T_ALPHA,                    /* 0xe9 'Z' */
419     0,                          /* 0xea     */
420     0,                          /* 0xeb     */
421     0,                          /* 0xec     */
422     0,                          /* 0xed     */
423     0,                          /* 0xee     */
424     0,                          /* 0xef     */
425     T_SCHEME,                   /* 0xf0 '0' */
426     T_SCHEME,                   /* 0xf1 '1' */
427     T_SCHEME,                   /* 0xf2 '2' */
428     T_SCHEME,                   /* 0xf3 '3' */
429     T_SCHEME,                   /* 0xf4 '4' */
430     T_SCHEME,                   /* 0xf5 '5' */
431     T_SCHEME,                   /* 0xf6 '6' */
432     T_SCHEME,                   /* 0xf7 '7' */
433     T_SCHEME,                   /* 0xf8 '8' */
434     T_SCHEME,                   /* 0xf9 '9' */
435     0,                          /* 0xfa     */
436     0,                          /* 0xfb     */
437     0,                          /* 0xfc     */
438     0,                          /* 0xfd     */
439     0,                          /* 0xfe     */
440     0                           /* 0xff     */
441 };
442 #else
443 /* Delimiter table for the ASCII character set */
444 static const unsigned char uri_delims[256] = {
445     T_NUL,                      /* 0x00     */
446     0,                          /* 0x01     */
447     0,                          /* 0x02     */
448     0,                          /* 0x03     */
449     0,                          /* 0x04     */
450     0,                          /* 0x05     */
451     0,                          /* 0x06     */
452     0,                          /* 0x07     */
453     0,                          /* 0x08     */
454     0,                          /* 0x09     */
455     0,                          /* 0x0a     */
456     0,                          /* 0x0b     */
457     0,                          /* 0x0c     */
458     0,                          /* 0x0d     */
459     0,                          /* 0x0e     */
460     0,                          /* 0x0f     */
461     0,                          /* 0x10     */
462     0,                          /* 0x11     */
463     0,                          /* 0x12     */
464     0,                          /* 0x13     */
465     0,                          /* 0x14     */
466     0,                          /* 0x15     */
467     0,                          /* 0x16     */
468     0,                          /* 0x17     */
469     0,                          /* 0x18     */
470     0,                          /* 0x19     */
471     0,                          /* 0x1a     */
472     0,                          /* 0x1b     */
473     0,                          /* 0x1c     */
474     0,                          /* 0x1d     */
475     0,                          /* 0x1e     */
476     0,                          /* 0x1f     */
477     0,                          /* 0x20 ' ' */
478     0,                          /* 0x21 '!' */
479     0,                          /* 0x22 '"' */
480     T_HASH,                     /* 0x23 '#' */
481     0,                          /* 0x24 '$' */
482     0,                          /* 0x25 '%' */
483     0,                          /* 0x26 '&' */
484     0,                          /* 0x27 ''' */
485     0,                          /* 0x28 '(' */
486     0,                          /* 0x29 ')' */
487     0,                          /* 0x2a '*' */
488     T_SCHEME,                   /* 0x2b '+' */
489     0,                          /* 0x2c ',' */
490     T_SCHEME,                   /* 0x2d '-' */
491     T_SCHEME,                   /* 0x2e '.' */
492     T_SLASH,                    /* 0x2f '/' */
493     T_SCHEME,                   /* 0x30 '0' */
494     T_SCHEME,                   /* 0x31 '1' */
495     T_SCHEME,                   /* 0x32 '2' */
496     T_SCHEME,                   /* 0x33 '3' */
497     T_SCHEME,                   /* 0x34 '4' */
498     T_SCHEME,                   /* 0x35 '5' */
499     T_SCHEME,                   /* 0x36 '6' */
500     T_SCHEME,                   /* 0x37 '7' */
501     T_SCHEME,                   /* 0x38 '8' */
502     T_SCHEME,                   /* 0x39 '9' */
503     0,                          /* 0x3a ':' */
504     0,                          /* 0x3b ';' */
505     0,                          /* 0x3c '<' */
506     0,                          /* 0x3d '=' */
507     0,                          /* 0x3e '>' */
508     T_QUESTION,                 /* 0x3f '?' */
509     0,                          /* 0x40 '@' */
510     T_ALPHA,                    /* 0x41 'A' */
511     T_ALPHA,                    /* 0x42 'B' */
512     T_ALPHA,                    /* 0x43 'C' */
513     T_ALPHA,                    /* 0x44 'D' */
514     T_ALPHA,                    /* 0x45 'E' */
515     T_ALPHA,                    /* 0x46 'F' */
516     T_ALPHA,                    /* 0x47 'G' */
517     T_ALPHA,                    /* 0x48 'H' */
518     T_ALPHA,                    /* 0x49 'I' */
519     T_ALPHA,                    /* 0x4a 'J' */
520     T_ALPHA,                    /* 0x4b 'K' */
521     T_ALPHA,                    /* 0x4c 'L' */
522     T_ALPHA,                    /* 0x4d 'M' */
523     T_ALPHA,                    /* 0x4e 'N' */
524     T_ALPHA,                    /* 0x4f 'O' */
525     T_ALPHA,                    /* 0x50 'P' */
526     T_ALPHA,                    /* 0x51 'Q' */
527     T_ALPHA,                    /* 0x52 'R' */
528     T_ALPHA,                    /* 0x53 'S' */
529     T_ALPHA,                    /* 0x54 'T' */
530     T_ALPHA,                    /* 0x55 'U' */
531     T_ALPHA,                    /* 0x56 'V' */
532     T_ALPHA,                    /* 0x57 'W' */
533     T_ALPHA,                    /* 0x58 'X' */
534     T_ALPHA,                    /* 0x59 'Y' */
535     T_ALPHA,                    /* 0x5a 'Z' */
536     0,                          /* 0x5b '[' */
537     0,                          /* 0x5c '\' */
538     0,                          /* 0x5d ']' */
539     0,                          /* 0x5e '^' */
540     0,                          /* 0x5f '_' */
541     0,                          /* 0x60 '`' */
542     T_ALPHA,                    /* 0x61 'a' */
543     T_ALPHA,                    /* 0x62 'b' */
544     T_ALPHA,                    /* 0x63 'c' */
545     T_ALPHA,                    /* 0x64 'd' */
546     T_ALPHA,                    /* 0x65 'e' */
547     T_ALPHA,                    /* 0x66 'f' */
548     T_ALPHA,                    /* 0x67 'g' */
549     T_ALPHA,                    /* 0x68 'h' */
550     T_ALPHA,                    /* 0x69 'i' */
551     T_ALPHA,                    /* 0x6a 'j' */
552     T_ALPHA,                    /* 0x6b 'k' */
553     T_ALPHA,                    /* 0x6c 'l' */
554     T_ALPHA,                    /* 0x6d 'm' */
555     T_ALPHA,                    /* 0x6e 'n' */
556     T_ALPHA,                    /* 0x6f 'o' */
557     T_ALPHA,                    /* 0x70 'p' */
558     T_ALPHA,                    /* 0x71 'q' */
559     T_ALPHA,                    /* 0x72 'r' */
560     T_ALPHA,                    /* 0x73 's' */
561     T_ALPHA,                    /* 0x74 't' */
562     T_ALPHA,                    /* 0x75 'u' */
563     T_ALPHA,                    /* 0x76 'v' */
564     T_ALPHA,                    /* 0x77 'w' */
565     T_ALPHA,                    /* 0x78 'x' */
566     T_ALPHA,                    /* 0x79 'y' */
567     T_ALPHA,                    /* 0x7a 'z' */
568     0,                          /* 0x7b '{' */
569     0,                          /* 0x7c '|' */
570     0,                          /* 0x7d '}' */
571     0,                          /* 0x7e '~' */
572     0,                          /* 0x7f     */
573     0,                          /* 0x80     */
574     0,                          /* 0x81     */
575     0,                          /* 0x82     */
576     0,                          /* 0x83     */
577     0,                          /* 0x84     */
578     0,                          /* 0x85     */
579     0,                          /* 0x86     */
580     0,                          /* 0x87     */
581     0,                          /* 0x88     */
582     0,                          /* 0x89     */
583     0,                          /* 0x8a     */
584     0,                          /* 0x8b     */
585     0,                          /* 0x8c     */
586     0,                          /* 0x8d     */
587     0,                          /* 0x8e     */
588     0,                          /* 0x8f     */
589     0,                          /* 0x90     */
590     0,                          /* 0x91     */
591     0,                          /* 0x92     */
592     0,                          /* 0x93     */
593     0,                          /* 0x94     */
594     0,                          /* 0x95     */
595     0,                          /* 0x96     */
596     0,                          /* 0x97     */
597     0,                          /* 0x98     */
598     0,                          /* 0x99     */
599     0,                          /* 0x9a     */
600     0,                          /* 0x9b     */
601     0,                          /* 0x9c     */
602     0,                          /* 0x9d     */
603     0,                          /* 0x9e     */
604     0,                          /* 0x9f     */
605     0,                          /* 0xa0     */
606     0,                          /* 0xa1     */
607     0,                          /* 0xa2     */
608     0,                          /* 0xa3     */
609     0,                          /* 0xa4     */
610     0,                          /* 0xa5     */
611     0,                          /* 0xa6     */
612     0,                          /* 0xa7     */
613     0,                          /* 0xa8     */
614     0,                          /* 0xa9     */
615     0,                          /* 0xaa     */
616     0,                          /* 0xab     */
617     0,                          /* 0xac     */
618     0,                          /* 0xad     */
619     0,                          /* 0xae     */
620     0,                          /* 0xaf     */
621     0,                          /* 0xb0     */
622     0,                          /* 0xb1     */
623     0,                          /* 0xb2     */
624     0,                          /* 0xb3     */
625     0,                          /* 0xb4     */
626     0,                          /* 0xb5     */
627     0,                          /* 0xb6     */
628     0,                          /* 0xb7     */
629     0,                          /* 0xb8     */
630     0,                          /* 0xb9     */
631     0,                          /* 0xba     */
632     0,                          /* 0xbb     */
633     0,                          /* 0xbc     */
634     0,                          /* 0xbd     */
635     0,                          /* 0xbe     */
636     0,                          /* 0xbf     */
637     0,                          /* 0xc0     */
638     0,                          /* 0xc1     */
639     0,                          /* 0xc2     */
640     0,                          /* 0xc3     */
641     0,                          /* 0xc4     */
642     0,                          /* 0xc5     */
643     0,                          /* 0xc6     */
644     0,                          /* 0xc7     */
645     0,                          /* 0xc8     */
646     0,                          /* 0xc9     */
647     0,                          /* 0xca     */
648     0,                          /* 0xcb     */
649     0,                          /* 0xcc     */
650     0,                          /* 0xcd     */
651     0,                          /* 0xce     */
652     0,                          /* 0xcf     */
653     0,                          /* 0xd0     */
654     0,                          /* 0xd1     */
655     0,                          /* 0xd2     */
656     0,                          /* 0xd3     */
657     0,                          /* 0xd4     */
658     0,                          /* 0xd5     */
659     0,                          /* 0xd6     */
660     0,                          /* 0xd7     */
661     0,                          /* 0xd8     */
662     0,                          /* 0xd9     */
663     0,                          /* 0xda     */
664     0,                          /* 0xdb     */
665     0,                          /* 0xdc     */
666     0,                          /* 0xdd     */
667     0,                          /* 0xde     */
668     0,                          /* 0xdf     */
669     0,                          /* 0xe0     */
670     0,                          /* 0xe1     */
671     0,                          /* 0xe2     */
672     0,                          /* 0xe3     */
673     0,                          /* 0xe4     */
674     0,                          /* 0xe5     */
675     0,                          /* 0xe6     */
676     0,                          /* 0xe7     */
677     0,                          /* 0xe8     */
678     0,                          /* 0xe9     */
679     0,                          /* 0xea     */
680     0,                          /* 0xeb     */
681     0,                          /* 0xec     */
682     0,                          /* 0xed     */
683     0,                          /* 0xee     */
684     0,                          /* 0xef     */
685     0,                          /* 0xf0     */
686     0,                          /* 0xf1     */
687     0,                          /* 0xf2     */
688     0,                          /* 0xf3     */
689     0,                          /* 0xf4     */
690     0,                          /* 0xf5     */
691     0,                          /* 0xf6     */
692     0,                          /* 0xf7     */
693     0,                          /* 0xf8     */
694     0,                          /* 0xf9     */
695     0,                          /* 0xfa     */
696     0,                          /* 0xfb     */
697     0,                          /* 0xfc     */
698     0,                          /* 0xfd     */
699     0,                          /* 0xfe     */
700     0                           /* 0xff     */
701 };
702 #endif
703
704
/* How the delimiter masks below are meant to be used:
 *     if (uri_delims[ch] & NOTEND_foobar) {
 *         ch is a character that terminates the foobar component
 *     }
 * In other words, a scan over foobar keeps advancing only while the
 * masked value is zero.
 */

/* characters that end a path: '?', '#' or the terminating NUL */
#define NOTEND_PATH       (T_QUESTION | T_HASH | T_NUL)
/* hostinfo ends at everything that ends a path, and additionally at '/' */
#define NOTEND_HOSTINFO   (T_SLASH | NOTEND_PATH)
713
714 /* parse_uri_components():
715  * Parse a given URI, fill in all supplied fields of a uri_components
716  * structure. This eliminates the necessity of extracting host, port,
717  * path, query info repeatedly in the modules.
718  * Side effects:
719  *  - fills in fields of uri_components *uptr
720  *  - none on any of the r->* fields
721  */
722 APU_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri, 
723                                         apr_uri_t *uptr)
724 {
725     const char *s;
726     const char *s1;
727     const char *hostinfo;
728     char *endstr;
729     int port;
730     int v6_offset1 = 0, v6_offset2 = 0;
731
732     /* Initialize the structure. parse_uri() and parse_uri_components()
733      * can be called more than once per request.
734      */
735     memset (uptr, '\0', sizeof(*uptr));
736     uptr->is_initialized = 1;
737
738     /* We assume the processor has a branch predictor like most --
739      * it assumes forward branches are untaken and backwards are taken.  That's
740      * the reason for the gotos.  -djg
741      */
742     if (uri[0] == '/') {
743         /* RFC2396 #4.3 says that two leading slashes mean we have an
744          * authority component, not a path!  Fixing this looks scary
745          * with the gotos here.  But if the existing logic is valid,
746          * then presumably a goto pointing to deal_with_authority works.
747          *
748          * RFC2396 describes this as resolving an ambiguity.  In the
749          * case of three or more slashes there would seem to be no
750          * ambiguity, so it is a path after all.
751          */
752         if (uri[1] == '/' && uri[2] != '/') {
753             s = uri + 2 ;
754             goto deal_with_authority ;
755         }
756
757 deal_with_path:
758         /* we expect uri to point to first character of path ... remember
759          * that the path could be empty -- http://foobar?query for example
760          */
761         s = uri;
762         while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
763             ++s;
764         }
765         if (s != uri) {
766             uptr->path = apr_pstrmemdup(p, uri, s - uri);
767         }
768         if (*s == 0) {
769             return APR_SUCCESS;
770         }
771         if (*s == '?') {
772             ++s;
773             s1 = strchr(s, '#');
774             if (s1) {
775                 uptr->fragment = apr_pstrdup(p, s1 + 1);
776                 uptr->query = apr_pstrmemdup(p, s, s1 - s);
777             }
778             else {
779                 uptr->query = apr_pstrdup(p, s);
780             }
781             return APR_SUCCESS;
782         }
783         /* otherwise it's a fragment */
784         uptr->fragment = apr_pstrdup(p, s + 1);
785         return APR_SUCCESS;
786     }
787
788     /* find the scheme: */
789     s = uri;
790     /* first char must be letter */
791     if (uri_delims[*(unsigned char *)s] & T_ALPHA) {
792         ++s;
793         while ((uri_delims[*(unsigned char *)s] & (T_ALPHA|T_SCHEME)))
794             ++s;
795     }
796     /* scheme must be non-empty and followed by : */
797     if (s != uri && s[0] == ':') {
798         uptr->scheme = apr_pstrmemdup(p, uri, s - uri);
799         s++;
800     }
801     else {
802         /* No valid scheme, restart from the beginning */
803         s = uri;
804     }
805
806     if (s[0] != '/' || s[1] != '/') {
807         if (uri == s) {
808             /*
809              * RFC 3986 3.3: If we have no scheme and no authority,
810              * the leading segment of a relative path must not contain a ':'.
811              */
812             char *first_slash = strchr(uri, '/');
813             if (first_slash) {
814                 while (s < first_slash) {
815                     if (s[0] == ':')
816                         return APR_EGENERAL;
817                     ++s;
818                 }
819                 /* no scheme but relative path, e.g. '../image.jpg' */
820             }
821             else {
822                 if (strchr(uri, ':') != NULL)
823                     return APR_EGENERAL;
824                 /* no scheme, no slash, but relative path, e.g. 'image.jpg' */
825             }
826             goto deal_with_path;
827         }
828         /* scheme and relative path */
829         uri = s;
830         goto deal_with_path;
831     }
832
833     s += 2;
834
835 deal_with_authority:
836     hostinfo = s;
837     while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
838         ++s;
839     }
840     uri = s;        /* whatever follows hostinfo is start of uri */
841     uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo);
842
843     /* If there's a username:password@host:port, the @ we want is the last @...
844      * too bad there's no memrchr()... For the C purists, note that hostinfo
845      * is definitely not the first character of the original uri so therefore
846      * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
847      */
848     do {
849         --s;
850     } while (s >= hostinfo && *s != '@');
851     if (s < hostinfo) {
852         /* again we want the common case to be fall through */
853 deal_with_host:
854         /* We expect hostinfo to point to the first character of
855          * the hostname.  If there's a port it is the first colon,
856          * except with IPv6.
857          */
858         if (*hostinfo == '[') {
859             v6_offset1 = 1;
860             v6_offset2 = 2;
861             s = memchr(hostinfo, ']', uri - hostinfo);
862             if (s == NULL) {
863                 return APR_EGENERAL;
864             }
865             if (*++s != ':') {
866                 s = NULL; /* no port */
867             }
868         }
869         else {
870             s = memchr(hostinfo, ':', uri - hostinfo);
871         }
872         if (s == NULL) {
873             /* we expect the common case to have no port */
874             uptr->hostname = apr_pstrmemdup(p,
875                                             hostinfo + v6_offset1,
876                                             uri - hostinfo - v6_offset2);
877             goto deal_with_path;
878         }
879         uptr->hostname = apr_pstrmemdup(p,
880                                         hostinfo + v6_offset1,
881                                         s - hostinfo - v6_offset2);
882         ++s;
883         uptr->port_str = apr_pstrmemdup(p, s, uri - s);
884         if (uri != s) {
885             port = strtol(uptr->port_str, &endstr, 10);
886             uptr->port = port;
887             if (*endstr == '\0') {
888                 goto deal_with_path;
889             }
890             /* Invalid characters after ':' found */
891             return APR_EGENERAL;
892         }
893         uptr->port = apr_uri_port_of_scheme(uptr->scheme);
894         goto deal_with_path;
895     }
896
897     /* first colon delimits username:password */
898     s1 = memchr(hostinfo, ':', s - hostinfo);
899     if (s1) {
900         uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo);
901         ++s1;
902         uptr->password = apr_pstrmemdup(p, s1, s - s1);
903     }
904     else {
905         uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo);
906     }
907     hostinfo = s + 1;
908     goto deal_with_host;
909 }
910
911 /* Special case for CONNECT parsing: it comes with the hostinfo part only */
912 /* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
913  * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
914  * for the format of the "CONNECT host:port HTTP/1.0" request
915  */
916 APU_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p, 
917                                                  const char *hostinfo, 
918                                                  apr_uri_t *uptr)
919 {
920     const char *s;
921     char *endstr;
922     const char *rsb;
923     int v6_offset1 = 0;
924
925     /* Initialize the structure. parse_uri() and parse_uri_components()
926      * can be called more than once per request.
927      */
928     memset(uptr, '\0', sizeof(*uptr));
929     uptr->is_initialized = 1;
930     uptr->hostinfo = apr_pstrdup(p, hostinfo);
931
932     /* We expect hostinfo to point to the first character of
933      * the hostname.  There must be a port, separated by a colon
934      */
935     if (*hostinfo == '[') {
936         if ((rsb = strchr(hostinfo, ']')) == NULL ||
937             *(rsb + 1) != ':') {
938             return APR_EGENERAL;
939         }
940         /* literal IPv6 address */
941         s = rsb + 1;
942         ++hostinfo;
943         v6_offset1 = 1;
944     }
945     else {
946         s = strchr(hostinfo, ':');
947     }
948     if (s == NULL) {
949         return APR_EGENERAL;
950     }
951     uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1);
952     ++s;
953     uptr->port_str = apr_pstrdup(p, s);
954     if (*s != '\0') {
955         uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10);
956         if (*endstr == '\0') {
957             return APR_SUCCESS;
958         }
959         /* Invalid characters after ':' found */
960     }
961     return APR_EGENERAL;
962 }