1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * apr_date.c: date parsing utility routines
19 * These routines are (hopefully) platform independent.
21 * 27 Oct 1996 Roy Fielding
22 * Extracted (with many modifications) from mod_proxy.c and
23 * tested with over 50,000 randomly chosen valid date strings
24 * and several hundred variations of invalid date strings.
31 #define APR_WANT_STRFUNC
45 * Compare a string to a mask
46 * Mask characters (arbitrary maximum is 256 characters, just in case):
47 * @ - uppercase letter
48 * $ - lowercase letter
52 * * - swallow remaining characters
53 * <x> - exact match for any other character
55 APU_DECLARE(int) apr_date_checkmask(const char *data, const char *mask)
60 for (i = 0; i < 256; i++) {
86 if ((d != ' ') && !apr_isdigit(d))
95 return 0; /* We only get here if mask is corrupted (exceeds 256) */
99 * Parses an HTTP date in one of three standard forms:
101 * Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
102 * Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
103 * Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
105 * and returns the apr_time_t number of microseconds since 1 Jan 1970 GMT,
106 * or APR_DATE_BAD if this would be out of range or if the date is invalid.
108 * The restricted HTTP syntax is
110 * HTTP-date = rfc1123-date | rfc850-date | asctime-date
112 * rfc1123-date = wkday "," SP date1 SP time SP "GMT"
113 * rfc850-date = weekday "," SP date2 SP time SP "GMT"
114 * asctime-date = wkday SP date3 SP time SP 4DIGIT
116 * date1 = 2DIGIT SP month SP 4DIGIT
117 * ; day month year (e.g., 02 Jun 1982)
118 * date2 = 2DIGIT "-" month "-" 2DIGIT
119 * ; day-month-year (e.g., 02-Jun-82)
120 * date3 = month SP ( 2DIGIT | ( SP 1DIGIT ))
121 * ; month day (e.g., Jun 2)
123 * time = 2DIGIT ":" 2DIGIT ":" 2DIGIT
124 * ; 00:00:00 - 23:59:59
126 * wkday = "Mon" | "Tue" | "Wed"
127 * | "Thu" | "Fri" | "Sat" | "Sun"
129 * weekday = "Monday" | "Tuesday" | "Wednesday"
130 * | "Thursday" | "Friday" | "Saturday" | "Sunday"
132 * month = "Jan" | "Feb" | "Mar" | "Apr"
133 * | "May" | "Jun" | "Jul" | "Aug"
134 * | "Sep" | "Oct" | "Nov" | "Dec"
136 * However, for the sake of robustness (and Netscapeness), we ignore the
137 * weekday and anything after the time field (including the timezone).
139 * This routine is intended to be very fast; 10x faster than using sscanf.
141 * Originally from Andrew Daviel <andrew@vancouver-webpages.com>, 29 Jul 96
142 * but many changes since then.
145 APU_DECLARE(apr_time_t) apr_date_parse_http(const char *date)
150 const char *monstr, *timstr;
151 static const int months[12] =
153 ('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
154 ('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
155 ('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
156 ('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
157 ('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
158 ('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c'};
163 while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
169 if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
172 ++date; /* Now pointing to first char after space, which should be */
174 /* start of the actual date information for all 4 formats. */
176 if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) {
177 /* RFC 1123 format with a two-digit day */
178 ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
182 ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
184 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
189 else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {
191 ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
195 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
200 else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
202 ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
206 ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
211 ds.tm_mday = (date[4] - '0') * 10;
213 ds.tm_mday += (date[5] - '0');
218 else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
219 /* RFC 1123 format with a one-digit day */
220 ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
224 ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
226 ds.tm_mday = (date[0] - '0');
234 if (ds.tm_mday <= 0 || ds.tm_mday > 31)
237 ds.tm_hour = ((timstr[0] - '0') * 10) + (timstr[1] - '0');
238 ds.tm_min = ((timstr[3] - '0') * 10) + (timstr[4] - '0');
239 ds.tm_sec = ((timstr[6] - '0') * 10) + (timstr[7] - '0');
241 if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
244 mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
245 for (mon = 0; mon < 12; mon++)
246 if (mint == months[mon])
252 if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
255 /* February gets special check for leapyear */
257 ((ds.tm_mday > 29) ||
260 || (((ds.tm_year % 100) == 0)
261 && (((ds.tm_year % 400) != 100)))))))
266 /* ap_mplode_time uses tm_usec and tm_gmtoff fields, but they haven't
268 * It should be safe to just zero out these values.
269 * tm_usec is the number of microseconds into the second. HTTP only
270 * cares about second granularity.
271 * tm_gmtoff is the number of seconds off of GMT the time is. By
272 * definition all times going through this function are in GMT, so this
277 if (apr_time_exp_get(&result, &ds) != APR_SUCCESS)
284 * Parses a string resembling an RFC 822 date. This is meant to be
285 * lenient in its parsing of dates. Hence, this will parse a wider
286 * range of dates than apr_date_parse_http.
288 * The prominent mailer (or poster, if mailer is unknown) that has
289 * been seen in the wild is included for the unknown formats.
291 * Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
292 * Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
293 * Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
294 * Sun, 6 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
295 * Sun, 06 Nov 94 08:49:37 GMT ; RFC 822
296 * Sun, 6 Nov 94 08:49:37 GMT ; RFC 822
297 * Sun, 06 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
298 * Sun, 6 Nov 94 08:49 GMT ; Unknown [drtr@ast.cam.ac.uk]
299 * Sun, 06 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
300 * Sun, 6 Nov 94 8:49:37 GMT ; Unknown [Elm 70.85]
301 * Mon, 7 Jan 2002 07:21:22 GMT ; Unknown [Postfix]
302 * Sun, 06-Nov-1994 08:49:37 GMT ; RFC 850 with four digit years
306 #define TIMEPARSE(ds,hr10,hr1,min10,min1,sec10,sec1) \
308 ds.tm_hour = ((hr10 - '0') * 10) + (hr1 - '0'); \
309 ds.tm_min = ((min10 - '0') * 10) + (min1 - '0'); \
310 ds.tm_sec = ((sec10 - '0') * 10) + (sec1 - '0'); \
312 #define TIMEPARSE_STD(ds,timstr) \
314 TIMEPARSE(ds, timstr[0],timstr[1], \
315 timstr[3],timstr[4], \
316 timstr[6],timstr[7]); \
319 APU_DECLARE(apr_time_t) apr_date_parse_rfc(const char *date)
324 const char *monstr, *timstr, *gmtstr;
325 static const int months[12] =
327 ('J' << 16) | ('a' << 8) | 'n', ('F' << 16) | ('e' << 8) | 'b',
328 ('M' << 16) | ('a' << 8) | 'r', ('A' << 16) | ('p' << 8) | 'r',
329 ('M' << 16) | ('a' << 8) | 'y', ('J' << 16) | ('u' << 8) | 'n',
330 ('J' << 16) | ('u' << 8) | 'l', ('A' << 16) | ('u' << 8) | 'g',
331 ('S' << 16) | ('e' << 8) | 'p', ('O' << 16) | ('c' << 8) | 't',
332 ('N' << 16) | ('o' << 8) | 'v', ('D' << 16) | ('e' << 8) | 'c' };
337 /* Not all dates have text days at the beginning. */
338 if (!apr_isdigit(date[0]))
340 while (*date && apr_isspace(*date)) /* Find first non-whitespace char */
346 if ((date = strchr(date, ' ')) == NULL) /* Find space after weekday */
349 ++date; /* Now pointing to first char after space, which should be */ }
351 /* start of the actual date information for all supported formats. */
352 if (apr_date_checkmask(date, "## @$$ #### ##:##:## *")) { /* RFC 1123 format */
353 ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
358 ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
360 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
366 TIMEPARSE_STD(ds, timstr);
368 else if (apr_date_checkmask(date, "##-@$$-## ##:##:## *")) {/* RFC 850 format */
369 ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
374 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
380 TIMEPARSE_STD(ds, timstr);
382 else if (apr_date_checkmask(date, "@$$ ~# ##:##:## ####*")) {
384 ds.tm_year = ((date[16] - '0') * 10 + (date[17] - '0') - 19) * 100;
388 ds.tm_year += ((date[18] - '0') * 10) + (date[19] - '0');
393 ds.tm_mday = (date[4] - '0') * 10;
395 ds.tm_mday += (date[5] - '0');
401 TIMEPARSE_STD(ds, timstr);
403 else if (apr_date_checkmask(date, "# @$$ #### ##:##:## *")) {
405 ds.tm_year = ((date[6] - '0') * 10 + (date[7] - '0') - 19) * 100;
410 ds.tm_year += ((date[8] - '0') * 10) + (date[9] - '0');
411 ds.tm_mday = (date[0] - '0');
417 TIMEPARSE_STD(ds, timstr);
419 else if (apr_date_checkmask(date, "## @$$ ## ##:##:## *")) {
420 /* This is the old RFC 1123 date format - many many years ago, people
421 * used two-digit years. Oh, how foolish.
423 * Two-digit day, two-digit year version. */
424 ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
429 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
435 TIMEPARSE_STD(ds, timstr);
437 else if (apr_date_checkmask(date, " # @$$ ## ##:##:## *")) {
438 /* This is the old RFC 1123 date format - many many years ago, people
439 * used two-digit years. Oh, how foolish.
441 * Space + one-digit day, two-digit year version.*/
442 ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
447 ds.tm_mday = (date[1] - '0');
453 TIMEPARSE_STD(ds, timstr);
455 else if (apr_date_checkmask(date, "# @$$ ## ##:##:## *")) {
456 /* This is the old RFC 1123 date format - many many years ago, people
457 * used two-digit years. Oh, how foolish.
459 * One-digit day, two-digit year version. */
460 ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
465 ds.tm_mday = (date[0] - '0');
471 TIMEPARSE_STD(ds, timstr);
473 else if (apr_date_checkmask(date, "## @$$ ## ##:## *")) {
474 /* Loser format. This is quite bogus. */
475 ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
480 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
486 TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
488 else if (apr_date_checkmask(date, "# @$$ ## ##:## *")) {
489 /* Loser format. This is quite bogus. */
490 ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
495 ds.tm_mday = (date[0] - '0');
501 TIMEPARSE(ds, timstr[0],timstr[1], timstr[3],timstr[4], '0','0');
503 else if (apr_date_checkmask(date, "## @$$ ## #:##:## *")) {
504 /* Loser format. This is quite bogus. */
505 ds.tm_year = ((date[7] - '0') * 10) + (date[8] - '0');
510 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
516 TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
518 else if (apr_date_checkmask(date, "# @$$ ## #:##:## *")) {
519 /* Loser format. This is quite bogus. */
520 ds.tm_year = ((date[6] - '0') * 10) + (date[7] - '0');
525 ds.tm_mday = (date[0] - '0');
531 TIMEPARSE(ds, '0',timstr[1], timstr[3],timstr[4], timstr[6],timstr[7]);
533 else if (apr_date_checkmask(date, " # @$$ #### ##:##:## *")) {
534 /* RFC 1123 format with a space instead of a leading zero. */
535 ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
540 ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
542 ds.tm_mday = (date[1] - '0');
548 TIMEPARSE_STD(ds, timstr);
550 else if (apr_date_checkmask(date, "##-@$$-#### ##:##:## *")) {
551 /* RFC 1123 with dashes instead of spaces between day/month/year
552 * This also looks like RFC 850 with four digit years.
554 ds.tm_year = ((date[7] - '0') * 10 + (date[8] - '0') - 19) * 100;
558 ds.tm_year += ((date[9] - '0') * 10) + (date[10] - '0');
560 ds.tm_mday = ((date[0] - '0') * 10) + (date[1] - '0');
566 TIMEPARSE_STD(ds, timstr);
571 if (ds.tm_mday <= 0 || ds.tm_mday > 31)
574 if ((ds.tm_hour > 23) || (ds.tm_min > 59) || (ds.tm_sec > 61))
577 mint = (monstr[0] << 16) | (monstr[1] << 8) | monstr[2];
578 for (mon = 0; mon < 12; mon++)
579 if (mint == months[mon])
585 if ((ds.tm_mday == 31) && (mon == 3 || mon == 5 || mon == 8 || mon == 10))
588 /* February gets special check for leapyear */
592 || ((ds.tm_mday == 29)
594 || (((ds.tm_year % 100) == 0)
595 && (((ds.tm_year % 400) != 100)))))))
600 /* tm_gmtoff is the number of seconds off of GMT the time is.
602 * We only currently support: [+-]ZZZZ where Z is the offset in
605 * If there is any confusion, tm_gmtoff will remain 0.
609 /* Do we have a timezone ? */
614 offset = atoi(gmtstr+1);
615 ds.tm_gmtoff -= (offset / 100) * 60 * 60;
616 ds.tm_gmtoff -= (offset % 100) * 60;
619 offset = atoi(gmtstr+1);
620 ds.tm_gmtoff += (offset / 100) * 60 * 60;
621 ds.tm_gmtoff += (offset % 100) * 60;
626 /* apr_time_exp_get uses tm_usec field, but it hasn't been set yet.
627 * It should be safe to just zero out this value.
628 * tm_usec is the number of microseconds into the second. HTTP only
629 * cares about second granularity.
633 if (apr_time_exp_gmt_get(&result, &ds) != APR_SUCCESS)