1 /* Parse a string, yielding a struct partime that describes it. */
3 /* Copyright 1993, 1994, 1995 Paul Eggert
4 Distributed under license by the Free Software Foundation, Inc.
6 This file is part of RCS.
8 RCS is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 RCS is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with RCS; see the file COPYING.
20 If not, write to the Free Software Foundation,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 Report problems and direct all questions to:
25 rcs-bugs@cs.purdue.edu
/*
 * Test for an ASCII decimal digit with a single comparison: the unsigned
 * subtraction wraps anything below '0' around to a huge value, so one
 * upper-bound check covers both ends of the range.  Faster than stock.
 */
#define isdigit(ch) ((unsigned) (ch) - '0' <= 9)
48 char const partimeId[]
52 /* Lookup tables for names of months, weekdays, time zones. */
54 #define NAME_LENGTH_MAXIMUM 4
57 char name[NAME_LENGTH_MAXIMUM];
62 static char const *parse_decimal P((char const*,int,int,int,int,int*,int*));
63 static char const *parse_fixed P((char const*,int,int*));
64 static char const *parse_pattern_letter P((char const*,int,struct partime*));
65 static char const *parse_prefix P((char const*,struct partime*,int*));
66 static char const *parse_ranged P((char const*,int,int,int,int*));
67 static int lookup P((char const*,struct name_val const[]));
68 static int merge_partime P((struct partime*, struct partime const*));
69 static void undefine P((struct partime*));
72 static struct name_val const month_names[] = {
73 {"jan",0}, {"feb",1}, {"mar",2}, {"apr",3}, {"may",4}, {"jun",5},
74 {"jul",6}, {"aug",7}, {"sep",8}, {"oct",9}, {"nov",10}, {"dec",11},
78 static struct name_val const weekday_names[] = {
79 {"sun",0}, {"mon",1}, {"tue",2}, {"wed",3}, {"thu",4}, {"fri",5}, {"sat",6},
/*
 * Helpers for writing the zone table.  Offsets are spelled as decimal
 * [-]hhmm values and converted to minutes here.
 */

/* Minutes represented by the non-negative hhmm value HHMM.  */
#define hr60nonnegative(hhmm) ((hhmm) / 100 * 60 + (hhmm) % 100)

/* Minutes represented by the signed hhmm value HHMM.  */
#define hr60(hhmm) \
	((hhmm) >= 0 ? hr60nonnegative (hhmm) : -hr60nonnegative (-(hhmm)))

/* One table entry: zone NAME at offset HHMM.  */
#define zs(hhmm, name) {name, hr60 (hhmm)}

/* Two table entries: STD at offset HHMM, and DST one hour further east.  */
#define zd(hhmm, std, dst) zs (hhmm, std), zs ((hhmm) + 100, dst)
88 static struct name_val const zone_names[] = {
89 zs(-1000, "hst"), /* Hawaii */
90 zd(-1000,"hast","hadt"),/* Hawaii-Aleutian */
91 zd(- 900,"akst","akdt"),/* Alaska */
92 zd(- 800, "pst", "pdt"),/* Pacific */
93 zd(- 700, "mst", "mdt"),/* Mountain */
94 zd(- 600, "cst", "cdt"),/* Central */
95 zd(- 500, "est", "edt"),/* Eastern */
96 zd(- 400, "ast", "adt"),/* Atlantic */
97 zd(- 330, "nst", "ndt"),/* Newfoundland */
98 zs( 000, "utc"), /* Coordinated Universal */
99 zs( 000, "cut"), /* " */
100 zs( 000, "ut"), /* Universal */
101 zs( 000, "z"), /* Zulu (required by ISO 8601) */
102 zd( 000, "gmt", "bst"),/* Greenwich Mean, British Summer */
103 zs( 000, "wet"), /* Western Europe */
104 zs( 100, "met"), /* Middle Europe */
105 zs( 100, "cet"), /* Central Europe */
106 zs( 200, "eet"), /* Eastern Europe */
107 zs( 530, "ist"), /* India */
108 zd( 900, "jst", "jdt"),/* Japan */
109 zd( 900, "kst", "kdt"),/* Korea */
110 zd( 1200,"nzst","nzdt"),/* New Zealand */
113 /* The following names are duplicates or are not well attested. */
114 zs(-1100, "sst"), /* Samoa */
115 zs(-1000, "tht"), /* Tahiti */
116 zs(- 930, "mqt"), /* Marquesas */
117 zs(- 900, "gbt"), /* Gambier */
118 zd(- 900, "yst", "ydt"),/* Yukon - name is no longer used */
119 zs(- 830, "pit"), /* Pitcairn */
120 zd(- 500, "cst", "cdt"),/* Cuba */
121 zd(- 500, "ast", "adt"),/* Acre */
122 zd(- 400, "wst", "wdt"),/* Western Brazil */
123 zd(- 400, "ast", "adt"),/* Andes */
124 zd(- 400, "cst", "cdt"),/* Chile */
125 zs(- 300, "wgt"), /* Western Greenland */
126 zd(- 300, "est", "edt"),/* Eastern South America */
127 zs(- 300, "mgt"), /* Middle Greenland */
128 zd(- 200, "fst", "fdt"),/* Fernando de Noronha */
129 zs(- 100, "egt"), /* Eastern Greenland */
130 zs(- 100, "aat"), /* Atlantic Africa */
131 zs(- 100, "act"), /* Azores and Canaries */
132 zs( 000, "wat"), /* West Africa */
133 zs( 100, "cat"), /* Central Africa */
134 zd( 100, "mez","mesz"),/* Mittel-Europaeische Zeit */
135 zs( 200, "sat"), /* South Africa */
136 zd( 200, "ist", "idt"),/* Israel */
137 zs( 300, "eat"), /* East Africa */
138 zd( 300, "ast", "adt"),/* Arabia */
139 zd( 300, "msk", "msd"),/* Moscow */
140 zd( 330, "ist", "idt"),/* Iran */
141 zs( 400, "gst"), /* Gulf */
142 zs( 400, "smt"), /* Seychelles & Mascarene */
143 zd( 400, "esk", "esd"),/* Yekaterinburg */
144 zd( 400, "bsk", "bsd"),/* Baku */
145 zs( 430, "aft"), /* Afghanistan */
146 zd( 500, "osk", "osd"),/* Omsk */
147 zs( 500, "pkt"), /* Pakistan */
148 zd( 500, "tsk", "tsd"),/* Tashkent */
149 zs( 545, "npt"), /* Nepal */
150 zs( 600, "bgt"), /* Bangladesh */
151 zd( 600, "nsk", "nsd"),/* Novosibirsk */
152 zs( 630, "bmt"), /* Burma */
153 zs( 630, "cct"), /* Cocos */
154 zs( 700, "ict"), /* Indochina */
155 zs( 700, "jvt"), /* Java */
156 zd( 700, "isk", "isd"),/* Irkutsk */
157 zs( 800, "hkt"), /* Hong Kong */
158 zs( 800, "pst"), /* Philippines */
159 zs( 800, "sgt"), /* Singapore */
160 zd( 800, "cst", "cdt"),/* China */
161 zd( 800, "ust", "udt"),/* Ulan Bator */
162 zd( 800, "wst", "wst"),/* Western Australia */
163 zd( 800, "ysk", "ysd"),/* Yakutsk */
164 zs( 900, "blt"), /* Belau */
165 zs( 900, "mlt"), /* Moluccas */
166 zd( 900, "vsk", "vsd"),/* Vladivostok */
167 zd( 930, "cst", "cst"),/* Central Australia */
168 zs( 1000, "gst"), /* Guam */
169 zd( 1000, "gsk", "gsd"),/* Magadan */
170 zd( 1000, "est", "est"),/* Eastern Australia */
171 zd( 1100,"lhst","lhst"),/* Lord Howe */
172 zd( 1100, "psk", "psd"),/* Petropavlovsk-Kamchatski */
173 zs( 1100,"ncst"), /* New Caledonia */
174 zs( 1130,"nrft"), /* Norfolk */
175 zd( 1200, "ask", "asd"),/* Anadyr */
176 zs( 1245,"nz-chat"), /* Chatham */
177 zs( 1300, "tgt"), /* Tongatapu */
185 struct name_val const table[];
186 /* Look for a prefix of S in TABLE, returning val for first matching entry. */
189 char buf[NAME_LENGTH_MAXIMUM];
191 for (j = 0; j < NAME_LENGTH_MAXIMUM; j++) {
192 unsigned char c = *s++;
193 buf[j] = isupper (c) ? tolower (c) : c;
197 for (; table[0].name[0]; table++)
198 for (j = 0; buf[j] == table[0].name[j]; )
199 if (++j == NAME_LENGTH_MAXIMUM || !table[0].name[j])
207 undefine (t) struct partime *t;
208 /* Set *T to ``undefined'' values. */
210 t->tm.tm_sec = t->tm.tm_min = t->tm.tm_hour = t->tm.tm_mday = t->tm.tm_mon
211 = t->tm.tm_year = t->tm.tm_wday = t->tm.tm_yday
212 = t->ymodulus = t->yweek
214 t->zone = TM_UNDEFINED_ZONE;
218 * Array of patterns to look for in a date string.
219 * Order is important: we look for the first matching pattern
220 * whose values do not contradict values that we already know about.
221 * See `parse_pattern_letter' below for the meaning of the pattern codes.
223 static char const * const patterns[] = {
225 * These traditional patterns must come first,
226 * to prevent an ISO 8601 format from misinterpreting their prefixes.
228 "E_n_y", "x", /* RFC 822 */
229 "E_n", "n_E", "n", "t:m:s_A", "t:m_A", "t_A", /* traditional */
230 "y/N/D$", /* traditional RCS */
232 /* ISO 8601:1988 formats, generalized a bit. */
233 "y-N-D$", "4ND$", "Y-N$",
234 "RND$", "-R=N$", "-R$", "--N=D$", "N=DT",
235 "--N$", "---D$", "DT",
236 "Y-d$", "4d$", "R=d$", "-d$", "dT",
237 "y-W-X", "yWX", "y=W",
238 "-r-W-X", "r-W-XT", "-rWX", "rWXT", "-W=X", "W=XT", "-W",
239 "-w-X", "w-XT", "---X$", "XT", "4$",
241 "h:m:s$", "hms$", "h:m$", "hm$", "h$", "-m:s$", "-ms$", "-m$", "--s$",
248 parse_prefix (str, t, pi) char const *str; struct partime *t; int *pi;
250 * Parse an initial prefix of STR, setting *T accordingly.
251 * Return the first character after the prefix, or 0 if it couldn't be parsed.
252 * Start with pattern *PI; if success, set *PI to the next pattern to try.
253 * Set *PI to -1 if we know there are no more patterns to try;
254 * if *PI is initially negative, give up immediately.
264 /* Remove initial noise. */
265 while (!isalnum (c = *str) && c != '-' && c != '+') {
274 /* Try a pattern until one succeeds. */
275 while ((pat = patterns[i++]) != 0) {
283 } while ((s = parse_pattern_letter (s, c, t)) != 0);
static char const *
parse_fixed (s, digits, res) char const *s; int digits, *res;
/*
* Parse an initial prefix of S of length DIGITS; it must be a number.
* Store the parsed number into *RES.
* Return the first character after the prefix, or 0 if it couldn't be parsed
* or if the number does not fit in an int.
* *RES is left untouched on failure.
*/
{
	/*
	* Largest int value, computed locally so that no extra header is needed.
	* This assumes the usual representation where unsigned has exactly
	* one more value bit than int.
	*/
	unsigned const int_max = (unsigned) -1 >> 1;
	unsigned n = 0;
	char const *lim = s + digits;

	while (s < lim) {
		/*
		* The unsigned subtraction maps every non-digit,
		* including the terminating NUL, to a value above 9.
		*/
		unsigned d = *s++ - '0';
		if (9 < d)
			return 0;
		/* Reject the digit before 10*n + d can exceed int_max.  */
		if ((int_max - d) / 10 < n)
			return 0;
		n = 10*n + d;
	}
	*res = n;
	return s;
}
static char const *
parse_ranged (s, digits, lo, hi, res) char const *s; int digits, lo, hi, *res;
/*
* Parse an initial prefix of S of length DIGITS;
* it must be a number in the range LO through HI.
* Store the parsed number into *RES.
* Return the first character after the prefix, or 0 if it couldn't be parsed.
*/
{
	char const *end = parse_fixed (s, digits, res);

	if (!end)
		return 0;
	/* *RES has been stored by now, even if it turns out to be out of range.  */
	if (*res < lo  ||  hi < *res)
		return 0;
	return end;
}
static char const *
parse_decimal (s, digits, lo, hi, resolution, res, fres)
	char const *s;
	int digits, lo, hi, resolution, *res, *fres;
/*
* Parse an initial prefix of S of length DIGITS;
* it must be a number in the range LO through HI
* and it may be followed by a fraction that is to be computed using RESOLUTION.
* A fraction is a ',' or '.' followed by one or more digits.
* Store the parsed number into *RES; store the fraction times RESOLUTION,
* rounded to the nearest integer (ties go to the even integer), into *FRES.
* RESOLUTION must be positive.
* Return the first character after the prefix, or 0 if it couldn't be parsed
* or if the fraction has too many digits to be computed in an int.
*/
{
	/*
	* Largest int value, computed locally so that no extra header is needed.
	* This assumes the usual representation where unsigned has exactly
	* one more value bit than int.
	*/
	unsigned const int_max = (unsigned) -1 >> 1;
	int f = 0;

	s = parse_fixed (s, digits, res);
	if (!s  ||  *res < lo  ||  hi < *res)
		return 0;

	if ((s[0]==',' || s[0]=='.') && isdigit ((unsigned char) s[1])) {
		char const *frac = ++s;
		int num10 = 0, denom10 = 10;
		int product, quot, rem, half;

		/* Find the end of the fraction; DENOM10 becomes 10**(digit count).  */
		while (isdigit ((unsigned char) *++s)) {
			if (int_max / 10 < (unsigned) denom10)
				return 0; /* overflow */
			denom10 *= 10;
		}

		/* NUM10 < DENOM10, so the numerator itself cannot overflow.  */
		s = parse_fixed (frac, (int) (s - frac), &num10);
		if (!s)
			return 0;

		/* Check before multiplying; signed overflow is undefined.  */
		if (int_max / (unsigned) resolution < (unsigned) num10)
			return 0; /* overflow */
		product = num10*resolution;

		/*
		* Round PRODUCT/DENOM10 to nearest, ties to even, without forming
		* PRODUCT + DENOM10/2, which could overflow.  DENOM10 is a multiple
		* of 10, so HALF is exact.
		*/
		quot = product / denom10;
		rem = product % denom10;
		half = denom10 >> 1;
		f = quot + (half < rem  ||  (rem == half  &&  (quot & 1)));
	}

	*fres = f;
	return s;
}
357 parzone (s, zone) char const *s; long *zone;
359 * Parse an initial prefix of S; it must denote a time zone.
360 * Set *ZONE to the number of seconds east of GMT,
361 * or to TM_LOCAL_ZONE if it is the local time zone.
362 * Return the first character after the prefix, or 0 if it couldn't be parsed.
367 int minutesEastOfUTC;
371 * The formats are LT, n, n DST, nDST, no, o
372 * where n is a time zone name
373 * and o is a time zone offset of the form [-+]hh[:mm[:ss]].
381 minutesEastOfUTC = lookup (s, zone_names);
382 if (minutesEastOfUTC == -1)
385 /* Don't bother to check rest of spelling. */
386 while (isalpha ((unsigned char) *s))
389 /* Don't modify LT. */
390 if (minutesEastOfUTC == 1) {
391 *zone = TM_LOCAL_ZONE;
395 z = minutesEastOfUTC * 60L;
397 /* Look for trailing " DST". */
399 (s[-1]=='T' || s[-1]=='t') &&
400 (s[-2]=='S' || s[-2]=='s') &&
401 (s[-3]=='D' || s[-3]=='t')
404 while (isspace ((unsigned char) *s))
407 (s[0]=='D' || s[0]=='d') &&
408 (s[1]=='S' || s[1]=='s') &&
409 (s[2]=='T' || s[2]=='t')
418 case '-': case '+': break;
419 default: return (char *) s;
424 if (!(s = parse_ranged (s, 2, 0, 23, &hh)))
429 if (isdigit ((unsigned char) *s)) {
430 if (!(s = parse_ranged (s, 2, 0, 59, &mm)))
432 if (*s==':' && s[-3]==':' && isdigit ((unsigned char) s[1])) {
433 if (!(s = parse_ranged (s + 1, 2, 0, 59, &ss)))
437 if (isdigit ((unsigned char) *s))
439 offset = (hh*60 + mm)*60L + ss;
440 *zone = z + (sign=='-' ? -offset : offset);
442 * ?? Are fractions allowed here?
443 * If so, they're not implemented.
449 parse_pattern_letter (s, c, t) char const *s; int c; struct partime *t;
451 * Parse an initial prefix of S, matching the pattern whose code is C.
452 * Set *T accordingly.
453 * Return the first character after the prefix, or 0 if it couldn't be parsed.
457 case '$': /* The next character must be a non-digit. */
458 if (isdigit ((unsigned char) *s))
462 case '-': case '/': case ':':
463 /* These characters stand for themselves. */
468 case '4': /* 4-digit year */
469 s = parse_fixed (s, 4, &t->tm.tm_year);
472 case '=': /* optional '-' */
476 case 'A': /* AM or PM */
478 * This matches the regular expression [AaPp][Mm]?.
479 * It must not be followed by a letter or digit;
480 * otherwise it would match prefixes of strings like "PST".
484 if (t->tm.tm_hour == 12)
489 if (t->tm.tm_hour != 12)
496 case 'M': case 'm': s++; break;
502 case 'D': /* day of month [01-31] */
503 s = parse_ranged (s, 2, 1, 31, &t->tm.tm_mday);
506 case 'd': /* day of year [001-366] */
507 s = parse_ranged (s, 3, 1, 366, &t->tm.tm_yday);
511 case 'E': /* extended day of month [1-9, 01-31] */
512 s = parse_ranged (s, (
513 isdigit ((unsigned char) s[0]) &&
514 isdigit ((unsigned char) s[1])
515 ) + 1, 1, 31, &t->tm.tm_mday);
518 case 'h': /* hour [00-23 followed by optional fraction] */
521 s = parse_decimal (s, 2, 0, 23, 60*60, &t->tm.tm_hour, &frac);
522 t->tm.tm_min = frac / 60;
523 t->tm.tm_sec = frac % 60;
527 case 'm': /* minute [00-59 followed by optional fraction] */
528 s = parse_decimal (s, 2, 0, 59, 60, &t->tm.tm_min, &t->tm.tm_sec);
531 case 'n': /* month name [e.g. "Jan"] */
532 if (!TM_DEFINED (t->tm.tm_mon = lookup (s, month_names)))
534 /* Don't bother to check rest of spelling. */
535 while (isalpha ((unsigned char) *s))
539 case 'N': /* month [01-12] */
540 s = parse_ranged (s, 2, 1, 12, &t->tm.tm_mon);
544 case 'r': /* year % 10 (remainder in origin-0 decade) [0-9] */
545 s = parse_fixed (s, 1, &t->tm.tm_year);
550 case 'R': /* year % 100 (remainder in origin-0 century) [00-99] */
551 s = parse_fixed (s, 2, &t->tm.tm_year);
555 case 's': /* second [00-60 followed by optional fraction] */
558 s = parse_decimal (s, 2, 0, 60, 1, &t->tm.tm_sec, &frac);
559 t->tm.tm_sec += frac;
563 case 'T': /* 'T' or 't' */
565 case 'T': case 't': break;
570 case 't': /* traditional hour [1-9 or 01-12] */
571 s = parse_ranged (s, (
572 isdigit ((unsigned char) s[0]) && isdigit ((unsigned char) s[1])
573 ) + 1, 1, 12, &t->tm.tm_hour);
576 case 'w': /* 'W' or 'w' only (stands for current week) */
578 case 'W': case 'w': break;
583 case 'W': /* 'W' or 'w', followed by a week of year [00-53] */
585 case 'W': case 'w': break;
588 s = parse_ranged (s, 2, 0, 53, &t->yweek);
591 case 'X': /* weekday (1=Mon ... 7=Sun) [1-7] */
592 s = parse_ranged (s, 1, 1, 7, &t->tm.tm_wday);
596 case 'x': /* weekday name [e.g. "Sun"] */
597 if (!TM_DEFINED (t->tm.tm_wday = lookup (s, weekday_names)))
599 /* Don't bother to check rest of spelling. */
600 while (isalpha ((unsigned char) *s))
604 case 'y': /* either R or Y */
606 isdigit ((unsigned char) s[0]) &&
607 isdigit ((unsigned char) s[1]) &&
608 !isdigit ((unsigned char) s[2])
612 case 'Y': /* year in full [4 or more digits] */
615 while (isdigit ((unsigned char) s[len]))
619 s = parse_fixed (s, len, &t->tm.tm_year);
623 case 'Z': /* time zone */
624 s = parzone (s, &t->zone);
627 case '_': /* possibly empty sequence of non-alphanumerics */
628 while (!isalnum (*s) && *s)
632 default: /* bad pattern */
639 merge_partime (t, u) struct partime *t; struct partime const *u;
641 * If there is no conflict, merge into *T the additional information in *U
642 * and return 0. Otherwise do nothing and return -1.
645 # define conflict(a,b) ((a) != (b) && TM_DEFINED (a) && TM_DEFINED (b))
647 conflict (t->tm.tm_sec, u->tm.tm_sec) ||
648 conflict (t->tm.tm_min, u->tm.tm_min) ||
649 conflict (t->tm.tm_hour, u->tm.tm_hour) ||
650 conflict (t->tm.tm_mday, u->tm.tm_mday) ||
651 conflict (t->tm.tm_mon, u->tm.tm_mon) ||
652 conflict (t->tm.tm_year, u->tm.tm_year) ||
653 conflict (t->tm.tm_wday, u->tm.tm_yday) ||
654 conflict (t->ymodulus, u->ymodulus) ||
655 conflict (t->yweek, u->yweek) ||
657 t->zone != u->zone &&
658 t->zone != TM_UNDEFINED_ZONE &&
659 u->zone != TM_UNDEFINED_ZONE
664 # define merge_(a,b) if (TM_DEFINED (b)) (a) = (b);
665 merge_ (t->tm.tm_sec, u->tm.tm_sec)
666 merge_ (t->tm.tm_min, u->tm.tm_min)
667 merge_ (t->tm.tm_hour, u->tm.tm_hour)
668 merge_ (t->tm.tm_mday, u->tm.tm_mday)
669 merge_ (t->tm.tm_mon, u->tm.tm_mon)
670 merge_ (t->tm.tm_year, u->tm.tm_year)
671 merge_ (t->tm.tm_wday, u->tm.tm_yday)
672 merge_ (t->ymodulus, u->ymodulus)
673 merge_ (t->yweek, u->yweek)
675 if (u->zone != TM_UNDEFINED_ZONE) t->zone = u->zone;
680 partime (s, t) char const *s; struct partime *t;
682 * Parse a date/time prefix of S, putting the parsed result into *T.
683 * Return the first character after the prefix.
684 * The prefix may contain no useful information;
685 * in that case, *T will contain only undefined values.
695 if (!(s1 = parse_prefix (s, &p, &i)))
697 } while (merge_partime (t, &p) != 0);