1 # Convert tzdata source into vanguard or rearguard form.
3 # Contributed by Paul Eggert. This file is in the public domain.
5 # This is not a general-purpose converter; it is designed for current tzdata.
6 # It just converts from current source to main, vanguard, and rearguard forms.
7 # Although it might be nice for it to be idempotent, or to be useful
8 # for converting back and forth between vanguard and rearguard formats,
9 # it does not do these nonessential tasks now.
11 # Although main and vanguard forms are currently equivalent,
12 # this need not always be the case. When the two forms differ,
13 # this script can convert either from main to vanguard form (needed then),
14 # or from vanguard to main form (this conversion would be needed later,
15 # after main became rearguard and vanguard became main).
16 # There is no need to convert rearguard to other forms.
18 # When converting to vanguard form, the output can use the line
19 # "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
21 # When converting to vanguard form, the output can use negative SAVE values.
24 # When converting to rearguard form, the output uses only nonnegative
25 # SAVE values. The idea is for the output data to simulate the behavior
26 # of the input data as best it can within the constraints of the rearguard format.
29 # Given a FIELD like "-0:30", return a minute count like -30.
30 function get_minutes(field, \
33 sign = field ~ /^-/ ? -1 : 1
37 sub(/[^:]*:/, "", minutes)
39 return 60 * hours + sign * minutes
42 # Given an OFFSET, which is a minute count like 300 or 330,
43 # return a %z-style abbreviation like "+05" or "+0530".
44 function offset_abbr(offset, \
47 hours = int(offset / 60)
50 return sprintf("%+.4d", hours * 100 + minutes);
52 return sprintf("%+.2d", hours)
56 # Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
57 function round_to_second(timestamp, \
58 hh, mm, ss, seconds, dot_dddd, subseconds)
61 if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
63 hh = mm = ss = timestamp
64 sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
65 sub(/^[-+]?[0-9]+:/, "", mm)
67 seconds = 3600 * hh + 60 * mm + ss
68 subseconds = +dot_dddd
69 seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
70 return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
71 seconds / 3600, seconds / 60 % 60, seconds % 60)
75 dataform_type["vanguard"] = 1
76 dataform_type["main"] = 1
77 dataform_type["rearguard"] = 1
80 while ((getline <PACKRATLIST) > 0) {  # getline yields 0 at EOF and -1 on error; only > 0 is a real record
81 if ($0 ~ /^#/) continue
86 # The command line should set DATAFORM.
87 if (!dataform_type[DATAFORM]) exit 1
90 $1 == "#PACKRATLIST" && $2 == PACKRATLIST {
91 sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
97 in_comment = $0 ~ /^#/
98 uncomment = comment_out = 0
100 # If this line should differ due to Czechoslovakia using negative SAVE values,
101 # uncomment the desired version and comment out the undesired one.
102 if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
103 && $0 ~ /1947 Feb 23/) {
104 if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
105 uncomment = in_comment
107 comment_out = !in_comment
111 # If this line should differ due to Ireland using negative SAVE values,
112 # uncomment the desired version and comment out the undesired one.
113 Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
114 Zone_Dublin_post_1968 \
115 = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
116 && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
117 if (Rule_Eire || Zone_Dublin_post_1968) {
119 || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
120 == (DATAFORM != "rearguard")) {
121 uncomment = in_comment
123 comment_out = !in_comment
127 # If this line should differ due to Namibia using negative SAVE values,
128 # uncomment the desired version and comment out the undesired one.
129 Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
130 Zone_using_Namibia_rule \
131 = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
132 && ($(in_comment + 2) == "Namibia" \
133 || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
134 && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
135 || in_comment + 3 == NF))))
136 if (Rule_Namibia || Zone_using_Namibia_rule) {
138 ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
139 : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
140 == (DATAFORM != "rearguard")) {
141 uncomment = in_comment
143 comment_out = !in_comment
147 # If this line should differ due to Portugal benefiting from %z if supported,
148 # uncomment the desired version and comment out the undesired one.
149 if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
150 if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
151 uncomment = in_comment
153 comment_out = !in_comment
157 # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
158 # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
159 # This works around a bug in TZUpdater 2.3.2.
160 if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
161 if (($2 == "GMT") == (DATAFORM == "vanguard")) {
162 uncomment = in_comment
164 comment_out = !in_comment
175 # Prefer %z in vanguard form, explicit abbreviations otherwise.
176 if (DATAFORM == "vanguard") {
177 sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
179 sub(/-00CHANGE-TO-%z/, "-00")
180 sub(/[-+][^\t ]+CHANGE-TO-/, "")
182 if ($0 ~ /^[^#]*%z/) {
183 stdoff_column = 2 * ($0 ~ /^Zone/) + 1
184 rules_column = stdoff_column + 1
185 stdoff = get_minutes($stdoff_column)
186 rules = $rules_column
187 stdabbr = offset_abbr(stdoff)
191 dstabbr_only = rules ~ /^[+0-9-]/
193 dstoff = get_minutes(rules)
195 # The DST offset is normally an hour, but there are special cases.
196 if (rules == "Morocco" && NF == 3) {
198 } else if (rules == "NBorneo") {
200 } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
201 || (rules == "Uruguay" \
202 && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
204 } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
210 dstabbr = offset_abbr(stdoff + dstoff)
214 abbr = stdabbr "/" dstabbr
221 # Normally, prefer whole seconds. However, prefer subseconds
222 # if generating vanguard form and the otherwise-undocumented
223 # VANGUARD_SUBSECONDS environment variable is set.
224 # This relies on #STDOFF comment lines in the data.
225 # It is for hypothetical clients that support UT offsets that are
226 # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
227 # No known clients need this currently, and this experimental
228 # feature may be changed or withdrawn in future releases.
229 if ($1 == "#STDOFF") {
231 rounded_stdoff = round_to_second(stdoff)
232 if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
233 stdoff_subst[0] = rounded_stdoff
234 stdoff_subst[1] = stdoff
236 stdoff_subst[0] = stdoff
237 stdoff_subst[1] = rounded_stdoff
239 } else if (stdoff_subst[0]) {
240 stdoff_column = 2 * ($0 ~ /^Zone/) + 1
241 stdoff_column_val = $stdoff_column
242 if (stdoff_column_val == stdoff_subst[0]) {
243 sub(stdoff_subst[0], stdoff_subst[1])
244 } else if (stdoff_column_val != stdoff_subst[1]) {
249 # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
250 # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
251 if ($0 ~ /^Rule/ && $2 == "Japan") {
252 if (DATAFORM == "rearguard") {
253 if ($7 == "Sat>=8" && $8 == "25:00") {
254 sub(/Sat>=8/, "Sun>=9")
255 sub(/25:00/, " 1:00")
258 if ($7 == "Sun>=9" && $8 == "1:00") {
259 sub(/Sun>=9/, "Sat>=8")
260 sub(/ 1:00/, "25:00")
265 # In rearguard form, change the Morocco lines with negative SAVE values
266 # to use positive SAVE values.
267 if ($2 == "Morocco") {
269 if ($4 ~ /^201[78]$/ && $6 == "Oct") {
270 if (DATAFORM == "rearguard") {
271 sub(/\t2018\t/, "\t2017\t")
273 sub(/\t2017\t/, "\t2018\t")
279 if (DATAFORM == "rearguard") {
280 sub(/\t0\t/, "\t1:00\t")
282 sub(/\t1:00\t/, "\t0\t")
285 if (DATAFORM == "rearguard") {
286 sub(/\t-1:00\t/, "\t0\t")
288 sub(/\t0\t/, "\t-1:00\t")
293 if ($1 ~ /^[+0-9-]/ && NF == 3) {
294 if (DATAFORM == "rearguard") {
295 sub(/1:00\tMorocco/, "0:00\tMorocco")
296 sub(/\t\+01\/\+00$/, "\t+00/+01")
298 sub(/0:00\tMorocco/, "1:00\tMorocco")
299 sub(/\t\+00\/\+01$/, "\t+01/+00")  # both "+" are literal; unescaped, the second would quantify "/"
306 packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
309 if (packrat_ignored && $0 !~ /^Rule/) {
314 # Return the link line that results from changing OLDLINE to link to TARGET
315 # from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
316 # Align data columns the same as they were in OLDLINE.
317 # Also, replace any existing white space followed by comment with COMMENT.
318 function make_linkline(oldline, target, linkname, oldtarget, comment, \
319 oldprefix, oldprefixlen, oldtargettabs, \
320 replsuffix, targettabs)
322 oldprefix = "Link\t" oldtarget "\t"
323 oldprefixlen = length(oldprefix)
324 if (substr(oldline, 1, oldprefixlen) == oldprefix) {
325 # Use tab stops to preserve LINKNAME's column.
326 replsuffix = substr(oldline, oldprefixlen + 1)
327 sub(/[\t ]*#.*/, "", replsuffix)
328 oldtargettabs = int(length(oldtarget) / 8) + 1
329 targettabs = int(length(target) / 8) + 1
330 for (; targettabs < oldtargettabs; targettabs++) {
331 replsuffix = "\t" replsuffix
333 for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
334 replsuffix = substr(replsuffix, 2)
337 # Odd format line; don't bother lining up its replacement nicely.
338 replsuffix = linkname
340 return "Link\t" target "\t" replsuffix comment
343 /^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
344 $0 = make_linkline($0, $5, $3, $2)
347 # If a Link line is followed by a Link or Zone line for the same data, comment
348 # out the Link line. This can happen if backzone overrides a Link
349 # with a Zone or a different Link.
351 sub(/^Link/, "#Link", line[linkline[$2]])
354 sub(/^Link/, "#Link", line[linkline[$3]])
361 function cut_link_chains_short( \
362 l, linkname, t, target, u)  # all locals; u is the chain-walk temporary and must not leak as a global
364 for (linkname in linktarget) {
365 target = linktarget[linkname]
366 t = linktarget[target]
368 # TARGET is itself a link name. Replace the line "Link TARGET LINKNAME"
369 # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
370 # of links that LINKNAME points to.
371 while ((u = linktarget[t])) {
374 l = linkline[linkname]
375 line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
381 if (DATAFORM != "vanguard") {
382 cut_link_chains_short()
384 for (i = 1; i <= NR; i++)