1 # Convert tzdata source into a smaller version of itself.
3 # Contributed by Paul Eggert. This file is in the public domain.
5 # This is not a general-purpose converter; it is designed for current tzdata.
6 # 'zic' should treat this script's output as if it were identical to
9 # Record a hash N for the new name NAME, checking for collisions.
11 function record_hash(n, name)
14 printf "# ! collision: %s %s\n", used_hashes[n], name
20 # Return a shortened rule name representing NAME,
21 # and record this relationship in the hash table.
23 function gen_rule_name(name, \
26 # Use a simple mnemonic: the first two letters.
27 n = substr(name, 1, 2)
29 # printf "# %s = %s\n", n, name
33 function prehash_rule_names( \
36 # Rule names are not part of the tzdb API, so substitute shorter
37 # ones. Shortening them consistently from one release to the next
38 # simplifies comparison of the output. That being said, the
39 # 1-letter names below are not standardized in any way, and can
40 # change arbitrarily from one release to the next, as the main goal
41 # here is compression, not comparison.
43 # Abbreviating these rule names to one letter saved the most space
55 rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
59 rule["Poland"] = "O" # arbitrary
60 rule["Palestine"] = "P"
61 rule["Cuba"] = "Q" # Its start sounds like "Q".
66 rule["Vincennes"] = "V"
68 rule["Mongol"] = "X" # arbitrary
74 rule["Algeria"] = "d" # country code DZ
76 rule["Taiwan"] = "f" # Formosa
81 rule["Chatham"] = "k" # arbitrary
84 rule["Tunisia"] = "n" # country code TN
85 rule["Moncton"] = "o" # arbitrary
87 rule["Albania"] = "q" # arbitrary
92 rule["Louisville"] = "v" # ville
93 rule["Iceland"] = "w" # arbitrary
94 rule["Chile"] = "x" # arbitrary
95 rule["Para"] = "y" # country code PY
96 rule["Romania"] = "z" # arbitrary
97 rule["Macau"] = "_" # arbitrary
99 # Use ISO 3166 alpha-2 country codes for remaining names that are countries.
100 # This is more systematic, and avoids collisions (e.g., Malta and Moldova).
101 rule["Armenia"] = "AM"
107 rule["Bahamas"] = "BS"
108 rule["Belize"] = "BZ"
112 rule["Cyprus"] = "CY"
114 rule["Germany"] = "DE"
116 rule["Ecuador"] = "EC"
117 rule["Finland"] = "FI"
127 rule["Kyrgyz"] = "KG"
129 rule["Latvia"] = "LV"
131 rule["Moldova"] = "MD"
133 rule["Mauritius"] = "MU"
134 rule["Namibia"] = "NA"
136 rule["Norway"] = "NO"
139 rule["Pakistan"] = "PK"
143 rule["Vanuatu"] = "VU"
146 rule["Detroit"] = "Dt" # De = Denver
149 record_hash(rule[name], name)
153 function make_line(n, field, \
157 for (f = 2; f <= n; f++)
162 # Process the input line LINE and save it for later output.
164 function process_input_line(line, \
165 f, field, end, n, outline, r, \
166 linkline, ruleline, zoneline)
168 # Remove comments, normalize spaces, and append a space to each line.
171 gsub(/[\t ]+/, " ", line)
173 # Abbreviate keywords and determine line type.
174 linkline = sub(/^Link /, "L ", line)
175 ruleline = sub(/^Rule /, "R ", line)
176 zoneline = sub(/^Zone /, "Z ", line)
178 # Replace FooAsia rules with the same rules without "Asia", as they
180 if (match(line, /[^ ]Asia /)) {
182 line = substr(line, 1, RSTART) substr(line, RSTART + 5)
186 while (match(line, /[: ]0+[0-9]/))
187 line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
188 while (match(line, /:0[^:]/))
189 line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)
191 # Abbreviate weekday names.
192 while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
193 end = RSTART + RLENGTH
194 line = substr(line, 1, end - 4) substr(line, end - 1)
196 while (match(line, / (last)?(Sun|Tue|Thu|Sat)[ <>]/)) {
197 end = RSTART + RLENGTH
198 line = substr(line, 1, end - 3) substr(line, end - 1)
201 # Abbreviate "max", "min", "only" and month names.
202 # Although "max" and "min" can both be abbreviated to just "m",
203 # the longer forms "ma" and "mi" are needed with zic 2023d and earlier.
204 gsub(/ max /, dataform == "vanguard" ? " m " : " ma ", line)
205 gsub(/ min /, dataform == "vanguard" ? " m " : " mi ", line)
206 gsub(/ only /, " o ", line)
207 gsub(/ Jan /, " Ja ", line)
208 gsub(/ Feb /, " F ", line)
209 gsub(/ Apr /, " Ap ", line)
210 gsub(/ Aug /, " Au ", line)
211 gsub(/ Sep /, " S ", line)
212 gsub(/ Oct /, " O ", line)
213 gsub(/ Nov /, " N ", line)
214 gsub(/ Dec /, " D ", line)
216 # Strip leading and trailing space.
220 # Remove unnecessary trailing zero fields.
221 sub(/ 0+$/, "", line)
223 # Remove unnecessary trailing days-of-month "1".
224 if (match(line, /[A-Za-z] 1$/))
225 line = substr(line, 1, RSTART)
227 # Remove unnecessary trailing " Ja" (for January).
228 sub(/ Ja$/, "", line)
230 n = split(line, field)
232 # Record which rule names are used, and generate their abbreviations.
233 f = zoneline ? 4 : linkline || ruleline ? 0 : 2
235 if (r ~ /^[^-+0-9]/) {
240 zonename = startdef = field[2]
242 zonename = startdef = field[3]
246 # Save the information for later output.
247 outline = make_line(n, field)
249 rule_output_line[nrule_out++] = outline
251 # In vanguard format with gawk, links are output sorted by destination.
252 if (dataform == "vanguard" && PROCINFO["version"])
253 linkdef[zonename] = field[2]
255 link_output_line[nlink_out++] = outline
257 zonedef[zonename] = (zoneline ? "" : zonedef[zonename] "\n") outline
260 function omit_unused_rules( \
263 for (i = 0; i < nrule_out; i++) {
264 split(rule_output_line[i], field)
265 if (!rule_used[field[2]])
266 rule_output_line[i] = ""
270 function abbreviate_rule_names( \
271 abbr, f, field, i, n, newdef, newline, r, \
272 zoneline, zonelines, zonename)
274 for (i = 0; i < nrule_out; i++) {
275 n = split(rule_output_line[i], field)
278 if (r ~ /^[^-+0-9]/) {
281 rule[r] = abbr = gen_rule_name(r)
284 rule_output_line[i] = make_line(n, field)
288 for (zonename in zonedef) {
289 zonelines = split(zonedef[zonename], zoneline, /\n/)
291 for (i = 1; i <= zonelines; i++) {
292 newline = zoneline[i]
293 n = split(newline, field)
298 newline = make_line(n, field)
300 newdef = (newdef ? newdef "\n" : "") newline
302 zonedef[zonename] = newdef
306 function output_saved_lines( \
309 for (i = 0; i < nrule_out; i++)
310 if (rule_output_line[i])
311 print rule_output_line[i]
313 # When using gawk, output zones sorted by name.
314 # This makes the output a bit more compressible.
315 PROCINFO["sorted_in"] = "@ind_str_asc"
316 for (zonename in zonedef)
317 print zonedef[zonename]
320 for (i = 0; i < nlink_out; i++)
321 print link_output_line[i]
323 # When using gawk, output links sorted by destination.
324 # This also helps compressibility a bit.
325 PROCINFO["sorted_in"] = "@val_type_asc"
326 for (zonename in linkdef)
327 printf "L %s %s\n", linkdef[zonename], zonename
332 # Files that the output normally depends on.
333 default_dep["africa"] = 1
334 default_dep["antarctica"] = 1
335 default_dep["asia"] = 1
336 default_dep["australasia"] = 1
337 default_dep["backward"] = 1
338 default_dep["etcetera"] = 1
339 default_dep["europe"] = 1
340 default_dep["factory"] = 1
341 default_dep["northamerica"] = 1
342 default_dep["southamerica"] = 1
343 default_dep["ziguard.awk"] = 1
344 default_dep["zishrink.awk"] = 1
346 # Output a version string from 'version' and related configuration variables
347 # supported by tzdb's Makefile. If you change the makefile or any other files
348 # that affect the output of this script, you should append '-SOMETHING'
349 # to the contents of 'version', where SOMETHING identifies what was changed.
351 ndeps = split(deps, dep)
353 for (i = 1; i <= ndeps; i++) {
354 if (default_dep[dep[i]]) {
355 default_dep[dep[i]]++
357 ddeps = ddeps " " dep[i]
360 for (d in default_dep) {
361 if (default_dep[d] == 1) {
365 print "# version", version
366 if (dataform != "main") {
367 print "# dataform", dataform
369 if (redo != "posix_right") {
373 print "# ddeps" ddeps
375 print "# This zic input file is in the public domain."
381 process_input_line($0)
386 abbreviate_rule_names()