diff options
Diffstat (limited to 'contrib/tzdata/zishrink.awk')
-rw-r--r-- | contrib/tzdata/zishrink.awk | 203 |
1 files changed, 183 insertions, 20 deletions
diff --git a/contrib/tzdata/zishrink.awk b/contrib/tzdata/zishrink.awk index d617644..8876b68 100644 --- a/contrib/tzdata/zishrink.awk +++ b/contrib/tzdata/zishrink.awk @@ -6,28 +6,146 @@ # 'zic' should treat this script's output as if it were identical to # this script's input. +# Record a hash N for the new name NAME, checking for collisions. -# Return a new rule name. -# N_RULE_NAMES keeps track of how many rule names have been generated. +function record_hash(n, name) +{ + if (used_hashes[n]) { + printf "# ! collision: %s %s\n", used_hashes[n], name + exit 1 + } + used_hashes[n] = name +} -function gen_rule_name(alphabet, base, rule_name, n, digit) +# Return a shortened rule name representing NAME, +# and record this relationship in the hash table. + +function gen_rule_name(name, n) { - alphabet = "" - alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - alphabet = alphabet "abcdefghijklmnopqrstuvwxyz" - alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~" - base = length(alphabet) - rule_name = "" - n = n_rule_names++ - - do { - n -= rule_name && n <= base - digit = n % base - rule_name = substr(alphabet, digit + 1, 1) rule_name - n = (n - digit) / base - } while (n); - - return rule_name + # Use a simple mnemonic: the first two letters. + n = substr(name, 1, 2) + record_hash(n, name) + # printf "# %s = %s\n", n, name + return n +} + +function prehash_rule_names(name) +{ + # Rule names are not part of the tzdb API, so substitute shorter + # ones. Shortening them consistently from one release to the next + # simplifies comparison of the output. That being said, the + # 1-letter names below are not standardized in any way, and can + # change arbitrarily from one release to the next, as the main goal + # here is compression not comparison. + + # Abbreviating these rule names to one letter saved the most space + # circa 2018e. 
+ rule["Arg"] = "A" + rule["Brazil"] = "B" + rule["Canada"] = "C" + rule["Denmark"] = "D" + rule["EU"] = "E" + rule["France"] = "F" + rule["GB-Eire"] = "G" + rule["Halifax"] = "H" + rule["Italy"] = "I" + rule["Jordan"] = "J" + rule["Egypt"] = "K" # "Kemet" in ancient Egyptian + rule["Libya"] = "L" + rule["Morocco"] = "M" + rule["Neth"] = "N" + rule["Poland"] = "O" # arbitrary + rule["Palestine"] = "P" + rule["Cuba"] = "Q" # Its start sounds like "Q". + rule["Russia"] = "R" + rule["Syria"] = "S" + rule["Turkey"] = "T" + rule["Uruguay"] = "U" + rule["Vincennes"] = "V" + rule["Winn"] = "W" + rule["Mongol"] = "X" # arbitrary + rule["NT_YK"] = "Y" + rule["Zion"] = "Z" + rule["Austria"] = "a" + rule["Belgium"] = "b" + rule["C-Eur"] = "c" + rule["Algeria"] = "d" # country code DZ + rule["E-Eur"] = "e" + rule["Taiwan"] = "f" # Formosa + rule["Greece"] = "g" + rule["Hungary"] = "h" + rule["Iran"] = "i" + rule["StJohns"] = "j" + rule["Chatham"] = "k" # arbitrary + rule["Lebanon"] = "l" + rule["Mexico"] = "m" + rule["Tunisia"] = "n" # country code TN + rule["Moncton"] = "o" # arbitrary + rule["Port"] = "p" + rule["Albania"] = "q" # arbitrary + rule["Regina"] = "r" + rule["Spain"] = "s" + rule["Toronto"] = "t" + rule["US"] = "u" + rule["Louisville"] = "v" # ville + rule["Iceland"] = "w" # arbitrary + rule["Chile"] = "x" # arbitrary + rule["Para"] = "y" # country code PY + rule["Romania"] = "z" # arbitrary + rule["Macau"] = "_" # arbitrary + + # Use ISO 3166 alpha-2 country codes for remaining names that are countries. + # This is more systematic, and avoids collisions (e.g., Malta and Moldova). 
+ rule["Armenia"] = "AM" + rule["Aus"] = "AU" + rule["Azer"] = "AZ" + rule["Barb"] = "BB" + rule["Dhaka"] = "BD" + rule["Bulg"] = "BG" + rule["Bahamas"] = "BS" + rule["Belize"] = "BZ" + rule["Swiss"] = "CH" + rule["Cook"] = "CK" + rule["PRC"] = "CN" + rule["Cyprus"] = "CY" + rule["Czech"] = "CZ" + rule["Germany"] = "DE" + rule["DR"] = "DO" + rule["Ecuador"] = "EC" + rule["Finland"] = "FI" + rule["Fiji"] = "FJ" + rule["Falk"] = "FK" + rule["Ghana"] = "GH" + rule["Guat"] = "GT" + rule["Hond"] = "HN" + rule["Haiti"] = "HT" + rule["Eire"] = "IE" + rule["Iraq"] = "IQ" + rule["Japan"] = "JP" + rule["Kyrgyz"] = "KG" + rule["ROK"] = "KR" + rule["Latvia"] = "LV" + rule["Lux"] = "LX" + rule["Moldova"] = "MD" + rule["Malta"] = "MT" + rule["Mauritius"] = "MU" + rule["Namibia"] = "NA" + rule["Nic"] = "NI" + rule["Norway"] = "NO" + rule["Peru"] = "PE" + rule["Phil"] = "PH" + rule["Pakistan"] = "PK" + rule["Sudan"] = "SD" + rule["Salv"] = "SV" + rule["Tonga"] = "TO" + rule["Vanuatu"] = "VU" + + # Avoid collisions. + rule["Detroit"] = "Dt" # De = Denver + + for (name in rule) { + record_hash(rule[name], name) + } } # Process an input line and save it for later output. @@ -106,7 +224,7 @@ function process_input_line(line, field, end, i, n, startdef) i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2 if (i && field[i] ~ /^[^-+0-9]/) { if (!rule[field[i]]) - rule[field[i]] = gen_rule_name() + rule[field[i]] = gen_rule_name(field[i]) field[i] = rule[field[i]] } @@ -144,8 +262,53 @@ function output_saved_lines(i) } BEGIN { + # Files that the output normally depends on. 
+ default_dep["africa"] = 1 + default_dep["antarctica"] = 1 + default_dep["asia"] = 1 + default_dep["australasia"] = 1 + default_dep["backward"] = 1 + default_dep["etcetera"] = 1 + default_dep["europe"] = 1 + default_dep["factory"] = 1 + default_dep["northamerica"] = 1 + default_dep["southamerica"] = 1 + default_dep["systemv"] = 1 + default_dep["ziguard.awk"] = 1 + default_dep["zishrink.awk"] = 1 + + # Output a version string from 'version' and related configuration variables + # supported by tzdb's Makefile. If you change the makefile or any other files + # that affect the output of this script, you should append '-SOMETHING' + # to the contents of 'version', where SOMETHING identifies what was changed. + + ndeps = split(deps, dep) + ddeps = "" + for (i = 1; i <= ndeps; i++) { + if (default_dep[dep[i]]) { + default_dep[dep[i]]++ + } else { + ddeps = ddeps " " dep[i] + } + } + for (d in default_dep) { + if (default_dep[d] == 1) { + ddeps = ddeps " !" d + } + } print "# version", version + if (dataform != "main") { + print "# dataform", dataform + } + if (redo != "posix_right") { + print "# redo " redo + } + if (ddeps) { + print "# ddeps" ddeps + } print "# This zic input file is in the public domain." + + prehash_rule_names() } /^[\t ]*[^#\t ]/ { |