summaryrefslogtreecommitdiffstats
path: root/contrib/tzdata/zishrink.awk
blob: d617644e9cee1624eca68bced0580f4c911e3fd6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.


# Generate and return a fresh, short rule name.
#
# Takes no arguments from callers; every name in the parameter list is
# a local variable (awk's idiom for declaring locals).  The global
# counter N_RULE_NAMES records how many names have been handed out so
# far and is incremented on every call, so successive calls return
# "A", "B", ..., then two-character names, and so on.

function gen_rule_name(symbols, radix, name, k, d)
{
  # Characters that may appear in a generated name.
  symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  symbols = symbols "abcdefghijklmnopqrstuvwxyz"
  symbols = symbols "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  radix = length(symbols)

  k = n_rule_names++
  name = ""

  # Peel off one character per iteration, least significant first.
  while (1) {
    # Once at least one character has been produced, a remaining value
    # no larger than the radix is shifted down by one so that the
    # leading character can also be the first symbol.
    if (name != "" && k <= radix)
      k--
    d = k % radix
    name = substr(symbols, d + 1, 1) name
    k = (k - d) / radix
    if (!k)
      break
  }

  return name
}

# Process an input line and save it for later output.
#
# LINE is the raw input line; the remaining names in the parameter list
# are local variables.  The line is shrunk (comments dropped, keywords,
# times, weekday and month names abbreviated, rule names replaced by
# generated short names) and appended to OUTPUT_LINE, with NOUT counting
# the saved lines.  Some lines (SystemV and FooAsia rules) are dropped
# entirely.
#
# Globals used: RULE maps original rule names to generated ones,
# ZONENAME is the zone or link currently being defined, and ZONEDEF maps
# a zone or link name to one plus the OUTPUT_LINE index of the line that
# starts its definition.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\t ]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.  The FooAsia rule lines themselves are dropped;
  # on any other line the "Asia" suffix is cut out of the name.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: first strip leading zeros from each component,
  # then drop a ":0" component that is not followed by another colon.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # Mon/Wed/Fri shrink to one letter; Tue/Thu need two to stay distinct.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The name is field 4 on a "Z" line and
  # field 2 on every other line except "Li" lines, which have none.
  # A field starting with a sign or digit is not a rule name.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier defining line and every continuation line
      # (those starting with a sign or digit) that follows it.
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/)
    }
    # Record where this definition starts.  This is done only on the
    # defining "Z"/"Li" line: updating it on continuation lines too
    # would leave the entry pointing at the last continuation line, so
    # a later superseding definition would blank only that one line.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}

# Print every saved output line that has not been blanked out.
# I is a local variable; callers pass no arguments.

function output_saved_lines(i)
{
  i = 0
  while (i < nout) {
    # Superseded lines were replaced by "" and are skipped here.
    if (output_line[i])
      print output_line[i]
    i++
  }
}

# Emit the output header before any input is read.
# NOTE(review): 'version' is not assigned anywhere in this script;
# presumably the invoker supplies it (e.g. awk -v version=...) -- confirm.
BEGIN {
  print "# version", version
  print "# This zic input file is in the public domain."
}

# Handle each input line whose first non-blank character is not '#',
# i.e. skip blank lines and comment-only lines.
$0 ~ /^[\t ]*[^#\t ]/ {
  process_input_line($0)
}

# All input has been seen, so superseded definitions are known;
# write out whatever lines survived.
END {
  output_saved_lines()
}
OpenPOWER on IntegriCloud