From 632c3e2d6e2c42cca517565e05866364dffe8fe5 Mon Sep 17 00:00:00 2001 From: David Hull Date: Tue, 8 Apr 2014 09:52:12 -0700 Subject: [PATCH] Add tz-erl script to generate timezone database from Olson data. --- db/Makefile | 16 ++ db/tz-erl | 490 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 506 insertions(+) create mode 100644 db/Makefile create mode 100755 db/tz-erl diff --git a/db/Makefile b/db/Makefile new file mode 100644 index 0000000..0da5f4c --- /dev/null +++ b/db/Makefile @@ -0,0 +1,16 @@ +TZDIR=tzdata +TZ_FILES=$(addprefix $(TZDIR)/, africa antarctica asia australasia backward etcetera europe northamerica southamerica) + +tzout: $(TZ_FILES) + TZ_VERSION=`perl -n -e 'm/^VERSION\s*=\s*(\S+)/ and print $$1;' $(TZDIR)/Makefile`; \ + ./tz-erl --version $$TZ_VERSION -o $@ $^ + +$(TZ_FILES): $(TZDIR) + +$(TZDIR): tzdata-latest.tar.gz + mkdir $(TZDIR) && cd $(TZDIR) && tar xzf ../$< + +tzdata-latest.tar.gz: + curl -O "ftp://ftp.iana.org/tz/tzdata-latest.tar.gz" + + diff --git a/db/tz-erl b/db/tz-erl new file mode 100755 index 0000000..4936553 --- /dev/null +++ b/db/tz-erl @@ -0,0 +1,490 @@ +#! /usr/bin/env perl + +# This script processes time zone definitions from the Olson database +# and transforms them into the format used by the erlang_localtime +# library. +# +# Some helpful URLs: +# https://www.iana.org/time-zones +# https://github.com/dmitryme/erlang_localtime +# http://www.cstdbill.com/tzdb/tz-how-to.html + +# Known bugs (should fix): +# * Discarding past/future Rules considers only year. Better to +# use a window [now, one-year-from-now) so that our output rules +# are valid for at least a year. +# Known bugs (can't fix without upstream changes): +# * Africa/Casablanca: Has more than one DST transition per year. +# * Pacific/Fiji: DST does not start/end on Nth DayOfWeek in month. +# * America/Godthab: Transition time as UTC moves into previous day. + +use strict; +use warnings; + +use Getopt::Long; +use Time::Local qw( timelocal timelocal_nocheck ); +use Data::Dump; + +use constant DPW => 7; # Days per week. +use constant HPD => 24; # Hours per day. +use constant MPH => 60; # Minutes per hour. +use constant SPECIFICITY_MAX => 100; + +use constant { + RULE_DATE => 0, + RULE_SAVED => 1, + RULE_TIME => 2, + RULE_LETTERS => 3, + RULE_SPECIFICITY => 4, +}; +use constant RULE_NULL => [ undef, 0, '0W', '-', 0 ]; + +my $version = undef; +my $output_file; +my $date; + +GetOptions( + 'version=s' => \$version, + 'output=s' => \$output_file, + 'date=s' => \$date, +) or die; + +my @data = @ARGV; + +my ($current_day, $current_month, $current_year) = do { + if (defined $date) { + $date =~ m/^(\d\d\d\d)-?(\d\d)-?(\d\d)$/ or die "parse date \"$date\" as YYYY-MM-DD failed\n"; + ($3, $2 - 1, $1); + } else { + my @lt = localtime(time); + ($lt[3], $lt[4], $lt[5] + 1900); + } +}; + +my %rule; +my %rule_base; +my %output; +my @problem; + +# Add the null rule. +$rule_base{'-'} = [ 0, RULE_NULL ]; + +# Have to make three passes through the file. +# First pass: process Rule lines. +process_data(\@data, { Rule => \&rule_line }); +# Second pass: process Zone lines. +process_data(\@data, { Zone => \&zone_line }); +# Third pass, process Link lines. +process_data(\@data, { Link => \&link_line }); + +my $ofh; +if (defined $output_file) { + open($ofh, '>', $output_file) or die "open $output_file for write failed: $!\n"; +} else { + $output_file = 'STDOUT'; + open($ofh, '>&STDOUT') or die "dup $output_file failed: $!\n"; +} + +printf $ofh " %%%% Automatically generated from the time zone database%s for %04d-%02d-%02d.\n", + (defined $version ? " version $version" : ''), + $current_year, $current_month + 1, $current_day; + +print $ofh "\n"; +print $ofh " %% Problems:\n"; +print $ofh map(" %% $_\n", @problem); +print $ofh "\n"; + +print $ofh join(",\n", map { + (my $o = $output{$_}) =~ s/#ZONE#/$_/; + " $o"; +} (sort keys %output)), + "\n"; + +close($ofh) or die "close $output_file failed: $!\n"; + +exit(0); + +######################################################################## + +sub process_data { + my ($data, $handler) = @_; + + foreach my $f (@$data) { + my $fh = $f; + { + # We open the file this way so that die and warn print the filename. + no strict 'refs'; + open($fh, '<', $fh) or die "open $fh for read failed: $!"; + } + + # Read the Olson database. + my ($last_linetype, $last_lineprefix); + while (my $l = <$fh>) { + chomp($l); + $l =~ s/\s*#.*//; # Remove comments. + $l =~ s/\s+$//; # Remove trailing whitespace. + $l eq '' and next; + + my $linetype = do { + if ($l =~ m/^((\S+)\s+\S+)/) { + # Continuation line. + $last_linetype = $2; + $last_lineprefix = $1; + } else { + $l = $last_lineprefix . $l; + } + $last_linetype; + }; + + if (defined(my $h = $handler->{$linetype})) { + $h->($l); + } + } + + close($fh); + } +} + +sub offset_minutes { + my ($off, $adj) = @_; + + my $convert_offset = sub { + my $m = $_[0]; + if ($m =~ m/^([\+\-]?)(?:(\d+):)?(\d+)$/) { + $m = (defined $2 ? $2 : 0) * MPH + $3; + if ($1 eq '-') { $m = -$m; } + } + return $m; + }; + + my $offset = $convert_offset->($off); + if (defined $adj) { + $offset -= $convert_offset->($adj); + } + + return $offset; +} + + +sub rule_line { + my ($l) = @_; + my ($RULE, $name, $from, $to, $type, $in, $on, $at, $save, $letters) = split(m/\s+/, $l); + + # The rule lines in the time zone database describe transitions. + # + # Cases we have to handle: + # + # 1. There are no rules still active. We need to determine the last + # transition and apply it statically. (Example: Ghana.) + # + # 2. There are active rules. We should determine the current rules + # (ignore old rules and future rules) and transfer them to the + # output rules. (Example: Morocco.) + + my $save_minutes = offset_minutes($save); + + # The rules that this function generates has these parts. + # These are accessed using the RULE_* constants defined above. + # 1. A description of the date on which the transition happens, + # or undef if this is a base rule. + # 2. The number of minutes "saved" (the difference from the base + # offset for the zone). + # 3. The time in minutes after midnight at which the transition + # happens. + # 4. The letter for the new state (often 'S' for standard, or 'D' + # for daylight). + # 5. The number of years the rule covers (used to select which rules + # to eliminate when there are more than two rules in a year). + + # Update the base rule. + my $rule_last_active_epoch = last_active_epoch($from, $to, $in, $on); + if (defined $rule_last_active_epoch) { + if (! defined $rule_base{$name} || $rule_base{$name}->[0] < $rule_last_active_epoch) { + $rule_base{$name} = [ $rule_last_active_epoch, [ undef, $save_minutes, 0, $letters, SPECIFICITY_MAX ] ]; + } + } + + my $transform_rule = sub { + my $rule; + if ($on =~ m/^(\w+)>=(\d+)$/) { + # If start day is not a multiple of a week, round it to nearest week. + $rule = [ int(($2-1+int(DPW/2))/DPW) + 1, lc($1), lc($in) ]; # e.g. [ '2', sun', 'nov' ] + if (($2-1) % DPW) { + warn "on $on fuzz for $name -> $rule->[0]"; + push(@problem, "Rounded $in $on to [@$rule] in Rule $name."); + } + } elsif ($on =~ m/^last(\w+)$/) { + $rule = [ 'last', lc($1), lc($in) ]; # e.g. [ 'last', 'sun', 'apr' ] + } else { + warn "no match for $on in rule $l"; + push(@problem, "Ignored $in $on in Rule $name."); + } + my $at_minutes = $at; + if ($at =~ m/^([\+\-]?)(?:(\d+)\:)(\d+)([wsguz]?)/) { + $at_minutes = $2 * MPH + $3; my $z = $4; + if ($1 eq '-') { $at_minutes = -$at_minutes; } + if ($z eq '' or $z eq 'w') { + # Change specified at local (pre-change) wall-clock time. + $at_minutes .= 'W'; + } elsif ($z eq 's') { + # Change specified at local standard time. + $at_minutes .= 'S'; + } elsif ($z eq 'g' or $z eq 'u' or $z eq 'z') { + # Change specified at UTC. + $at_minutes .= 'Z'; + } + } + my $specificity = + $to eq 'only' ? 1 : + $to eq 'max' ? SPECIFICITY_MAX : + $to - $from + 1; + + return [ $rule, $save_minutes, $at_minutes, $letters, $specificity ]; + }; + + # We ignore any rule that has a definite to (end) time. + if (($to eq 'only' && $from == $current_year) || + ($to ne 'only' && ($to eq 'max' || $to >= $current_year) && $from <= $current_year)) { + push(@{$rule{$name}}, $transform_rule->()); + } +} + +# Process a Zone line and updates the %output hash. +sub zone_line { + my ($l) = @_; + my ($ZONE, $name, $gmtoff, $rules, $format, $until) = split(m/\s+/, $l, 6); + # We ignore any zone line that has a definite until (end) time that + # is in the past. + if (defined $until) { + my $until_year = ($until =~ m/^(\d+)/)[0]; + if ($until_year >= $current_year) { + die "until $until not handled"; + } + return; + } + + # Set $name1 and $rule1 for (start of) standard time. If zone has + # DST, set name2 and $rule2 for (start of ) DST, otherwise it is + # equal to standard time. + my ($name1, $name2, $rule1, $rule2); + my @rules = @{$rule{$rules} || []}; + + # If we have more than two rules, discard all except for the two + # most-specific rules. + if (scalar(@rules) > 2) { + print STDERR "discarding excess rules from $rules for $name\n", Data::Dump::dump(\@rules), "\n"; + push(@problem, "Discarded excess rules for Zone $name."); + @rules = sort { $b->[RULE_SPECIFICITY] <=> $a->[RULE_SPECIFICITY] } @rules; + $#rules = 1; + } + + if (scalar(@rules) == 0) { + # No active rules, use base rule. + my $rule0 = $rule_base{$rules}->[1]; + $name1 = zonename($format, $rule0->[RULE_LETTERS], undef); + $name2 = undef; + $rule1 = $rule2 = RULE_NULL; + } elsif (scalar(@rules) == 1) { + # One active rule. This is a year that DST started or stopped + # being observed + print STDERR Data::Dump::dump(\@rules), "\n"; + die "one rule for $name"; + $name1 = zonename($format, $rules[0]->[RULE_LETTERS], undef); + $name2 = undef; + $rule1 = $rule2 = RULE_NULL; + } else { # 2 rules + if ($rules[0]->[RULE_SAVED] != 0) { @rules = reverse(@rules); }; # Standard time first. + $rule1 = $rules[0]; + $rule2 = $rules[1]; + $name1 = zonename($format, $rule1->[RULE_LETTERS], 0); + $name2 = zonename($format, $rule2->[RULE_LETTERS], 1); + } + + my $gmtoff_min = offset_minutes($gmtoff); + + # https://github.com/dmitryme/erlang_localtime/blob/master/include/tz_database.hrl + # Documentation for output format. + # + # {TimeZoneName, {StdAbbr, StdName}, {DstAbbr, DstName}, StdMin, DstMin, DstStartDay, DstStartTime, DstEndDay, DstEndTime} + # TimeZoneName = String(), TimeZone name, MUST be unique. It is a key + # StdName = {String(), String()}, name and abbreviations of timezone before daylight shift + # DstName = {String(), String()}, name and abbreviations of timezone after daylight shift + # StdMin = Integer(), GMT offset in minutes. W/o daylight savings + # DstMin = Integer(), daylight saving. Adjustment for GMT offset, when daylight saving is on + # DstStartDay = {NthWeekday, Weekday, Month}, daylight saving transition rule. Can be undef if no daylight saving rule specified + # NthWeekday = Integer(), 1(first week),2(second week),3(...),4(...),5(...),-1(last week) + # Weekday = atom(), sun,mon,tue,wed,thu,fri,sat + # Month = atom(), jan,deb,mar,apr,may,jun,jul,aug,sep,oct,mov,dec + # DstStartTime = {Hour, Min} - time of daylight saving transition + # Hour = Integer(), [0..23] + # Min = Integer(), [0..59] + # DstEndDay = {NthWeekday, Weekday, Month} - transition back to std. Can be undef if no daylight saving rule specified + # {Hour, Min} - time of transition to std. + # + # Example output. + # {"America/Los Angeles",{"PST","Pacific Standard Time"},{"PDT","Pacific Daylight Time"},-480,60,{2,sun,mar},{2,0},{1,sun,nov},{2,0}}, + # {"America/Puerto Rico",{"AST","AST"},undef,-240,0,undef,{0,0},undef,{0,0}}, + + $output{$name} = build_term( + '#ZONE#', # TimeZoneName + [ $name1, $name1 ], # StdAbbr, StdName + defined $name2 ? [ $name2, $name2 ] : undef, # DstAbbr, DstName + $gmtoff_min, # StdMin + $rule2->[RULE_SAVED], # DstMin + $rule2->[RULE_DATE], # DstStartDay, + rulestart($rule2->[RULE_TIME], $gmtoff_min, 0, $name), # DstStartTime + $rule1->[RULE_DATE], # DstEndDay + rulestart($rule1->[RULE_TIME], $gmtoff_min, $rule2->[RULE_SAVED], $name), # DstEndTime + ); +} + +sub zonename { + my ($format, $letter, $dst) = @_; + if (defined($dst) && $format =~ m,/,) { + my @format = split(m,/,, $format); + return $format[$dst]; + } else { + return sprintf($format, $letter eq '-' ? '' : $letter); + } +} + +# erlang_localtime wants the transition time in the local wallclock +# time before the transition. +sub rulestart { + my ($start, $gmtoff, $save, $name) = @_; + $start =~ m/^(\d+)(\w)$/ or die "no match for $start"; + ($start, my $modified) = ($1, $2); + + if ($modified eq 'W') { + # No adjustment needed. + } elsif ($modified eq 'Z') { + $start += $gmtoff + $save; + + # If adding a (negative) GMT offset puts the time in the previous + # day, move it up to the start of the day. If adding a (positive) + # GMT offset puts the time in the next day, move it back to the + # end of the day. I think that this is wrong, and that actually + # the transition should be moved into the previous or next day, + # but it's not possible to consistently handle these rules with + # "third Sunday in May" logic. (For example, the day before the + # third Sunday in May may be either the second or third Saturday + # in May.) Having the transition off by a few hours every year is + # better than having it be off by a week every seven years or so. + if ($start < 0) { + warn "moving rule to beginning of day for $name"; + push(@problem, "Moving rule to beginning of day for Zone $name."); + $start = 0; + } elsif ($start > HPD * MPH) { + warn "moving rule to end of day for $name"; + push(@problem, "Moving rule to end of day for Zone $name."); + $start = HPD * MPH; + } + } elsif ($modified eq 'S') { + # To handle this properly, we would have to know the prevailing + # wall-clock offset from standard time. In the general case this + # could be different from $save, but in practice all of the + # transitions are from standard time to daylight time or vice + # versa and not between two different offsets from standard time, + # and so this works. + $start += $save; + } + + return [ int($start / MPH), $start % MPH ]; +} + +sub build_term { + my @term; + foreach my $e (@_) { + if (! defined $e) { + push(@term, 'undef'); + } elsif (ref($e) eq 'ARRAY') { + push(@term, build_term(@$e)); + } elsif ($e =~ m/^(\-?\d+|[a-z].*)$/) { + push(@term, $e); # number or term + } else { + push(@term, "\"$e\""); # string + } + } + + return '{' . join(',', @term) . '}'; +} + +sub link_line { + my ($l) = @_; + my ($LINK, $canon_name, $old_name) = split(m/\s+/, $l); + if (! defined $output{$canon_name}) { + print STDERR "no zone $canon_name for link $old_name\n"; + } else { + $output{$old_name} = $output{$canon_name}; + } +} + +my (@mon_to_name, %mon_from_name, @dow_to_name, %dow_from_name); +INIT { + @mon_to_name = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); + %mon_from_name = map { $mon_to_name[$_] => $_ } (0..$#mon_to_name); + @dow_to_name = qw(Sun Mon Tue Wed Thu Fri Sat); + %dow_from_name= map { $dow_to_name[$_] => $_ } (0..$#dow_to_name); +} + +sub on_to_day_of_month { + my ($on, $year, $month) = @_; + + my $day; + if ($on =~ m/^\d+$/) { + $day = $on; + } else { + my ($desired_dow, $time_base); + if ($on =~ m/^(\w+)>=(\d+)$/) { + $desired_dow = $dow_from_name{$1}; my $desired_day = $2; + $time_base = timelocal(0, 0, 0, $desired_day, $month, $year); + } elsif ($on =~ m/^last(\w+)$/) { + $desired_dow = $dow_from_name{$1}; + # One week before the beginning of the next month. + $time_base = timelocal_nocheck(0, 0, 0, - DPW, $month + 1, $year); + } else { + die "match $on failed"; + } + ($day, my $dow) = (localtime($time_base))[3,6]; + if ($dow != $desired_dow) { $day += (DPW + $desired_dow - $dow) % DPW; } + } + + return $day; +} + +# Returns the epoch that the rule was last active, or undef if the +# rule has never been active (i.e., it begins in the future). +sub last_active_epoch { + my ($from, $to, $in, $on) = @_; + # $from is a year. + # $to is a year, or 'only', or 'max'. + # $in is a month name (e.g., 'Jan'). + # $on is a day-of-month, or a day-of-week>=day-of-month, or 'last'day-of-week. + + my $month = $mon_from_name{$in}; + + # First check the rule's from time; if in the future return undef. + if ($from > $current_year) { return undef; } + if ($from == $current_year && $month > $current_month) { return undef; } + if ($from == $current_year && $month == $current_month) { + my $day = on_to_day_of_month($on, $current_year, $current_month); + if ($day > $current_day) { return undef; } + } + + # Now check the rule's to time. If the rule covers the current + # year, but doesn't fire until later in the year, subtract a year. + my $year = $to eq 'max' ? $current_year : $to eq 'only' ? $from : $to; + event_fires_later_this_year: { + if ($year < $current_year) { last; } + if ($year == $current_year && $month < $current_month) { last; } + my $day = on_to_day_of_month($on, $year, $month); + if ($year == $current_year && $month == $current_month && $day < $current_day) { last; } + # If we get here, this year's rule instance is not active until + # later in the year, so subtract a year. + $year -= 1; + } + + my $day = on_to_day_of_month($on, $year, $month); + return timelocal(0, 0, 0, $day, $month, $year); +} +