#! /usr/bin/env perl # This script processes time zone definitions from the Olson database # and transforms them into the format used by the erlang_localtime # library. # # Some helpful URLs: # https://www.iana.org/time-zones # https://github.com/dmitryme/erlang_localtime # http://www.cstdbill.com/tzdb/tz-how-to.html # Known bugs (should fix): # * Discarding past/future Rules considers only year. Better to # use a window [now, one-year-from-now) so that our output rules # are valid for at least a year. # Known bugs (can't fix without upstream changes): # * Africa/Casablanca: Has more than one DST transition per year. # * Pacific/Fiji: DST does not start/end on Nth DayOfWeek in month. # * America/Godthab: Transition time as UTC moves into previous day. use strict; use warnings; use Getopt::Long; use Time::Local qw( timelocal timelocal_nocheck ); use Data::Dump; use constant DPW => 7; # Days per week. use constant HPD => 24; # Hours per day. use constant MPH => 60; # Minutes per hour. use constant SPECIFICITY_MAX => 100; use constant { RULE_DATE => 0, RULE_SAVED => 1, RULE_TIME => 2, RULE_LETTERS => 3, RULE_SPECIFICITY => 4, }; use constant RULE_NULL => [ undef, 0, '0W', '-', 0 ]; my $version = undef; my $output_file; my $date; GetOptions( 'version=s' => \$version, 'output=s' => \$output_file, 'date=s' => \$date, ) or die; my @data = @ARGV; my ($current_day, $current_month, $current_year) = do { if (defined $date) { $date =~ m/^(\d\d\d\d)-?(\d\d)-?(\d\d)$/ or die "parse date \"$date\" as YYYY-MM-DD failed\n"; ($3, $2 - 1, $1); } else { my @lt = localtime(time); ($lt[3], $lt[4], $lt[5] + 1900); } }; my %rule; my %rule_base; my %output; my @problem; # Add the null rule. $rule_base{'-'} = [ 0, RULE_NULL ]; # Have to make three passes through the file. # First pass: process Rule lines. process_data(\@data, { Rule => \&rule_line }); # Second pass: process Zone lines. process_data(\@data, { Zone => \&zone_line }); # Third pass, process Link lines. process_data(\@data, { Link => \&link_line }); my $ofh; if (defined $output_file) { open($ofh, '>', $output_file) or die "open $output_file for write failed: $!\n"; } else { $output_file = 'STDOUT'; open($ofh, '>&STDOUT') or die "dup $output_file failed: $!\n"; } printf $ofh " %%%% Automatically generated from the time zone database%s for %04d-%02d-%02d.\n", (defined $version ? " version $version" : ''), $current_year, $current_month + 1, $current_day; print $ofh "\n"; print $ofh " %% Problems:\n"; print $ofh map(" %% $_\n", @problem); print $ofh "\n"; print $ofh join(",\n", map { (my $o = $output{$_}) =~ s/#ZONE#/$_/; " $o"; } (sort keys %output)), "\n"; close($ofh) or die "close $output_file failed: $!\n"; exit(0); ######################################################################## sub process_data { my ($data, $handler) = @_; foreach my $f (@$data) { my $fh = $f; { # We open the file this way so that die and warn print the filename. no strict 'refs'; open($fh, '<', $fh) or die "open $fh for read failed: $!"; } # Read the Olson database. my ($last_linetype, $last_lineprefix); while (my $l = <$fh>) { chomp($l); $l =~ s/\s*#.*//; # Remove comments. $l =~ s/\s+$//; # Remove trailing whitespace. $l eq '' and next; my $linetype = do { if ($l =~ m/^((\S+)\s+\S+)/) { # Continuation line. $last_linetype = $2; $last_lineprefix = $1; } else { $l = $last_lineprefix . $l; } $last_linetype; }; if (defined(my $h = $handler->{$linetype})) { $h->($l); } } close($fh); } } # Converts an offset in the format "[+-]?HH:MM" or "[+-]?HH" into minutes. # For example, "2:00" -> 120, "-0:30" -> -30, "+5" -> 300. sub offset_minutes { my ($off, $adj) = @_; my $convert_offset = sub { $_[0] =~ m/^([\+\-]?)(\d+)(?::(\d+))?$/ or die "offset \"$_[0]\" did not match"; my $m = $2 * MPH; if (defined $3) { $m += $3; } if ($1 eq '-') { $m = -$m; } return $m; }; my $offset = $convert_offset->($off); if (defined $adj) { $offset -= $convert_offset->($adj); } return $offset; } sub rule_line { my ($l) = @_; my ($RULE, $name, $from, $to, $type, $in, $on, $at, $save, $letters) = split(m/\s+/, $l); # The rule lines in the time zone database describe transitions. # # Cases we have to handle: # # 1. There are no rules still active. We need to determine the last # transition and apply it statically. (Example: Ghana.) # # 2. There are active rules. We should determine the current rules # (ignore old rules and future rules) and transfer them to the # output rules. (Example: Morocco.) my $save_minutes = offset_minutes($save); # The rules that this function generates has these parts. # These are accessed using the RULE_* constants defined above. # 1. A description of the date on which the transition happens, # or undef if this is a base rule. # 2. The number of minutes "saved" (the difference from the base # offset for the zone). # 3. The time in minutes after midnight at which the transition # happens. # 4. The letter for the new state (often 'S' for standard, or 'D' # for daylight). # 5. The number of years the rule covers (used to select which rules # to eliminate when there are more than two rules in a year). # Update the base rule. my $rule_last_active_epoch = last_active_epoch($from, $to, $in, $on); if (defined $rule_last_active_epoch) { if (! defined $rule_base{$name} || $rule_base{$name}->[0] < $rule_last_active_epoch) { $rule_base{$name} = [ $rule_last_active_epoch, [ undef, $save_minutes, 0, $letters, SPECIFICITY_MAX ] ]; } } my $transform_rule = sub { my $rule; if ($on =~ m/^(\w+)>=(\d+)$/) { # If start day is not a multiple of a week, round it to nearest week. $rule = [ int(($2-1+int(DPW/2))/DPW) + 1, lc($1), lc($in) ]; # e.g. [ '2', sun', 'nov' ] if (($2-1) % DPW) { warn "on $on fuzz for $name -> $rule->[0]"; push(@problem, "Rounded $in $on to [@$rule] in Rule $name."); } } elsif ($on =~ m/^last(\w+)$/) { $rule = [ 'last', lc($1), lc($in) ]; # e.g. [ 'last', 'sun', 'apr' ] } else { warn "no match for $on in rule $l"; push(@problem, "Ignored $in $on in Rule $name."); } my $at_minutes = $at; if ($at =~ m/^([\+\-]?)(?:(\d+)\:)(\d+)([wsguz]?)/) { $at_minutes = $2 * MPH + $3; my $z = $4; if ($1 eq '-') { $at_minutes = -$at_minutes; } if ($z eq '' or $z eq 'w') { # Change specified at local (pre-change) wall-clock time. $at_minutes .= 'W'; } elsif ($z eq 's') { # Change specified at local standard time. $at_minutes .= 'S'; } elsif ($z eq 'g' or $z eq 'u' or $z eq 'z') { # Change specified at UTC. $at_minutes .= 'Z'; } } my $specificity = $to eq 'only' ? 1 : $to eq 'max' ? SPECIFICITY_MAX : $to - $from + 1; return [ $rule, $save_minutes, $at_minutes, $letters, $specificity ]; }; # We ignore any rule that has a definite to (end) time. if (($to eq 'only' && $from == $current_year) || ($to ne 'only' && ($to eq 'max' || $to >= $current_year) && $from <= $current_year)) { push(@{$rule{$name}}, $transform_rule->()); } } # Process a Zone line and updates the %output hash. sub zone_line { my ($l) = @_; my ($ZONE, $name, $gmtoff, $rules, $format, $until) = split(m/\s+/, $l, 6); # We ignore any zone line that has a definite until (end) time that # is in the past. if (defined $until) { my ($until_year, $until_month, $until_day) = split_ymd($until); if (($until_year > $current_year) || (($until_year == $current_year) && (($until_month > $current_month) || (($until_month == $current_month) && ($until_day >= $current_day))))) { "future until \"$until\" not handled"; } return; } # Set $name1 and $rule1 for (start of) standard time. If zone has # DST, set name2 and $rule2 for (start of ) DST, otherwise it is # equal to standard time. my ($name1, $name2, $rule1, $rule2); my @rules = @{$rule{$rules} || []}; # If we have more than two rules, discard all except for the two # most-specific rules. if (scalar(@rules) > 2) { print STDERR "discarding excess rules from $rules for $name\n", Data::Dump::dump(\@rules), "\n"; push(@problem, "Discarded excess rules for Zone $name."); @rules = sort { $b->[RULE_SPECIFICITY] <=> $a->[RULE_SPECIFICITY] } @rules; $#rules = 1; } if (scalar(@rules) == 0) { # No active rules, use base rule. my $rule0 = $rule_base{$rules}->[1]; $name1 = zonename($format, $rule0->[RULE_LETTERS], undef); $name2 = undef; $rule1 = $rule2 = RULE_NULL; } elsif (scalar(@rules) == 1) { # One active rule. This is a year that DST started or stopped # being observed. erlang_localtime doesn't handle this. If DST # stopped being observed in this year, don't output a DST rule. # If DST started being observed, do. (Except that we don't handle # this yet.) print STDERR Data::Dump::dump(\@rules), "\n"; die "one rule for $name"; $name1 = zonename($format, $rules[0]->[RULE_LETTERS], undef); $name2 = undef; $rule1 = $rule2 = RULE_NULL; } else { # 2 rules if ($rules[0]->[RULE_SAVED] != 0) { @rules = reverse(@rules); }; # Standard time first. $rule1 = $rules[0]; $rule2 = $rules[1]; $name1 = zonename($format, $rule1->[RULE_LETTERS], 0); $name2 = zonename($format, $rule2->[RULE_LETTERS], 1); } my $gmtoff_min = offset_minutes($gmtoff); # https://github.com/dmitryme/erlang_localtime/blob/master/include/tz_database.hrl # Documentation for output format. # # {TimeZoneName, {StdAbbr, StdName}, {DstAbbr, DstName}, StdMin, DstMin, DstStartDay, DstStartTime, DstEndDay, DstEndTime} # TimeZoneName = String(), TimeZone name, MUST be unique. It is a key # StdName = {String(), String()}, name and abbreviations of timezone before daylight shift # DstName = {String(), String()}, name and abbreviations of timezone after daylight shift # StdMin = Integer(), GMT offset in minutes. W/o daylight savings # DstMin = Integer(), daylight saving. Adjustment for GMT offset, when daylight saving is on # DstStartDay = {NthWeekday, Weekday, Month}, daylight saving transition rule. Can be undef if no daylight saving rule specified # NthWeekday = Integer(), 1(first week),2(second week),3(...),4(...),5(...),-1(last week) # Weekday = atom(), sun,mon,tue,wed,thu,fri,sat # Month = atom(), jan,deb,mar,apr,may,jun,jul,aug,sep,oct,mov,dec # DstStartTime = {Hour, Min} - time of daylight saving transition # Hour = Integer(), [0..23] # Min = Integer(), [0..59] # DstEndDay = {NthWeekday, Weekday, Month} - transition back to std. Can be undef if no daylight saving rule specified # {Hour, Min} - time of transition to std. # # Example output. # {"America/Los Angeles",{"PST","Pacific Standard Time"},{"PDT","Pacific Daylight Time"},-480,60,{2,sun,mar},{2,0},{1,sun,nov},{2,0}}, # {"America/Puerto Rico",{"AST","AST"},undef,-240,0,undef,{0,0},undef,{0,0}}, $output{$name} = build_term( '#ZONE#', # TimeZoneName [ $name1, $name1 ], # StdAbbr, StdName defined $name2 ? [ $name2, $name2 ] : undef, # DstAbbr, DstName $gmtoff_min, # StdMin $rule2->[RULE_SAVED], # DstMin $rule2->[RULE_DATE], # DstStartDay, rulestart($rule2->[RULE_TIME], $gmtoff_min, 0, $name), # DstStartTime $rule1->[RULE_DATE], # DstEndDay rulestart($rule1->[RULE_TIME], $gmtoff_min, $rule2->[RULE_SAVED], $name), # DstEndTime ); } sub zonename { my ($format, $letter, $dst) = @_; if (defined($dst) && $format =~ m,/,) { my @format = split(m,/,, $format); return $format[$dst]; } else { return sprintf($format, $letter eq '-' ? '' : $letter); } } # erlang_localtime wants the transition time in the local wallclock # time before the transition. sub rulestart { my ($start, $gmtoff, $save, $name) = @_; $start =~ m/^(\d+)(\w)$/ or die "no match for $start"; ($start, my $modified) = ($1, $2); if ($modified eq 'W') { # No adjustment needed. } elsif ($modified eq 'Z') { $start += $gmtoff + $save; # If adding a (negative) GMT offset puts the time in the previous # day, move it up to the start of the day. If adding a (positive) # GMT offset puts the time in the next day, move it back to the # end of the day. I think that this is wrong, and that actually # the transition should be moved into the previous or next day, # but it's not possible to consistently handle these rules with # "third Sunday in May" logic. (For example, the day before the # third Sunday in May may be either the second or third Saturday # in May.) Having the transition off by a few hours every year is # better than having it be off by a week every seven years or so. if ($start < 0) { warn "moving rule to beginning of day for $name"; push(@problem, "Moving rule to beginning of day for Zone $name."); $start = 0; } elsif ($start > HPD * MPH) { warn "moving rule to end of day for $name"; push(@problem, "Moving rule to end of day for Zone $name."); $start = HPD * MPH; } } elsif ($modified eq 'S') { # To handle this properly, we would have to know the prevailing # wall-clock offset from standard time. In the general case this # could be different from $save, but in practice all of the # transitions are from standard time to daylight time or vice # versa and not between two different offsets from standard time, # and so this works. $start += $save; } return [ int($start / MPH), $start % MPH ]; } sub build_term { my @term; foreach my $e (@_) { if (! defined $e) { push(@term, 'undef'); } elsif (ref($e) eq 'ARRAY') { push(@term, build_term(@$e)); } elsif ($e =~ m/^(\-?\d+|[a-z].*)$/) { push(@term, $e); # number or term } else { push(@term, "\"$e\""); # string } } return '{' . join(',', @term) . '}'; } sub link_line { my ($l) = @_; my ($LINK, $canon_name, $old_name) = split(m/\s+/, $l); if (! defined $output{$canon_name}) { print STDERR "no zone $canon_name for link $old_name\n"; } else { $output{$old_name} = $output{$canon_name}; } } my (@mon_to_name, %mon_from_name, @dow_to_name, %dow_from_name); INIT { @mon_to_name = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); %mon_from_name = map { $mon_to_name[$_] => $_ } (0..$#mon_to_name); @dow_to_name = qw(Sun Mon Tue Wed Thu Fri Sat); %dow_from_name= map { $dow_to_name[$_] => $_ } (0..$#dow_to_name); } sub split_ymd { my ($ymd) = @_; $ymd =~ m/^(\d+)(?:\s+(\w+)(?:\s+(\d+)))?/ or die "parse \"$ymd\" for ymd failed"; my $year = $1; my $month = do { if (defined $2) { defined $mon_from_name{$2} or die "parse \"$ymd\" for month failed"; $mon_from_name{$2}; } else { 0 } }; my $day = defined $3 ? $3 : 0; return ($year, $month, $day); } sub on_to_day_of_month { my ($on, $year, $month) = @_; my $day; if ($on =~ m/^\d+$/) { $day = $on; print "Day extracted via regex to: $day\n"; } else { my ($desired_dow, $time_base); if ($on =~ m/^(\w+)>=(\d+)$/) { $desired_dow = $dow_from_name{$1}; my $desired_day = $2; $time_base = timelocal(0, 0, 0, $desired_day, $month, $year); print "Regex: $on => desired_dow: $desired_dow. Time_base: $time_base\n"; } elsif ($on =~ m/^(\w+)<=(\d+)$/) { $desired_dow = $dow_from_name{$1}; my $desired_day = $2; $time_base = timelocal(0, 0, 0, $desired_day, $month, $year); } elsif ($on =~ m/^last(\w+)$/) { $desired_dow = $dow_from_name{$1}; # One week before the beginning of the next month. $time_base = timelocal_nocheck(0, 0, 0, - DPW, $month + 1, $year); } else { die "match $on failed"; } ($day, my $dow) = (localtime($time_base))[3,6]; print "Current Day: $day\n"; print "Comparing $dow = $desired_dow\n"; if ($dow != $desired_dow) { $day += (DPW + $desired_dow - $dow) % DPW; } print "After comparison: Current Day: $day\n"; } return $day; } # Returns the epoch that the rule was last active, or undef if the # rule has never been active (i.e., it begins in the future). sub last_active_epoch { my ($from, $to, $in, $on) = @_; # $from is a year. # $to is a year, or 'only', or 'max'. # $in is a month name (e.g., 'Jan'). # $on is a day-of-month, or a day-of-week>=day-of-month, or 'last'day-of-week. my $month = $mon_from_name{$in}; # First check the rule's from time; if in the future return undef. if ($from > $current_year) { return undef; } if ($from == $current_year && $month > $current_month) { return undef; } if ($from == $current_year && $month == $current_month) { my $day = on_to_day_of_month($on, $current_year, $current_month); if ($day > $current_day) { return undef; } } # Now check the rule's to time. If the rule covers the current # year, but doesn't fire until later in the year, subtract a year. my $year = $to eq 'max' ? $current_year : $to eq 'only' ? $from : $to; event_fires_later_this_year: { if ($year < $current_year) { last; } if ($year == $current_year && $month < $current_month) { last; } my $day = on_to_day_of_month($on, $year, $month); if ($year == $current_year && $month == $current_month && $day < $current_day) { last; } # If we get here, this year's rule instance is not active until # later in the year, so subtract a year. $year -= 1; } my $day = on_to_day_of_month($on, $year, $month); print "On=$on, Year=$year, Month=$month ====> Day=$day\n"; ## changing the below call to timelocal_nocheck stops the crash, but it feels wrong ## especially since without it, it seems to produce a lot of "redundant" error messages ## I don't have time to explore this now, but I'll come back to it return timelocal(0, 0, 0, $day, $month, $year); }