527 lines
18 KiB
Perl
Executable file
527 lines
18 KiB
Perl
Executable file
#! /usr/bin/env perl
|
|
|
|
# This script processes time zone definitions from the Olson database
|
|
# and transforms them into the format used by the erlang_localtime
|
|
# library.
|
|
#
|
|
# Some helpful URLs:
|
|
# https://www.iana.org/time-zones
|
|
# https://github.com/dmitryme/erlang_localtime
|
|
# http://www.cstdbill.com/tzdb/tz-how-to.html
|
|
|
|
# Known bugs (should fix):
|
|
# * Discarding past/future Rules considers only year. Better to
|
|
# use a window [now, one-year-from-now) so that our output rules
|
|
# are valid for at least a year.
|
|
# Known bugs (can't fix without upstream changes):
|
|
# * Africa/Casablanca: Has more than one DST transition per year.
|
|
# * Pacific/Fiji: DST does not start/end on Nth DayOfWeek in month.
|
|
# * America/Godthab: Transition time as UTC moves into previous day.
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use Getopt::Long;
|
|
use Time::Local qw( timelocal timelocal_nocheck );
|
|
use Data::Dump;
|
|
|
|
use constant DPW => 7; # Days per week.
|
|
use constant HPD => 24; # Hours per day.
|
|
use constant MPH => 60; # Minutes per hour.
|
|
use constant SPECIFICITY_MAX => 100;
|
|
|
|
use constant {
|
|
RULE_DATE => 0,
|
|
RULE_SAVED => 1,
|
|
RULE_TIME => 2,
|
|
RULE_LETTERS => 3,
|
|
RULE_SPECIFICITY => 4,
|
|
};
|
|
use constant RULE_NULL => [ undef, 0, '0W', '-', 0 ];
|
|
|
|
my $version = undef;
|
|
my $output_file;
|
|
my $date;
|
|
|
|
GetOptions(
|
|
'version=s' => \$version,
|
|
'output=s' => \$output_file,
|
|
'date=s' => \$date,
|
|
) or die;
|
|
|
|
my @data = @ARGV;
|
|
|
|
my ($current_day, $current_month, $current_year) = do {
|
|
if (defined $date) {
|
|
$date =~ m/^(\d\d\d\d)-?(\d\d)-?(\d\d)$/ or die "parse date \"$date\" as YYYY-MM-DD failed\n";
|
|
($3, $2 - 1, $1);
|
|
} else {
|
|
my @lt = localtime(time);
|
|
($lt[3], $lt[4], $lt[5] + 1900);
|
|
}
|
|
};
|
|
|
|
my %rule;
|
|
my %rule_base;
|
|
my %output;
|
|
my @problem;
|
|
|
|
# Add the null rule.
|
|
$rule_base{'-'} = [ 0, RULE_NULL ];
|
|
|
|
# Have to make three passes through the file.
|
|
# First pass: process Rule lines.
|
|
process_data(\@data, { Rule => \&rule_line });
|
|
# Second pass: process Zone lines.
|
|
process_data(\@data, { Zone => \&zone_line });
|
|
# Third pass, process Link lines.
|
|
process_data(\@data, { Link => \&link_line });
|
|
|
|
my $ofh;
|
|
if (defined $output_file) {
|
|
open($ofh, '>', $output_file) or die "open $output_file for write failed: $!\n";
|
|
} else {
|
|
$output_file = 'STDOUT';
|
|
open($ofh, '>&STDOUT') or die "dup $output_file failed: $!\n";
|
|
}
|
|
|
|
printf $ofh " %%%% Automatically generated from the time zone database%s for %04d-%02d-%02d.\n",
|
|
(defined $version ? " version $version" : ''),
|
|
$current_year, $current_month + 1, $current_day;
|
|
|
|
print $ofh "\n";
|
|
print $ofh " %% Problems:\n";
|
|
print $ofh map(" %% $_\n", @problem);
|
|
print $ofh "\n";
|
|
|
|
print $ofh join(",\n", map {
|
|
(my $o = $output{$_}) =~ s/#ZONE#/$_/;
|
|
" $o";
|
|
} (sort keys %output)),
|
|
"\n";
|
|
|
|
close($ofh) or die "close $output_file failed: $!\n";
|
|
|
|
exit(0);
|
|
|
|
########################################################################
|
|
|
|
sub process_data {
|
|
my ($data, $handler) = @_;
|
|
|
|
foreach my $f (@$data) {
|
|
my $fh = $f;
|
|
{
|
|
# We open the file this way so that die and warn print the filename.
|
|
no strict 'refs';
|
|
open($fh, '<', $fh) or die "open $fh for read failed: $!";
|
|
}
|
|
|
|
# Read the Olson database.
|
|
my ($last_linetype, $last_lineprefix);
|
|
while (my $l = <$fh>) {
|
|
chomp($l);
|
|
$l =~ s/\s*#.*//; # Remove comments.
|
|
$l =~ s/\s+$//; # Remove trailing whitespace.
|
|
$l eq '' and next;
|
|
|
|
my $linetype = do {
|
|
if ($l =~ m/^((\S+)\s+\S+)/) {
|
|
# Continuation line.
|
|
$last_linetype = $2;
|
|
$last_lineprefix = $1;
|
|
} else {
|
|
$l = $last_lineprefix . $l;
|
|
}
|
|
$last_linetype;
|
|
};
|
|
|
|
if (defined(my $h = $handler->{$linetype})) {
|
|
$h->($l);
|
|
}
|
|
}
|
|
|
|
close($fh);
|
|
}
|
|
}
|
|
|
|
# Converts an offset in the format "[+-]?HH:MM" or "[+-]?HH" into minutes.
|
|
# For example, "2:00" -> 120, "-0:30" -> -30, "+5" -> 300.
|
|
sub offset_minutes {
|
|
my ($off, $adj) = @_;
|
|
|
|
my $convert_offset = sub {
|
|
$_[0] =~ m/^([\+\-]?)(\d+)(?::(\d+))?$/
|
|
or die "offset \"$_[0]\" did not match";
|
|
my $m = $2 * MPH;
|
|
if (defined $3) { $m += $3; }
|
|
if ($1 eq '-') { $m = -$m; }
|
|
return $m;
|
|
};
|
|
|
|
my $offset = $convert_offset->($off);
|
|
if (defined $adj) {
|
|
$offset -= $convert_offset->($adj);
|
|
}
|
|
|
|
return $offset;
|
|
}
|
|
|
|
|
|
sub rule_line {
|
|
my ($l) = @_;
|
|
my ($RULE, $name, $from, $to, $type, $in, $on, $at, $save, $letters) = split(m/\s+/, $l);
|
|
|
|
# The rule lines in the time zone database describe transitions.
|
|
#
|
|
# Cases we have to handle:
|
|
#
|
|
# 1. There are no rules still active. We need to determine the last
|
|
# transition and apply it statically. (Example: Ghana.)
|
|
#
|
|
# 2. There are active rules. We should determine the current rules
|
|
# (ignore old rules and future rules) and transfer them to the
|
|
# output rules. (Example: Morocco.)
|
|
|
|
my $save_minutes = offset_minutes($save);
|
|
|
|
# The rules that this function generates has these parts.
|
|
# These are accessed using the RULE_* constants defined above.
|
|
# 1. A description of the date on which the transition happens,
|
|
# or undef if this is a base rule.
|
|
# 2. The number of minutes "saved" (the difference from the base
|
|
# offset for the zone).
|
|
# 3. The time in minutes after midnight at which the transition
|
|
# happens.
|
|
# 4. The letter for the new state (often 'S' for standard, or 'D'
|
|
# for daylight).
|
|
# 5. The number of years the rule covers (used to select which rules
|
|
# to eliminate when there are more than two rules in a year).
|
|
|
|
# Update the base rule.
|
|
my $rule_last_active_epoch = last_active_epoch($from, $to, $in, $on);
|
|
if (defined $rule_last_active_epoch) {
|
|
if (! defined $rule_base{$name} || $rule_base{$name}->[0] < $rule_last_active_epoch) {
|
|
$rule_base{$name} = [ $rule_last_active_epoch, [ undef, $save_minutes, 0, $letters, SPECIFICITY_MAX ] ];
|
|
}
|
|
}
|
|
|
|
my $transform_rule = sub {
|
|
my $rule;
|
|
if ($on =~ m/^(\w+)>=(\d+)$/) {
|
|
# If start day is not a multiple of a week, round it to nearest week.
|
|
$rule = [ int(($2-1+int(DPW/2))/DPW) + 1, lc($1), lc($in) ]; # e.g. [ '2', sun', 'nov' ]
|
|
if (($2-1) % DPW) {
|
|
warn "on $on fuzz for $name -> $rule->[0]";
|
|
push(@problem, "Rounded $in $on to [@$rule] in Rule $name.");
|
|
}
|
|
} elsif ($on =~ m/^last(\w+)$/) {
|
|
$rule = [ 'last', lc($1), lc($in) ]; # e.g. [ 'last', 'sun', 'apr' ]
|
|
} else {
|
|
warn "no match for $on in rule $l";
|
|
push(@problem, "Ignored $in $on in Rule $name.");
|
|
}
|
|
my $at_minutes = $at;
|
|
if ($at =~ m/^([\+\-]?)(?:(\d+)\:)(\d+)([wsguz]?)/) {
|
|
$at_minutes = $2 * MPH + $3; my $z = $4;
|
|
if ($1 eq '-') { $at_minutes = -$at_minutes; }
|
|
if ($z eq '' or $z eq 'w') {
|
|
# Change specified at local (pre-change) wall-clock time.
|
|
$at_minutes .= 'W';
|
|
} elsif ($z eq 's') {
|
|
# Change specified at local standard time.
|
|
$at_minutes .= 'S';
|
|
} elsif ($z eq 'g' or $z eq 'u' or $z eq 'z') {
|
|
# Change specified at UTC.
|
|
$at_minutes .= 'Z';
|
|
}
|
|
}
|
|
my $specificity =
|
|
$to eq 'only' ? 1 :
|
|
$to eq 'max' ? SPECIFICITY_MAX :
|
|
$to - $from + 1;
|
|
|
|
return [ $rule, $save_minutes, $at_minutes, $letters, $specificity ];
|
|
};
|
|
|
|
# We ignore any rule that has a definite to (end) time.
|
|
if (($to eq 'only' && $from == $current_year) ||
|
|
($to ne 'only' && ($to eq 'max' || $to >= $current_year) && $from <= $current_year)) {
|
|
push(@{$rule{$name}}, $transform_rule->());
|
|
}
|
|
}
|
|
|
|
# Process a Zone line and updates the %output hash.
|
|
sub zone_line {
|
|
my ($l) = @_;
|
|
my ($ZONE, $name, $gmtoff, $rules, $format, $until) = split(m/\s+/, $l, 6);
|
|
# We ignore any zone line that has a definite until (end) time that
|
|
# is in the past.
|
|
if (defined $until) {
|
|
my ($until_year, $until_month, $until_day) = split_ymd($until);
|
|
if (($until_year > $current_year) ||
|
|
(($until_year == $current_year) &&
|
|
(($until_month > $current_month) ||
|
|
(($until_month == $current_month) &&
|
|
($until_day >= $current_day))))) {
|
|
"future until \"$until\" not handled";
|
|
}
|
|
return;
|
|
}
|
|
|
|
# Set $name1 and $rule1 for (start of) standard time. If zone has
|
|
# DST, set name2 and $rule2 for (start of ) DST, otherwise it is
|
|
# equal to standard time.
|
|
my ($name1, $name2, $rule1, $rule2);
|
|
my @rules = @{$rule{$rules} || []};
|
|
|
|
# If we have more than two rules, discard all except for the two
|
|
# most-specific rules.
|
|
if (scalar(@rules) > 2) {
|
|
print STDERR "discarding excess rules from $rules for $name\n", Data::Dump::dump(\@rules), "\n";
|
|
push(@problem, "Discarded excess rules for Zone $name.");
|
|
@rules = sort { $b->[RULE_SPECIFICITY] <=> $a->[RULE_SPECIFICITY] } @rules;
|
|
$#rules = 1;
|
|
}
|
|
|
|
if (scalar(@rules) == 0) {
|
|
# No active rules, use base rule.
|
|
my $rule0 = $rule_base{$rules}->[1];
|
|
$name1 = zonename($format, $rule0->[RULE_LETTERS], undef);
|
|
$name2 = undef;
|
|
$rule1 = $rule2 = RULE_NULL;
|
|
} elsif (scalar(@rules) == 1) {
|
|
# One active rule. This is a year that DST started or stopped
|
|
# being observed. erlang_localtime doesn't handle this. If DST
|
|
# stopped being observed in this year, don't output a DST rule.
|
|
# If DST started being observed, do. (Except that we don't handle
|
|
# this yet.)
|
|
print STDERR Data::Dump::dump(\@rules), "\n";
|
|
die "one rule for $name";
|
|
$name1 = zonename($format, $rules[0]->[RULE_LETTERS], undef);
|
|
$name2 = undef;
|
|
$rule1 = $rule2 = RULE_NULL;
|
|
} else { # 2 rules
|
|
if ($rules[0]->[RULE_SAVED] != 0) { @rules = reverse(@rules); }; # Standard time first.
|
|
$rule1 = $rules[0];
|
|
$rule2 = $rules[1];
|
|
$name1 = zonename($format, $rule1->[RULE_LETTERS], 0);
|
|
$name2 = zonename($format, $rule2->[RULE_LETTERS], 1);
|
|
}
|
|
|
|
my $gmtoff_min = offset_minutes($gmtoff);
|
|
|
|
# https://github.com/dmitryme/erlang_localtime/blob/master/include/tz_database.hrl
|
|
# Documentation for output format.
|
|
#
|
|
# {TimeZoneName, {StdAbbr, StdName}, {DstAbbr, DstName}, StdMin, DstMin, DstStartDay, DstStartTime, DstEndDay, DstEndTime}
|
|
# TimeZoneName = String(), TimeZone name, MUST be unique. It is a key
|
|
# StdName = {String(), String()}, name and abbreviations of timezone before daylight shift
|
|
# DstName = {String(), String()}, name and abbreviations of timezone after daylight shift
|
|
# StdMin = Integer(), GMT offset in minutes. W/o daylight savings
|
|
# DstMin = Integer(), daylight saving. Adjustment for GMT offset, when daylight saving is on
|
|
# DstStartDay = {NthWeekday, Weekday, Month}, daylight saving transition rule. Can be undef if no daylight saving rule specified
|
|
# NthWeekday = Integer(), 1(first week),2(second week),3(...),4(...),5(...),-1(last week)
|
|
# Weekday = atom(), sun,mon,tue,wed,thu,fri,sat
|
|
# Month = atom(), jan,deb,mar,apr,may,jun,jul,aug,sep,oct,mov,dec
|
|
# DstStartTime = {Hour, Min} - time of daylight saving transition
|
|
# Hour = Integer(), [0..23]
|
|
# Min = Integer(), [0..59]
|
|
# DstEndDay = {NthWeekday, Weekday, Month} - transition back to std. Can be undef if no daylight saving rule specified
|
|
# {Hour, Min} - time of transition to std.
|
|
#
|
|
# Example output.
|
|
# {"America/Los Angeles",{"PST","Pacific Standard Time"},{"PDT","Pacific Daylight Time"},-480,60,{2,sun,mar},{2,0},{1,sun,nov},{2,0}},
|
|
# {"America/Puerto Rico",{"AST","AST"},undef,-240,0,undef,{0,0},undef,{0,0}},
|
|
|
|
$output{$name} = build_term(
|
|
'#ZONE#', # TimeZoneName
|
|
[ $name1, $name1 ], # StdAbbr, StdName
|
|
defined $name2 ? [ $name2, $name2 ] : undef, # DstAbbr, DstName
|
|
$gmtoff_min, # StdMin
|
|
$rule2->[RULE_SAVED], # DstMin
|
|
$rule2->[RULE_DATE], # DstStartDay,
|
|
rulestart($rule2->[RULE_TIME], $gmtoff_min, 0, $name), # DstStartTime
|
|
$rule1->[RULE_DATE], # DstEndDay
|
|
rulestart($rule1->[RULE_TIME], $gmtoff_min, $rule2->[RULE_SAVED], $name), # DstEndTime
|
|
);
|
|
}
|
|
|
|
sub zonename {
|
|
my ($format, $letter, $dst) = @_;
|
|
if (defined($dst) && $format =~ m,/,) {
|
|
my @format = split(m,/,, $format);
|
|
return $format[$dst];
|
|
} else {
|
|
return sprintf($format, $letter eq '-' ? '' : $letter);
|
|
}
|
|
}
|
|
|
|
# erlang_localtime wants the transition time in the local wallclock
|
|
# time before the transition.
|
|
sub rulestart {
|
|
my ($start, $gmtoff, $save, $name) = @_;
|
|
$start =~ m/^(\d+)(\w)$/ or die "no match for $start";
|
|
($start, my $modified) = ($1, $2);
|
|
|
|
if ($modified eq 'W') {
|
|
# No adjustment needed.
|
|
} elsif ($modified eq 'Z') {
|
|
$start += $gmtoff + $save;
|
|
|
|
# If adding a (negative) GMT offset puts the time in the previous
|
|
# day, move it up to the start of the day. If adding a (positive)
|
|
# GMT offset puts the time in the next day, move it back to the
|
|
# end of the day. I think that this is wrong, and that actually
|
|
# the transition should be moved into the previous or next day,
|
|
# but it's not possible to consistently handle these rules with
|
|
# "third Sunday in May" logic. (For example, the day before the
|
|
# third Sunday in May may be either the second or third Saturday
|
|
# in May.) Having the transition off by a few hours every year is
|
|
# better than having it be off by a week every seven years or so.
|
|
if ($start < 0) {
|
|
warn "moving rule to beginning of day for $name";
|
|
push(@problem, "Moving rule to beginning of day for Zone $name.");
|
|
$start = 0;
|
|
} elsif ($start > HPD * MPH) {
|
|
warn "moving rule to end of day for $name";
|
|
push(@problem, "Moving rule to end of day for Zone $name.");
|
|
$start = HPD * MPH;
|
|
}
|
|
} elsif ($modified eq 'S') {
|
|
# To handle this properly, we would have to know the prevailing
|
|
# wall-clock offset from standard time. In the general case this
|
|
# could be different from $save, but in practice all of the
|
|
# transitions are from standard time to daylight time or vice
|
|
# versa and not between two different offsets from standard time,
|
|
# and so this works.
|
|
$start += $save;
|
|
}
|
|
|
|
return [ int($start / MPH), $start % MPH ];
|
|
}
|
|
|
|
sub build_term {
|
|
my @term;
|
|
foreach my $e (@_) {
|
|
if (! defined $e) {
|
|
push(@term, 'undef');
|
|
} elsif (ref($e) eq 'ARRAY') {
|
|
push(@term, build_term(@$e));
|
|
} elsif ($e =~ m/^(\-?\d+|[a-z].*)$/) {
|
|
push(@term, $e); # number or term
|
|
} else {
|
|
push(@term, "\"$e\""); # string
|
|
}
|
|
}
|
|
|
|
return '{' . join(',', @term) . '}';
|
|
}
|
|
|
|
sub link_line {
|
|
my ($l) = @_;
|
|
my ($LINK, $canon_name, $old_name) = split(m/\s+/, $l);
|
|
if (! defined $output{$canon_name}) {
|
|
print STDERR "no zone $canon_name for link $old_name\n";
|
|
} else {
|
|
$output{$old_name} = $output{$canon_name};
|
|
}
|
|
}
|
|
|
|
my (@mon_to_name, %mon_from_name, @dow_to_name, %dow_from_name);
|
|
INIT {
|
|
@mon_to_name = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
|
|
%mon_from_name = map { $mon_to_name[$_] => $_ } (0..$#mon_to_name);
|
|
@dow_to_name = qw(Sun Mon Tue Wed Thu Fri Sat);
|
|
%dow_from_name= map { $dow_to_name[$_] => $_ } (0..$#dow_to_name);
|
|
}
|
|
|
|
sub split_ymd {
|
|
my ($ymd) = @_;
|
|
$ymd =~ m/^(\d+)(?:\s+(\w+)(?:\s+(\d+)))?/
|
|
or die "parse \"$ymd\" for ymd failed";
|
|
my $year = $1;
|
|
my $month = do {
|
|
if (defined $2) {
|
|
defined $mon_from_name{$2} or die "parse \"$ymd\" for month failed";
|
|
$mon_from_name{$2};
|
|
} else {
|
|
0
|
|
}
|
|
};
|
|
my $day = defined $3 ? $3 : 0;
|
|
return ($year, $month, $day);
|
|
}
|
|
|
|
sub on_to_day_of_month {
|
|
my ($on, $year, $month) = @_;
|
|
|
|
my $day;
|
|
if ($on =~ m/^\d+$/) {
|
|
$day = $on;
|
|
print "Day extracted via regex to: $day\n";
|
|
} else {
|
|
my ($desired_dow, $time_base);
|
|
if ($on =~ m/^(\w+)>=(\d+)$/) {
|
|
$desired_dow = $dow_from_name{$1}; my $desired_day = $2;
|
|
$time_base = timelocal(0, 0, 0, $desired_day, $month, $year);
|
|
print "Regex: $on => desired_dow: $desired_dow. Time_base: $time_base\n";
|
|
} elsif ($on =~ m/^(\w+)<=(\d+)$/) {
|
|
$desired_dow = $dow_from_name{$1}; my $desired_day = $2;
|
|
$time_base = timelocal(0, 0, 0, $desired_day, $month, $year);
|
|
} elsif ($on =~ m/^last(\w+)$/) {
|
|
$desired_dow = $dow_from_name{$1};
|
|
# One week before the beginning of the next month.
|
|
$time_base = timelocal_nocheck(0, 0, 0, - DPW, $month + 1, $year);
|
|
} else {
|
|
die "match $on failed";
|
|
}
|
|
($day, my $dow) = (localtime($time_base))[3,6];
|
|
print "Current Day: $day\n";
|
|
print "Comparing $dow = $desired_dow\n";
|
|
if ($dow != $desired_dow) {
|
|
$day += (DPW + $desired_dow - $dow) % DPW;
|
|
}
|
|
print "After comparison: Current Day: $day\n";
|
|
}
|
|
|
|
return $day;
|
|
}
|
|
|
|
# Returns the epoch that the rule was last active, or undef if the
|
|
# rule has never been active (i.e., it begins in the future).
|
|
sub last_active_epoch {
|
|
my ($from, $to, $in, $on) = @_;
|
|
# $from is a year.
|
|
# $to is a year, or 'only', or 'max'.
|
|
# $in is a month name (e.g., 'Jan').
|
|
# $on is a day-of-month, or a day-of-week>=day-of-month, or 'last'day-of-week.
|
|
|
|
my $month = $mon_from_name{$in};
|
|
|
|
# First check the rule's from time; if in the future return undef.
|
|
if ($from > $current_year) { return undef; }
|
|
if ($from == $current_year && $month > $current_month) { return undef; }
|
|
if ($from == $current_year && $month == $current_month) {
|
|
my $day = on_to_day_of_month($on, $current_year, $current_month);
|
|
if ($day > $current_day) { return undef; }
|
|
}
|
|
|
|
# Now check the rule's to time. If the rule covers the current
|
|
# year, but doesn't fire until later in the year, subtract a year.
|
|
my $year = $to eq 'max' ? $current_year : $to eq 'only' ? $from : $to;
|
|
event_fires_later_this_year: {
|
|
if ($year < $current_year) { last; }
|
|
if ($year == $current_year && $month < $current_month) { last; }
|
|
my $day = on_to_day_of_month($on, $year, $month);
|
|
if ($year == $current_year && $month == $current_month && $day < $current_day) { last; }
|
|
# If we get here, this year's rule instance is not active until
|
|
# later in the year, so subtract a year.
|
|
$year -= 1;
|
|
}
|
|
|
|
my $day = on_to_day_of_month($on, $year, $month);
|
|
print "On=$on, Year=$year, Month=$month ====> Day=$day\n";
|
|
return timelocal_nocheck(0, 0, 0, $day, $month, $year);
|
|
}
|
|
|