qdate_localtime/priv/db/tz-erl

528 lines
18 KiB
Text
Raw Normal View History

#! /usr/bin/env perl
# This script processes time zone definitions from the Olson database
# and transforms them into the format used by the erlang_localtime
# library.
#
# Some helpful URLs:
# https://www.iana.org/time-zones
# https://github.com/dmitryme/erlang_localtime
# http://www.cstdbill.com/tzdb/tz-how-to.html
# Known bugs (should fix):
# * Discarding past/future Rules considers only year. Better to
# use a window [now, one-year-from-now) so that our output rules
# are valid for at least a year.
# Known bugs (can't fix without upstream changes):
# * Africa/Casablanca: Has more than one DST transition per year.
# * Pacific/Fiji: DST does not start/end on Nth DayOfWeek in month.
# * America/Godthab: Transition time as UTC moves into previous day.
use strict;
use warnings;
use Getopt::Long;
use Time::Local qw( timelocal timelocal_nocheck );
use Data::Dump;
use constant DPW => 7; # Days per week.
use constant HPD => 24; # Hours per day.
use constant MPH => 60; # Minutes per hour.
use constant SPECIFICITY_MAX => 100;
use constant {
RULE_DATE => 0,
RULE_SAVED => 1,
RULE_TIME => 2,
RULE_LETTERS => 3,
RULE_SPECIFICITY => 4,
};
use constant RULE_NULL => [ undef, 0, '0W', '-', 0 ];
my $version = undef;
my $output_file;
my $date;
GetOptions(
'version=s' => \$version,
'output=s' => \$output_file,
'date=s' => \$date,
) or die;
my @data = @ARGV;
my ($current_day, $current_month, $current_year) = do {
if (defined $date) {
$date =~ m/^(\d\d\d\d)-?(\d\d)-?(\d\d)$/ or die "parse date \"$date\" as YYYY-MM-DD failed\n";
($3, $2 - 1, $1);
} else {
my @lt = localtime(time);
($lt[3], $lt[4], $lt[5] + 1900);
}
};
my %rule;
my %rule_base;
my %output;
my @problem;
# Add the null rule.
$rule_base{'-'} = [ 0, RULE_NULL ];
# Have to make three passes through the file.
# First pass: process Rule lines.
process_data(\@data, { Rule => \&rule_line });
# Second pass: process Zone lines.
process_data(\@data, { Zone => \&zone_line });
# Third pass, process Link lines.
process_data(\@data, { Link => \&link_line });
my $ofh;
if (defined $output_file) {
open($ofh, '>', $output_file) or die "open $output_file for write failed: $!\n";
} else {
$output_file = 'STDOUT';
open($ofh, '>&STDOUT') or die "dup $output_file failed: $!\n";
}
printf $ofh " %%%% Automatically generated from the time zone database%s for %04d-%02d-%02d.\n",
(defined $version ? " version $version" : ''),
$current_year, $current_month + 1, $current_day;
print $ofh "\n";
print $ofh " %% Problems:\n";
print $ofh map(" %% $_\n", @problem);
print $ofh "\n";
print $ofh join(",\n", map {
(my $o = $output{$_}) =~ s/#ZONE#/$_/;
" $o";
} (sort keys %output)),
"\n";
close($ofh) or die "close $output_file failed: $!\n";
exit(0);
########################################################################
sub process_data {
my ($data, $handler) = @_;
foreach my $f (@$data) {
my $fh = $f;
{
# We open the file this way so that die and warn print the filename.
no strict 'refs';
open($fh, '<', $fh) or die "open $fh for read failed: $!";
}
# Read the Olson database.
my ($last_linetype, $last_lineprefix);
while (my $l = <$fh>) {
chomp($l);
$l =~ s/\s*#.*//; # Remove comments.
$l =~ s/\s+$//; # Remove trailing whitespace.
$l eq '' and next;
my $linetype = do {
if ($l =~ m/^((\S+)\s+\S+)/) {
# Continuation line.
$last_linetype = $2;
$last_lineprefix = $1;
} else {
$l = $last_lineprefix . $l;
}
$last_linetype;
};
if (defined(my $h = $handler->{$linetype})) {
$h->($l);
}
}
close($fh);
}
}
# Converts an offset in the format "[+-]?HH:MM" or "[+-]?HH" into minutes.
# For example, "2:00" -> 120, "-0:30" -> -30, "+5" -> 300.
sub offset_minutes {
my ($off, $adj) = @_;
my $convert_offset = sub {
$_[0] =~ m/^([\+\-]?)(\d+)(?::(\d+))?$/
or die "offset \"$_[0]\" did not match";
my $m = $2 * MPH;
if (defined $3) { $m += $3; }
if ($1 eq '-') { $m = -$m; }
return $m;
};
my $offset = $convert_offset->($off);
if (defined $adj) {
$offset -= $convert_offset->($adj);
}
return $offset;
}
sub rule_line {
my ($l) = @_;
my ($RULE, $name, $from, $to, $type, $in, $on, $at, $save, $letters) = split(m/\s+/, $l);
# The rule lines in the time zone database describe transitions.
#
# Cases we have to handle:
#
# 1. There are no rules still active. We need to determine the last
# transition and apply it statically. (Example: Ghana.)
#
# 2. There are active rules. We should determine the current rules
# (ignore old rules and future rules) and transfer them to the
# output rules. (Example: Morocco.)
my $save_minutes = offset_minutes($save);
# The rules that this function generates has these parts.
# These are accessed using the RULE_* constants defined above.
# 1. A description of the date on which the transition happens,
# or undef if this is a base rule.
# 2. The number of minutes "saved" (the difference from the base
# offset for the zone).
# 3. The time in minutes after midnight at which the transition
# happens.
# 4. The letter for the new state (often 'S' for standard, or 'D'
# for daylight).
# 5. The number of years the rule covers (used to select which rules
# to eliminate when there are more than two rules in a year).
# Update the base rule.
my $rule_last_active_epoch = last_active_epoch($from, $to, $in, $on);
if (defined $rule_last_active_epoch) {
if (! defined $rule_base{$name} || $rule_base{$name}->[0] < $rule_last_active_epoch) {
$rule_base{$name} = [ $rule_last_active_epoch, [ undef, $save_minutes, 0, $letters, SPECIFICITY_MAX ] ];
}
}
my $transform_rule = sub {
my $rule;
if ($on =~ m/^(\w+)>=(\d+)$/) {
# If start day is not a multiple of a week, round it to nearest week.
$rule = [ int(($2-1+int(DPW/2))/DPW) + 1, lc($1), lc($in) ]; # e.g. [ '2', sun', 'nov' ]
if (($2-1) % DPW) {
warn "on $on fuzz for $name -> $rule->[0]";
push(@problem, "Rounded $in $on to [@$rule] in Rule $name.");
}
} elsif ($on =~ m/^last(\w+)$/) {
$rule = [ 'last', lc($1), lc($in) ]; # e.g. [ 'last', 'sun', 'apr' ]
} else {
warn "no match for $on in rule $l";
push(@problem, "Ignored $in $on in Rule $name.");
}
my $at_minutes = $at;
if ($at =~ m/^([\+\-]?)(?:(\d+)\:)(\d+)([wsguz]?)/) {
$at_minutes = $2 * MPH + $3; my $z = $4;
if ($1 eq '-') { $at_minutes = -$at_minutes; }
if ($z eq '' or $z eq 'w') {
# Change specified at local (pre-change) wall-clock time.
$at_minutes .= 'W';
} elsif ($z eq 's') {
# Change specified at local standard time.
$at_minutes .= 'S';
} elsif ($z eq 'g' or $z eq 'u' or $z eq 'z') {
# Change specified at UTC.
$at_minutes .= 'Z';
}
}
my $specificity =
$to eq 'only' ? 1 :
$to eq 'max' ? SPECIFICITY_MAX :
$to - $from + 1;
return [ $rule, $save_minutes, $at_minutes, $letters, $specificity ];
};
# We ignore any rule that has a definite to (end) time.
if (($to eq 'only' && $from == $current_year) ||
($to ne 'only' && ($to eq 'max' || $to >= $current_year) && $from <= $current_year)) {
push(@{$rule{$name}}, $transform_rule->());
}
}
# Process a Zone line and updates the %output hash.
sub zone_line {
my ($l) = @_;
my ($ZONE, $name, $gmtoff, $rules, $format, $until) = split(m/\s+/, $l, 6);
# We ignore any zone line that has a definite until (end) time that
# is in the past.
if (defined $until) {
my ($until_year, $until_month, $until_day) = split_ymd($until);
if (($until_year > $current_year) ||
(($until_year == $current_year) &&
(($until_month > $current_month) ||
(($until_month == $current_month) &&
($until_day >= $current_day))))) {
"future until \"$until\" not handled";
}
return;
}
# Set $name1 and $rule1 for (start of) standard time. If zone has
# DST, set name2 and $rule2 for (start of ) DST, otherwise it is
# equal to standard time.
my ($name1, $name2, $rule1, $rule2);
my @rules = @{$rule{$rules} || []};
# If we have more than two rules, discard all except for the two
# most-specific rules.
if (scalar(@rules) > 2) {
print STDERR "discarding excess rules from $rules for $name\n", Data::Dump::dump(\@rules), "\n";
push(@problem, "Discarded excess rules for Zone $name.");
@rules = sort { $b->[RULE_SPECIFICITY] <=> $a->[RULE_SPECIFICITY] } @rules;
$#rules = 1;
}
if (scalar(@rules) == 0) {
# No active rules, use base rule.
my $rule0 = $rule_base{$rules}->[1];
$name1 = zonename($format, $rule0->[RULE_LETTERS], undef);
$name2 = undef;
$rule1 = $rule2 = RULE_NULL;
} elsif (scalar(@rules) == 1) {
# One active rule. This is a year that DST started or stopped
# being observed. erlang_localtime doesn't handle this. If DST
# stopped being observed in this year, don't output a DST rule.
# If DST started being observed, do. (Except that we don't handle
# this yet.)
print STDERR Data::Dump::dump(\@rules), "\n";
die "one rule for $name";
$name1 = zonename($format, $rules[0]->[RULE_LETTERS], undef);
$name2 = undef;
$rule1 = $rule2 = RULE_NULL;
} else { # 2 rules
if ($rules[0]->[RULE_SAVED] != 0) { @rules = reverse(@rules); }; # Standard time first.
$rule1 = $rules[0];
$rule2 = $rules[1];
$name1 = zonename($format, $rule1->[RULE_LETTERS], 0);
$name2 = zonename($format, $rule2->[RULE_LETTERS], 1);
}
my $gmtoff_min = offset_minutes($gmtoff);
# https://github.com/dmitryme/erlang_localtime/blob/master/include/tz_database.hrl
# Documentation for output format.
#
# {TimeZoneName, {StdAbbr, StdName}, {DstAbbr, DstName}, StdMin, DstMin, DstStartDay, DstStartTime, DstEndDay, DstEndTime}
# TimeZoneName = String(), TimeZone name, MUST be unique. It is a key
# StdName = {String(), String()}, name and abbreviations of timezone before daylight shift
# DstName = {String(), String()}, name and abbreviations of timezone after daylight shift
# StdMin = Integer(), GMT offset in minutes. W/o daylight savings
# DstMin = Integer(), daylight saving. Adjustment for GMT offset, when daylight saving is on
# DstStartDay = {NthWeekday, Weekday, Month}, daylight saving transition rule. Can be undef if no daylight saving rule specified
# NthWeekday = Integer(), 1(first week),2(second week),3(...),4(...),5(...),-1(last week)
# Weekday = atom(), sun,mon,tue,wed,thu,fri,sat
# Month = atom(), jan,deb,mar,apr,may,jun,jul,aug,sep,oct,mov,dec
# DstStartTime = {Hour, Min} - time of daylight saving transition
# Hour = Integer(), [0..23]
# Min = Integer(), [0..59]
# DstEndDay = {NthWeekday, Weekday, Month} - transition back to std. Can be undef if no daylight saving rule specified
# {Hour, Min} - time of transition to std.
#
# Example output.
# {"America/Los Angeles",{"PST","Pacific Standard Time"},{"PDT","Pacific Daylight Time"},-480,60,{2,sun,mar},{2,0},{1,sun,nov},{2,0}},
# {"America/Puerto Rico",{"AST","AST"},undef,-240,0,undef,{0,0},undef,{0,0}},
$output{$name} = build_term(
'#ZONE#', # TimeZoneName
[ $name1, $name1 ], # StdAbbr, StdName
defined $name2 ? [ $name2, $name2 ] : undef, # DstAbbr, DstName
$gmtoff_min, # StdMin
$rule2->[RULE_SAVED], # DstMin
$rule2->[RULE_DATE], # DstStartDay,
rulestart($rule2->[RULE_TIME], $gmtoff_min, 0, $name), # DstStartTime
$rule1->[RULE_DATE], # DstEndDay
rulestart($rule1->[RULE_TIME], $gmtoff_min, $rule2->[RULE_SAVED], $name), # DstEndTime
);
}
sub zonename {
my ($format, $letter, $dst) = @_;
if (defined($dst) && $format =~ m,/,) {
my @format = split(m,/,, $format);
return $format[$dst];
} else {
return sprintf($format, $letter eq '-' ? '' : $letter);
}
}
# erlang_localtime wants the transition time in the local wallclock
# time before the transition.
sub rulestart {
my ($start, $gmtoff, $save, $name) = @_;
$start =~ m/^(\d+)(\w)$/ or die "no match for $start";
($start, my $modified) = ($1, $2);
if ($modified eq 'W') {
# No adjustment needed.
} elsif ($modified eq 'Z') {
$start += $gmtoff + $save;
# If adding a (negative) GMT offset puts the time in the previous
# day, move it up to the start of the day. If adding a (positive)
# GMT offset puts the time in the next day, move it back to the
# end of the day. I think that this is wrong, and that actually
# the transition should be moved into the previous or next day,
# but it's not possible to consistently handle these rules with
# "third Sunday in May" logic. (For example, the day before the
# third Sunday in May may be either the second or third Saturday
# in May.) Having the transition off by a few hours every year is
# better than having it be off by a week every seven years or so.
if ($start < 0) {
warn "moving rule to beginning of day for $name";
push(@problem, "Moving rule to beginning of day for Zone $name.");
$start = 0;
} elsif ($start > HPD * MPH) {
warn "moving rule to end of day for $name";
push(@problem, "Moving rule to end of day for Zone $name.");
$start = HPD * MPH;
}
} elsif ($modified eq 'S') {
# To handle this properly, we would have to know the prevailing
# wall-clock offset from standard time. In the general case this
# could be different from $save, but in practice all of the
# transitions are from standard time to daylight time or vice
# versa and not between two different offsets from standard time,
# and so this works.
$start += $save;
}
return [ int($start / MPH), $start % MPH ];
}
sub build_term {
my @term;
foreach my $e (@_) {
if (! defined $e) {
push(@term, 'undef');
} elsif (ref($e) eq 'ARRAY') {
push(@term, build_term(@$e));
} elsif ($e =~ m/^(\-?\d+|[a-z].*)$/) {
push(@term, $e); # number or term
} else {
push(@term, "\"$e\""); # string
}
}
return '{' . join(',', @term) . '}';
}
sub link_line {
my ($l) = @_;
my ($LINK, $canon_name, $old_name) = split(m/\s+/, $l);
if (! defined $output{$canon_name}) {
print STDERR "no zone $canon_name for link $old_name\n";
} else {
$output{$old_name} = $output{$canon_name};
}
}
my (@mon_to_name, %mon_from_name, @dow_to_name, %dow_from_name);
INIT {
@mon_to_name = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
%mon_from_name = map { $mon_to_name[$_] => $_ } (0..$#mon_to_name);
@dow_to_name = qw(Sun Mon Tue Wed Thu Fri Sat);
%dow_from_name= map { $dow_to_name[$_] => $_ } (0..$#dow_to_name);
}
sub split_ymd {
my ($ymd) = @_;
$ymd =~ m/^(\d+)(?:\s+(\w+)(?:\s+(\d+)))?/
or die "parse \"$ymd\" for ymd failed";
my $year = $1;
my $month = do {
if (defined $2) {
defined $mon_from_name{$2} or die "parse \"$ymd\" for month failed";
$mon_from_name{$2};
} else {
0
}
};
my $day = defined $3 ? $3 : 0;
return ($year, $month, $day);
}
sub on_to_day_of_month {
my ($on, $year, $month) = @_;
my $day;
if ($on =~ m/^\d+$/) {
$day = $on;
print "Day extracted via regex to: $day\n";
} else {
my ($desired_dow, $time_base);
if ($on =~ m/^(\w+)>=(\d+)$/) {
$desired_dow = $dow_from_name{$1}; my $desired_day = $2;
$time_base = timelocal(0, 0, 0, $desired_day, $month, $year);
print "Regex: $on => desired_dow: $desired_dow. Time_base: $time_base\n";
} elsif ($on =~ m/^(\w+)<=(\d+)$/) {
$desired_dow = $dow_from_name{$1}; my $desired_day = $2;
$time_base = timelocal(0, 0, 0, $desired_day, $month, $year);
} elsif ($on =~ m/^last(\w+)$/) {
$desired_dow = $dow_from_name{$1};
# One week before the beginning of the next month.
$time_base = timelocal_nocheck(0, 0, 0, - DPW, $month + 1, $year);
} else {
die "match $on failed";
}
($day, my $dow) = (localtime($time_base))[3,6];
print "Current Day: $day\n";
print "Comparing $dow = $desired_dow\n";
if ($dow != $desired_dow) {
$day += (DPW + $desired_dow - $dow) % DPW;
}
print "After comparison: Current Day: $day\n";
}
return $day;
}
# Returns the epoch that the rule was last active, or undef if the
# rule has never been active (i.e., it begins in the future).
sub last_active_epoch {
my ($from, $to, $in, $on) = @_;
# $from is a year.
# $to is a year, or 'only', or 'max'.
# $in is a month name (e.g., 'Jan').
# $on is a day-of-month, or a day-of-week>=day-of-month, or 'last'day-of-week.
my $month = $mon_from_name{$in};
# First check the rule's from time; if in the future return undef.
if ($from > $current_year) { return undef; }
if ($from == $current_year && $month > $current_month) { return undef; }
if ($from == $current_year && $month == $current_month) {
my $day = on_to_day_of_month($on, $current_year, $current_month);
if ($day > $current_day) { return undef; }
}
# Now check the rule's to time. If the rule covers the current
# year, but doesn't fire until later in the year, subtract a year.
my $year = $to eq 'max' ? $current_year : $to eq 'only' ? $from : $to;
event_fires_later_this_year: {
if ($year < $current_year) { last; }
if ($year == $current_year && $month < $current_month) { last; }
my $day = on_to_day_of_month($on, $year, $month);
if ($year == $current_year && $month == $current_month && $day < $current_day) { last; }
# If we get here, this year's rule instance is not active until
# later in the year, so subtract a year.
$year -= 1;
}
my $day = on_to_day_of_month($on, $year, $month);
print "On=$on, Year=$year, Month=$month ====> Day=$day\n";
return timelocal_nocheck(0, 0, 0, $day, $month, $year);
}