Add tz-erl script to generate timezone database from Olson data.
This commit is contained in:
parent
9caa2b2a0b
commit
632c3e2d6e
2 changed files with 506 additions and 0 deletions
16
db/Makefile
Normal file
16
db/Makefile
Normal file
|
@ -0,0 +1,16 @@
|
|||
TZDIR=tzdata
|
||||
TZ_FILES=$(addprefix $(TZDIR)/, africa antarctica asia australasia backward etcetera europe northamerica southamerica)
|
||||
|
||||
tzout: $(TZ_FILES)
|
||||
TZ_VERSION=`perl -n -e 'm/^VERSION\s*=\s*(\S+)/ and print $$1;' $(TZDIR)/Makefile`; \
|
||||
./tz-erl --version $$TZ_VERSION -o $@ $^
|
||||
|
||||
$(TZ_FILES): $(TZDIR)
|
||||
|
||||
$(TZDIR): tzdata-latest.tar.gz
|
||||
mkdir $(TZDIR) && cd $(TZDIR) && tar xzf ../$<
|
||||
|
||||
tzdata-latest.tar.gz:
|
||||
curl -O "ftp://ftp.iana.org/tz/tzdata-latest.tar.gz"
|
||||
|
||||
|
490
db/tz-erl
Executable file
490
db/tz-erl
Executable file
|
@ -0,0 +1,490 @@
|
|||
#! /usr/bin/env perl
|
||||
|
||||
# This script processes time zone definitions from the Olson database
|
||||
# and transforms them into the format used by the erlang_localtime
|
||||
# library.
|
||||
#
|
||||
# Some helpful URLs:
|
||||
# https://www.iana.org/time-zones
|
||||
# https://github.com/dmitryme/erlang_localtime
|
||||
# http://www.cstdbill.com/tzdb/tz-how-to.html
|
||||
|
||||
# Known bugs (should fix):
|
||||
# * Discarding past/future Rules considers only year. Better to
|
||||
# use a window [now, one-year-from-now) so that our output rules
|
||||
# are valid for at least a year.
|
||||
# Known bugs (can't fix without upstream changes):
|
||||
# * Africa/Casablanca: Has more than one DST transition per year.
|
||||
# * Pacific/Fiji: DST does not start/end on Nth DayOfWeek in month.
|
||||
# * America/Godthab: Transition time as UTC moves into previous day.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Getopt::Long;
|
||||
use Time::Local qw( timelocal timelocal_nocheck );
|
||||
use Data::Dump;
|
||||
|
||||
use constant DPW => 7; # Days per week.
|
||||
use constant HPD => 24; # Hours per day.
|
||||
use constant MPH => 60; # Minutes per hour.
|
||||
use constant SPECIFICITY_MAX => 100;
|
||||
|
||||
use constant {
|
||||
RULE_DATE => 0,
|
||||
RULE_SAVED => 1,
|
||||
RULE_TIME => 2,
|
||||
RULE_LETTERS => 3,
|
||||
RULE_SPECIFICITY => 4,
|
||||
};
|
||||
use constant RULE_NULL => [ undef, 0, '0W', '-', 0 ];
|
||||
|
||||
my $version = undef;
|
||||
my $output_file;
|
||||
my $date;
|
||||
|
||||
GetOptions(
|
||||
'version=s' => \$version,
|
||||
'output=s' => \$output_file,
|
||||
'date=s' => \$date,
|
||||
) or die;
|
||||
|
||||
my @data = @ARGV;
|
||||
|
||||
my ($current_day, $current_month, $current_year) = do {
|
||||
if (defined $date) {
|
||||
$date =~ m/^(\d\d\d\d)-?(\d\d)-?(\d\d)$/ or die "parse date \"$date\" as YYYY-MM-DD failed\n";
|
||||
($3, $2 - 1, $1);
|
||||
} else {
|
||||
my @lt = localtime(time);
|
||||
($lt[3], $lt[4], $lt[5] + 1900);
|
||||
}
|
||||
};
|
||||
|
||||
my %rule;
|
||||
my %rule_base;
|
||||
my %output;
|
||||
my @problem;
|
||||
|
||||
# Add the null rule.
|
||||
$rule_base{'-'} = [ 0, RULE_NULL ];
|
||||
|
||||
# Have to make three passes through the file.
|
||||
# First pass: process Rule lines.
|
||||
process_data(\@data, { Rule => \&rule_line });
|
||||
# Second pass: process Zone lines.
|
||||
process_data(\@data, { Zone => \&zone_line });
|
||||
# Third pass, process Link lines.
|
||||
process_data(\@data, { Link => \&link_line });
|
||||
|
||||
my $ofh;
|
||||
if (defined $output_file) {
|
||||
open($ofh, '>', $output_file) or die "open $output_file for write failed: $!\n";
|
||||
} else {
|
||||
$output_file = 'STDOUT';
|
||||
open($ofh, '>&STDOUT') or die "dup $output_file failed: $!\n";
|
||||
}
|
||||
|
||||
printf $ofh " %%%% Automatically generated from the time zone database%s for %04d-%02d-%02d.\n",
|
||||
(defined $version ? " version $version" : ''),
|
||||
$current_year, $current_month + 1, $current_day;
|
||||
|
||||
print $ofh "\n";
|
||||
print $ofh " %% Problems:\n";
|
||||
print $ofh map(" %% $_\n", @problem);
|
||||
print $ofh "\n";
|
||||
|
||||
print $ofh join(",\n", map {
|
||||
(my $o = $output{$_}) =~ s/#ZONE#/$_/;
|
||||
" $o";
|
||||
} (sort keys %output)),
|
||||
"\n";
|
||||
|
||||
close($ofh) or die "close $output_file failed: $!\n";
|
||||
|
||||
exit(0);
|
||||
|
||||
########################################################################
|
||||
|
||||
sub process_data {
|
||||
my ($data, $handler) = @_;
|
||||
|
||||
foreach my $f (@$data) {
|
||||
my $fh = $f;
|
||||
{
|
||||
# We open the file this way so that die and warn print the filename.
|
||||
no strict 'refs';
|
||||
open($fh, '<', $fh) or die "open $fh for read failed: $!";
|
||||
}
|
||||
|
||||
# Read the Olson database.
|
||||
my ($last_linetype, $last_lineprefix);
|
||||
while (my $l = <$fh>) {
|
||||
chomp($l);
|
||||
$l =~ s/\s*#.*//; # Remove comments.
|
||||
$l =~ s/\s+$//; # Remove trailing whitespace.
|
||||
$l eq '' and next;
|
||||
|
||||
my $linetype = do {
|
||||
if ($l =~ m/^((\S+)\s+\S+)/) {
|
||||
# Continuation line.
|
||||
$last_linetype = $2;
|
||||
$last_lineprefix = $1;
|
||||
} else {
|
||||
$l = $last_lineprefix . $l;
|
||||
}
|
||||
$last_linetype;
|
||||
};
|
||||
|
||||
if (defined(my $h = $handler->{$linetype})) {
|
||||
$h->($l);
|
||||
}
|
||||
}
|
||||
|
||||
close($fh);
|
||||
}
|
||||
}
|
||||
|
||||
sub offset_minutes {
|
||||
my ($off, $adj) = @_;
|
||||
|
||||
my $convert_offset = sub {
|
||||
my $m = $_[0];
|
||||
if ($m =~ m/^([\+\-]?)(?:(\d+):)?(\d+)$/) {
|
||||
$m = (defined $2 ? $2 : 0) * MPH + $3;
|
||||
if ($1 eq '-') { $m = -$m; }
|
||||
}
|
||||
return $m;
|
||||
};
|
||||
|
||||
my $offset = $convert_offset->($off);
|
||||
if (defined $adj) {
|
||||
$offset -= $convert_offset->($adj);
|
||||
}
|
||||
|
||||
return $offset;
|
||||
}
|
||||
|
||||
|
||||
sub rule_line {
|
||||
my ($l) = @_;
|
||||
my ($RULE, $name, $from, $to, $type, $in, $on, $at, $save, $letters) = split(m/\s+/, $l);
|
||||
|
||||
# The rule lines in the time zone database describe transitions.
|
||||
#
|
||||
# Cases we have to handle:
|
||||
#
|
||||
# 1. There are no rules still active. We need to determine the last
|
||||
# transition and apply it statically. (Example: Ghana.)
|
||||
#
|
||||
# 2. There are active rules. We should determine the current rules
|
||||
# (ignore old rules and future rules) and transfer them to the
|
||||
# output rules. (Example: Morocco.)
|
||||
|
||||
my $save_minutes = offset_minutes($save);
|
||||
|
||||
# The rules that this function generates has these parts.
|
||||
# These are accessed using the RULE_* constants defined above.
|
||||
# 1. A description of the date on which the transition happens,
|
||||
# or undef if this is a base rule.
|
||||
# 2. The number of minutes "saved" (the difference from the base
|
||||
# offset for the zone).
|
||||
# 3. The time in minutes after midnight at which the transition
|
||||
# happens.
|
||||
# 4. The letter for the new state (often 'S' for standard, or 'D'
|
||||
# for daylight).
|
||||
# 5. The number of years the rule covers (used to select which rules
|
||||
# to eliminate when there are more than two rules in a year).
|
||||
|
||||
# Update the base rule.
|
||||
my $rule_last_active_epoch = last_active_epoch($from, $to, $in, $on);
|
||||
if (defined $rule_last_active_epoch) {
|
||||
if (! defined $rule_base{$name} || $rule_base{$name}->[0] < $rule_last_active_epoch) {
|
||||
$rule_base{$name} = [ $rule_last_active_epoch, [ undef, $save_minutes, 0, $letters, SPECIFICITY_MAX ] ];
|
||||
}
|
||||
}
|
||||
|
||||
my $transform_rule = sub {
|
||||
my $rule;
|
||||
if ($on =~ m/^(\w+)>=(\d+)$/) {
|
||||
# If start day is not a multiple of a week, round it to nearest week.
|
||||
$rule = [ int(($2-1+int(DPW/2))/DPW) + 1, lc($1), lc($in) ]; # e.g. [ '2', sun', 'nov' ]
|
||||
if (($2-1) % DPW) {
|
||||
warn "on $on fuzz for $name -> $rule->[0]";
|
||||
push(@problem, "Rounded $in $on to [@$rule] in Rule $name.");
|
||||
}
|
||||
} elsif ($on =~ m/^last(\w+)$/) {
|
||||
$rule = [ 'last', lc($1), lc($in) ]; # e.g. [ 'last', 'sun', 'apr' ]
|
||||
} else {
|
||||
warn "no match for $on in rule $l";
|
||||
push(@problem, "Ignored $in $on in Rule $name.");
|
||||
}
|
||||
my $at_minutes = $at;
|
||||
if ($at =~ m/^([\+\-]?)(?:(\d+)\:)(\d+)([wsguz]?)/) {
|
||||
$at_minutes = $2 * MPH + $3; my $z = $4;
|
||||
if ($1 eq '-') { $at_minutes = -$at_minutes; }
|
||||
if ($z eq '' or $z eq 'w') {
|
||||
# Change specified at local (pre-change) wall-clock time.
|
||||
$at_minutes .= 'W';
|
||||
} elsif ($z eq 's') {
|
||||
# Change specified at local standard time.
|
||||
$at_minutes .= 'S';
|
||||
} elsif ($z eq 'g' or $z eq 'u' or $z eq 'z') {
|
||||
# Change specified at UTC.
|
||||
$at_minutes .= 'Z';
|
||||
}
|
||||
}
|
||||
my $specificity =
|
||||
$to eq 'only' ? 1 :
|
||||
$to eq 'max' ? SPECIFICITY_MAX :
|
||||
$to - $from + 1;
|
||||
|
||||
return [ $rule, $save_minutes, $at_minutes, $letters, $specificity ];
|
||||
};
|
||||
|
||||
# We ignore any rule that has a definite to (end) time.
|
||||
if (($to eq 'only' && $from == $current_year) ||
|
||||
($to ne 'only' && ($to eq 'max' || $to >= $current_year) && $from <= $current_year)) {
|
||||
push(@{$rule{$name}}, $transform_rule->());
|
||||
}
|
||||
}
|
||||
|
||||
# Process a Zone line and updates the %output hash.
|
||||
sub zone_line {
|
||||
my ($l) = @_;
|
||||
my ($ZONE, $name, $gmtoff, $rules, $format, $until) = split(m/\s+/, $l, 6);
|
||||
# We ignore any zone line that has a definite until (end) time that
|
||||
# is in the past.
|
||||
if (defined $until) {
|
||||
my $until_year = ($until =~ m/^(\d+)/)[0];
|
||||
if ($until_year >= $current_year) {
|
||||
die "until $until not handled";
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
# Set $name1 and $rule1 for (start of) standard time. If zone has
|
||||
# DST, set name2 and $rule2 for (start of ) DST, otherwise it is
|
||||
# equal to standard time.
|
||||
my ($name1, $name2, $rule1, $rule2);
|
||||
my @rules = @{$rule{$rules} || []};
|
||||
|
||||
# If we have more than two rules, discard all except for the two
|
||||
# most-specific rules.
|
||||
if (scalar(@rules) > 2) {
|
||||
print STDERR "discarding excess rules from $rules for $name\n", Data::Dump::dump(\@rules), "\n";
|
||||
push(@problem, "Discarded excess rules for Zone $name.");
|
||||
@rules = sort { $b->[RULE_SPECIFICITY] <=> $a->[RULE_SPECIFICITY] } @rules;
|
||||
$#rules = 1;
|
||||
}
|
||||
|
||||
if (scalar(@rules) == 0) {
|
||||
# No active rules, use base rule.
|
||||
my $rule0 = $rule_base{$rules}->[1];
|
||||
$name1 = zonename($format, $rule0->[RULE_LETTERS], undef);
|
||||
$name2 = undef;
|
||||
$rule1 = $rule2 = RULE_NULL;
|
||||
} elsif (scalar(@rules) == 1) {
|
||||
# One active rule. This is a year that DST started or stopped
|
||||
# being observed
|
||||
print STDERR Data::Dump::dump(\@rules), "\n";
|
||||
die "one rule for $name";
|
||||
$name1 = zonename($format, $rules[0]->[RULE_LETTERS], undef);
|
||||
$name2 = undef;
|
||||
$rule1 = $rule2 = RULE_NULL;
|
||||
} else { # 2 rules
|
||||
if ($rules[0]->[RULE_SAVED] != 0) { @rules = reverse(@rules); }; # Standard time first.
|
||||
$rule1 = $rules[0];
|
||||
$rule2 = $rules[1];
|
||||
$name1 = zonename($format, $rule1->[RULE_LETTERS], 0);
|
||||
$name2 = zonename($format, $rule2->[RULE_LETTERS], 1);
|
||||
}
|
||||
|
||||
my $gmtoff_min = offset_minutes($gmtoff);
|
||||
|
||||
# https://github.com/dmitryme/erlang_localtime/blob/master/include/tz_database.hrl
|
||||
# Documentation for output format.
|
||||
#
|
||||
# {TimeZoneName, {StdAbbr, StdName}, {DstAbbr, DstName}, StdMin, DstMin, DstStartDay, DstStartTime, DstEndDay, DstEndTime}
|
||||
# TimeZoneName = String(), TimeZone name, MUST be unique. It is a key
|
||||
# StdName = {String(), String()}, name and abbreviations of timezone before daylight shift
|
||||
# DstName = {String(), String()}, name and abbreviations of timezone after daylight shift
|
||||
# StdMin = Integer(), GMT offset in minutes. W/o daylight savings
|
||||
# DstMin = Integer(), daylight saving. Adjustment for GMT offset, when daylight saving is on
|
||||
# DstStartDay = {NthWeekday, Weekday, Month}, daylight saving transition rule. Can be undef if no daylight saving rule specified
|
||||
# NthWeekday = Integer(), 1(first week),2(second week),3(...),4(...),5(...),-1(last week)
|
||||
# Weekday = atom(), sun,mon,tue,wed,thu,fri,sat
|
||||
# Month = atom(), jan,deb,mar,apr,may,jun,jul,aug,sep,oct,mov,dec
|
||||
# DstStartTime = {Hour, Min} - time of daylight saving transition
|
||||
# Hour = Integer(), [0..23]
|
||||
# Min = Integer(), [0..59]
|
||||
# DstEndDay = {NthWeekday, Weekday, Month} - transition back to std. Can be undef if no daylight saving rule specified
|
||||
# {Hour, Min} - time of transition to std.
|
||||
#
|
||||
# Example output.
|
||||
# {"America/Los Angeles",{"PST","Pacific Standard Time"},{"PDT","Pacific Daylight Time"},-480,60,{2,sun,mar},{2,0},{1,sun,nov},{2,0}},
|
||||
# {"America/Puerto Rico",{"AST","AST"},undef,-240,0,undef,{0,0},undef,{0,0}},
|
||||
|
||||
$output{$name} = build_term(
|
||||
'#ZONE#', # TimeZoneName
|
||||
[ $name1, $name1 ], # StdAbbr, StdName
|
||||
defined $name2 ? [ $name2, $name2 ] : undef, # DstAbbr, DstName
|
||||
$gmtoff_min, # StdMin
|
||||
$rule2->[RULE_SAVED], # DstMin
|
||||
$rule2->[RULE_DATE], # DstStartDay,
|
||||
rulestart($rule2->[RULE_TIME], $gmtoff_min, 0, $name), # DstStartTime
|
||||
$rule1->[RULE_DATE], # DstEndDay
|
||||
rulestart($rule1->[RULE_TIME], $gmtoff_min, $rule2->[RULE_SAVED], $name), # DstEndTime
|
||||
);
|
||||
}
|
||||
|
||||
sub zonename {
|
||||
my ($format, $letter, $dst) = @_;
|
||||
if (defined($dst) && $format =~ m,/,) {
|
||||
my @format = split(m,/,, $format);
|
||||
return $format[$dst];
|
||||
} else {
|
||||
return sprintf($format, $letter eq '-' ? '' : $letter);
|
||||
}
|
||||
}
|
||||
|
||||
# erlang_localtime wants the transition time in the local wallclock
|
||||
# time before the transition.
|
||||
sub rulestart {
|
||||
my ($start, $gmtoff, $save, $name) = @_;
|
||||
$start =~ m/^(\d+)(\w)$/ or die "no match for $start";
|
||||
($start, my $modified) = ($1, $2);
|
||||
|
||||
if ($modified eq 'W') {
|
||||
# No adjustment needed.
|
||||
} elsif ($modified eq 'Z') {
|
||||
$start += $gmtoff + $save;
|
||||
|
||||
# If adding a (negative) GMT offset puts the time in the previous
|
||||
# day, move it up to the start of the day. If adding a (positive)
|
||||
# GMT offset puts the time in the next day, move it back to the
|
||||
# end of the day. I think that this is wrong, and that actually
|
||||
# the transition should be moved into the previous or next day,
|
||||
# but it's not possible to consistently handle these rules with
|
||||
# "third Sunday in May" logic. (For example, the day before the
|
||||
# third Sunday in May may be either the second or third Saturday
|
||||
# in May.) Having the transition off by a few hours every year is
|
||||
# better than having it be off by a week every seven years or so.
|
||||
if ($start < 0) {
|
||||
warn "moving rule to beginning of day for $name";
|
||||
push(@problem, "Moving rule to beginning of day for Zone $name.");
|
||||
$start = 0;
|
||||
} elsif ($start > HPD * MPH) {
|
||||
warn "moving rule to end of day for $name";
|
||||
push(@problem, "Moving rule to end of day for Zone $name.");
|
||||
$start = HPD * MPH;
|
||||
}
|
||||
} elsif ($modified eq 'S') {
|
||||
# To handle this properly, we would have to know the prevailing
|
||||
# wall-clock offset from standard time. In the general case this
|
||||
# could be different from $save, but in practice all of the
|
||||
# transitions are from standard time to daylight time or vice
|
||||
# versa and not between two different offsets from standard time,
|
||||
# and so this works.
|
||||
$start += $save;
|
||||
}
|
||||
|
||||
return [ int($start / MPH), $start % MPH ];
|
||||
}
|
||||
|
||||
sub build_term {
|
||||
my @term;
|
||||
foreach my $e (@_) {
|
||||
if (! defined $e) {
|
||||
push(@term, 'undef');
|
||||
} elsif (ref($e) eq 'ARRAY') {
|
||||
push(@term, build_term(@$e));
|
||||
} elsif ($e =~ m/^(\-?\d+|[a-z].*)$/) {
|
||||
push(@term, $e); # number or term
|
||||
} else {
|
||||
push(@term, "\"$e\""); # string
|
||||
}
|
||||
}
|
||||
|
||||
return '{' . join(',', @term) . '}';
|
||||
}
|
||||
|
||||
sub link_line {
|
||||
my ($l) = @_;
|
||||
my ($LINK, $canon_name, $old_name) = split(m/\s+/, $l);
|
||||
if (! defined $output{$canon_name}) {
|
||||
print STDERR "no zone $canon_name for link $old_name\n";
|
||||
} else {
|
||||
$output{$old_name} = $output{$canon_name};
|
||||
}
|
||||
}
|
||||
|
||||
my (@mon_to_name, %mon_from_name, @dow_to_name, %dow_from_name);
|
||||
INIT {
|
||||
@mon_to_name = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
|
||||
%mon_from_name = map { $mon_to_name[$_] => $_ } (0..$#mon_to_name);
|
||||
@dow_to_name = qw(Sun Mon Tue Wed Thu Fri Sat);
|
||||
%dow_from_name= map { $dow_to_name[$_] => $_ } (0..$#dow_to_name);
|
||||
}
|
||||
|
||||
sub on_to_day_of_month {
|
||||
my ($on, $year, $month) = @_;
|
||||
|
||||
my $day;
|
||||
if ($on =~ m/^\d+$/) {
|
||||
$day = $on;
|
||||
} else {
|
||||
my ($desired_dow, $time_base);
|
||||
if ($on =~ m/^(\w+)>=(\d+)$/) {
|
||||
$desired_dow = $dow_from_name{$1}; my $desired_day = $2;
|
||||
$time_base = timelocal(0, 0, 0, $desired_day, $month, $year);
|
||||
} elsif ($on =~ m/^last(\w+)$/) {
|
||||
$desired_dow = $dow_from_name{$1};
|
||||
# One week before the beginning of the next month.
|
||||
$time_base = timelocal_nocheck(0, 0, 0, - DPW, $month + 1, $year);
|
||||
} else {
|
||||
die "match $on failed";
|
||||
}
|
||||
($day, my $dow) = (localtime($time_base))[3,6];
|
||||
if ($dow != $desired_dow) { $day += (DPW + $desired_dow - $dow) % DPW; }
|
||||
}
|
||||
|
||||
return $day;
|
||||
}
|
||||
|
||||
# Returns the epoch that the rule was last active, or undef if the
|
||||
# rule has never been active (i.e., it begins in the future).
|
||||
sub last_active_epoch {
|
||||
my ($from, $to, $in, $on) = @_;
|
||||
# $from is a year.
|
||||
# $to is a year, or 'only', or 'max'.
|
||||
# $in is a month name (e.g., 'Jan').
|
||||
# $on is a day-of-month, or a day-of-week>=day-of-month, or 'last'day-of-week.
|
||||
|
||||
my $month = $mon_from_name{$in};
|
||||
|
||||
# First check the rule's from time; if in the future return undef.
|
||||
if ($from > $current_year) { return undef; }
|
||||
if ($from == $current_year && $month > $current_month) { return undef; }
|
||||
if ($from == $current_year && $month == $current_month) {
|
||||
my $day = on_to_day_of_month($on, $current_year, $current_month);
|
||||
if ($day > $current_day) { return undef; }
|
||||
}
|
||||
|
||||
# Now check the rule's to time. If the rule covers the current
|
||||
# year, but doesn't fire until later in the year, subtract a year.
|
||||
my $year = $to eq 'max' ? $current_year : $to eq 'only' ? $from : $to;
|
||||
event_fires_later_this_year: {
|
||||
if ($year < $current_year) { last; }
|
||||
if ($year == $current_year && $month < $current_month) { last; }
|
||||
my $day = on_to_day_of_month($on, $year, $month);
|
||||
if ($year == $current_year && $month == $current_month && $day < $current_day) { last; }
|
||||
# If we get here, this year's rule instance is not active until
|
||||
# later in the year, so subtract a year.
|
||||
$year -= 1;
|
||||
}
|
||||
|
||||
my $day = on_to_day_of_month($on, $year, $month);
|
||||
return timelocal(0, 0, 0, $day, $month, $year);
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue