timeseries/TimeSeries.pm

710 lines
18 KiB
Perl

#!/usr/bin/perl
package TimeSeries;
=head1 NAME
TimeSeries - create plots of time series
=head1 SYNOPSIS
my $ts = TimeSeries->new(style=>lines, output_format => ps);
$ts->legend('Bugs reported', 'Bugs fixed');
$ts->add(1108394622, 42, 23);
$ts->add_timestring('2005-02-01', 33, 39);
print PSFILE $ts->plot;
=head1 DESCRIPTION
This module uses Gnuplot to create plots of multiple timeseries.
Actually it should do all kinds of useful operations on timeseries,
but right now only adding data and plotting the whole thing is
implemented.
=cut
use warnings;
use strict;
use File::Temp qw(tempfile);
use Time::Local;
use Data::Dumper;
use HTTP::Date qw(parse_date);
use Time::Local qw(timegm_nocheck);
use POSIX qw(floor strftime);
our $VERSION = do { my @r=(q$Revision: 1.24 $=~/\d+/g);sprintf "%d."."%02d"x$#r,@r};
our $debug;
=head2 new(%opts)
Creates a new timeseries object. Possible options are:
=over
=item style
The style for the data. This must be one of the styles supported by
Gnuplot for two-column 2D data, e.g, "bargraph", "boxes", "dots",
"fsteps", "impulses", "lines", "linespoints", "points", "steps".
The default is "lines".
=item output_format
The output file format. Possible values are "ps" (Postscript), "png",
"gif" and "jpeg". In the last three cases, a postscript file is created
first, then printed to a 150dpi ppm file, rotated, scaled down (with
antialiasing) to 75 dpi and finally converted to the requested file
format. This usually results in prettier output than letting gnuplot
create the file directly.
The default is "png".
=back
=cut
sub new {
my ($class, %opts) = @_;
my $self = {};
bless ($self, $class);
$self->{data} = [];
$self->{style} = $opts{style} || "lines";
$self->{output_format} = $opts{output_format} || "png";
$self->{gsresolution} = $opts{gsresolution} || 150;
$self->finalresolution($opts{finalresolution} || 75);
return $self;
}
=item add($timestamp, @data)
Adds data (one entry for each timeseries) for time $timestamp.
$timestamp is in seconds since the epoch.
=cut
sub add {
my ($self, $timestamp, @data) = @_;
push(@{$self->{data}}, [ $timestamp, [ @data ] ]);
#print Dumper($self);
}
=item add_timestring($timestring, @data)
Adds data (one entry for each timeseries) for time $timestring.
$timestring can be any string parseable by the parse_date function of
the HTTP::Date module.
=cut
sub add_timestring {
my ($self, $timestring, @data) = @_;
my ($year, $mon, $day, $hour, $min, $sec, $zone)
= parse_date($timestring);
# print STDERR "date = ($year, $mon, $day, $hour, $min, $sec, $zone)\n";
my $timestamp;
if (defined ($zone)) {
# adjust for timezone
my ($zs, $zh, $zm) = $zone =~ /([+-])(\d\d)(\d\d)?/;
$zm //= 0;
$min -= ($zs eq '-' ? -1 : +1) * ($zh * 60 + $zm);
$timestamp = timegm_nocheck($sec, $min, $hour, $day, $mon-1, $year);
} else {
$timestamp = timelocal($sec, $min, $hour, $day, $mon-1, $year);
}
# print STDERR "\$timestamp = $timestamp\n";
$self->add($timestamp, @data);
}
=head2 legend(@legend)
Set the legend for the timeseries (One string per series).
=cut
sub legend {
my ($self, @legend) = @_;
my $oldlegend = $self->{legend};
$self->{legend} = [@legend] if (@legend);
return $oldlegend ? @$oldlegend : ();
}
=head2 legend_position($position)
Set the position of the legend. Currently, this is simply a string passed
to gnuplot's "set key" command. Valid positions are left, right, top, bottom,
outside, below and everything else gnuplot understands.
=cut
sub legend_position {
my ($self, $legend_position) = @_;
my $oldlegend_position = $self->{legend_position};
$self->{legend_position} = $legend_position if ($legend_position);
return $oldlegend_position;
}
=head2 style([$style])
Sets a new style if $style is given. In any case the old style is
returned.
See new() for details about styles.
=cut
sub style {
my ($self, $style) = @_;
my $oldstyle = $self->{style};
$self->{style} = $style if ($style);
return $oldstyle;
}
=head2 log_x([$log])
if $log is non-zero, the x axis is scaled logarithmically,
if it is 0, the x axis is scaled linearly.
The return value is the old value of this setting.
=cut
sub log_x {
my ($self, $log_x) = @_;
my $oldlog_x = $self->{log_x};
$self->{log_x} = $log_x if (defined($log_x));
return $oldlog_x;
}
=head2 log_y([$log])
if $log is non-zero, the y axis is scaled logarithmically,
if it is 0, the y axis is scaled linearly.
The return value is the old value of this setting.
=cut
sub log_y {
my ($self, $log_y) = @_;
my $oldlog_y = $self->{log_y};
$self->{log_y} = $log_y if (defined($log_y));
return $oldlog_y;
}
=head2 stacked([$stacked])
If $stacked is non-zero, the timeseries are stacked.
The return value is the old value of this setting.
=cut
sub stacked {
my ($self, $stacked) = @_;
my $oldstacked = $self->{stacked};
$self->{stacked} = $stacked if (defined($stacked));
return $oldstacked;
}
=head2 output_format([$output_format])
Sets a new output format if $output_format is given. In any case the old
output format is returned.
See new() for details about output formats.
=cut
sub output_format {
my ($self, $output_format) = @_;
my $oldoutput_format = $self->{output_format};
$self->{output_format} = $output_format if ($output_format);
return $oldoutput_format;
}
=head2 gsresolution([$gsresolution])
Sets the resolution of the ghostscript output when plotting to a pixmap
format. The previous resolution is returned.
See new() for details about output formats.
=cut
sub gsresolution {
my ($self, $gsresolution) = @_;
my $oldgsresolution = $self->{gsresolution};
$self->{gsresolution} = $gsresolution if ($gsresolution);
return $oldgsresolution;
}
=head2 finalresolution([$finalresolution])
Sets the resolution when plotting to a pixmap
format. The previous resolution is returned.
If this resolution is higher than the gsresolution, gsresolution is set
the same value. If it is higher than half of the gsresolution then the
gsresolution is set to twice the finalresolution.
See new() for details about output formats.
=cut
sub finalresolution {
my ($self, $finalresolution) = @_;
my $oldfinalresolution = $self->{finalresolution};
if (defined($finalresolution)) {
$self->{finalresolution} = $finalresolution if ($finalresolution);
if ($finalresolution >= $self->{gsresolution}) {
$self->{gsresolution} = $finalresolution;
} elsif ($finalresolution >= $self->{gsresolution} / 2) {
$self->{gsresolution} = $finalresolution * 2;
}
}
return $oldfinalresolution;
}
=head2 yrange([$ymin, $ymax])
Set the y range. If either $ymin or $ymax is undef it is automatically
determined.
=cut
sub yrange {
my ($self, $ymin, $ymax) = @_;
if (@_ == 3) {
$self->{yrange} = [ $ymin, $ymax ];
}
return $self->{yrange} ? @{ $self->{yrange} } : ();
}
=head2 colors(@colors)
Get or set the color palette.
=cut
sub colors {
my ($self, @colors) = @_;
if (@colors) {
$self->{colors} = [@colors];
}
return @colors;
}
=head2 pointtype($pointtype)
Get or set the point type. This sets the point type for all the
timeseries. There probably should be a way to set the point type for
each timeseries, like for the colors.
=cut
sub pointtype {
my ($self, $pointtype) = @_;
if (defined $pointtype) {
$self->{pointtype} = $pointtype;
}
return $self->{pointtype};
}
=head2 pointsize($pointsize)
Get or set the point size. This sets the point size for all the
timeseries. I think that's what we usually want. Different point sizes
for different timeseries are probably not a good idea.
=cut
sub pointsize {
my ($self, $pointsize) = @_;
if (defined $pointsize) {
$self->{pointsize} = $pointsize;
}
return $self->{pointsize};
}
=head2 dstcorr $time [, $period]
corrects for time shifts caused by DST switches by aligning the
time to the given period in local time.
Example:
1048989600 is 2003-03-30 00:00:00 CET. 4 hours (14400
seconds) later, the time is 2003-03-30 05:00:00 CEST. To get back to a
4 hour period starting at midnight, 1 hour needs to be subtracted, so
C<dstcorr(1048993200, 14400)> returns 1048989600, which is 2003-03-30
04:00:00 CEST.
This is an internal function which normally doesn't need to be called by
the user.
=cut
sub dstcorr {
my ($time, $period) = @_;
$period = 24 * 3600 unless ($period);
my ($sec,$min,$hour,$mday,$mon,$year) = localtime($time);
my $toff = ($hour * 3600 + $min * 60 * $sec) % $period;
if ($toff != 0) {
if ($toff > $period/2) {
$toff -= $period;
}
print STDERR "correcting time by $toff seconds " if ($debug);
printf STDERR "from %04d-%02d-%02d %02d:%02d:%02d ", $year+1900, $mon+1, $mday, $hour, $min, $sec if ($debug);
$time -= $toff;
($sec,$min,$hour,$mday,$mon,$year) = localtime($time);
printf STDERR "to %04d-%02d-%02d %02d:%02d:%02d\n", $year+1900, $mon+1, $mday, $hour, $min, $sec if ($debug);
}
return $time;
}
=head2 plot
Returns a string with the plot of the timeseries.
=cut
sub plot {
my ($self) = @_;
#print Dumper($self);
my ($datafh, $datafn) = tempfile("tsplotXXXXXXXX", UNLINK => !$self->{keeptempfiles});
for my $i (@{$self->{data}}) {
my $time = $i->[0];
my $data = $i->[1];
print $datafh $time;
if ($self->{stacked}) {
my $v = 0;
my @stackeddata;
for (my $j = $#$data; $j >= 0; $j--) {
$v += ($data->[$j] || 0);
$stackeddata[$j] = $v;
}
for my $v (@stackeddata) {
print $datafh "\t", $v;
}
} else {
for my $j (@$data) {
print $datafh "\t", (defined $j ? $j : '?');
}
}
print $datafh "\n";
}
close($datafh);
my ($ctlfh, $ctlfn) = tempfile(UNLINK => !$self->{keeptempfiles});
my ($psfh, $psfn) = tempfile(UNLINK => !$self->{keeptempfiles});
# generic settings
print $ctlfh "set encoding utf8\n";
if ($self->{output_format} eq 'svg') {
print $ctlfh "set term $self->{output_format}\n";
} else {
print $ctlfh "set term postscript color solid 10\n";
}
print $ctlfh "set output '$psfn'\n";
print $ctlfh "set style data $self->{style}\n";
print $ctlfh "set grid\n";
print $ctlfh "set log x\n" if ($self->{log_x});
print $ctlfh "set log y\n" if ($self->{log_y});
print $ctlfh "set key $self->{legend_position}\n" if ($self->{legend_position});
print $ctlfh "set datafile missing '?'\n";
print $ctlfh "set yrange [", $self->{yrange}[0] // "*", ":", $self->{yrange}[1] // "*", "]\n";
my $firsttime = $self->{data}[0][0];
my $lasttime = $self->{data}[$#{$self->{data}}][0];
my @tics = get_ticks($firsttime, $lasttime);
# force tick values to look like fp numbers to avoid warnings about
# exceeding the int range from gnuplot
print $ctlfh "set xtics rotate (",
join(", ", map sprintf(qq|"%s" %.16e|, $_->[1], $_->[0]), @tics),
")\n";
# what to plot
print $ctlfh "plot ";
my $comma = 0;
my $col = 2;
for my $i (0 .. $#{$self->{legend}}) {
if ($comma) {
print $ctlfh ", ";
} else {
$comma = 1;
}
my $legend = $self->{legend}[$i];
my $escaped_legend = $legend =~ s/_/\\_/gr;
if ($self->{style} eq 'filledcurves') {
print $ctlfh "'$datafn' using 1:(\$", $col++, "):(0) title '$escaped_legend'";
} else {
print $ctlfh "'$datafn' using 1:(\$", $col++, ") title '$escaped_legend'";
}
if (my $color = $self->{colors}[$i]) {
print $ctlfh " linecolor rgbcolor '$color'";
}
if (defined $self->{pointtype}) {
print $ctlfh " pointtype $self->{pointtype}";
}
if (defined $self->{pointsize}) {
print $ctlfh " pointsize $self->{pointsize}";
}
}
print $ctlfh "\n";
close ($ctlfh);
my $rc = system("gnuplot", $ctlfn);
#print STDERR "system returned $rc\n";
my $pipe;
if ($self->{output_format} eq "ps") {
$pipe = "< $psfn";
} elsif ($self->{output_format} eq "svg") {
$pipe = "< $psfn";
} else {
$pipe = "gs -sDEVICE=ppmraw -r" . $self->{gsresolution} . " -dBATCH -sOutputFile=- -q - < $psfn |";
if ($self->{gsresolution} != $self->{finalresolution}) {
$pipe .= "pnmscale " . ($self->{finalresolution} / $self->{gsresolution}) . " |";
}
$pipe .= "pnmflip -cw |";
if ($self->{crop}) {
$pipe .= "pnmcrop 2> /dev/null |";
}
}
if ($self->{output_format} eq "png") {
$pipe .= "pnmtopng 2>/dev/null |";
}
if ($self->{output_format} eq "gif") {
# the ppm tools are noisy. Shut them up.
$pipe .= "ppmquant 256 2> /dev/null |" .
"ppmtogif 2> /dev/null |";
}
if ($self->{output_format} eq "jpeg") {
$pipe .= "cjpeg -sample 1x1,1x1,1x1 |";
}
open(PNG, $pipe);
my $graph;
{ local $/ = undef; $graph = <PNG>; }
close(PNG);
return $graph;
}
=head2 get_ticks($firsttime, $lasttime)
Compute a "reasonable" set of ticks
covering the interval between $firsttime and $lasttime.
The spacing of the ticks a bit tricky: They should be related to
common time units (1 hour, 1 day, 1 week, ...), which are
irregular and not even of constant length (a day can be 23, 24 or
25 hours, a month 28 to 31 days, a year 365 or 366 days). Also the
spacing shouldn't be too tight or too sparse.
The function returns an ordered list of [ $timestamp, $label ] pairs.
$firsttime falls into the interval between the first and second timestamp.
=cut
sub get_ticks {
my ($firsttime, $lasttime) = @_;
my @ticks = ();
my $label;
my $nexttime;
if ($lasttime - $firsttime > 50 * 365 * 24 * 3600) {
# more than 50 years: 1 tick/5 years
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = $hour = 0;
$mday = 1;
$mon = 0;
$year = floor(($year + 1900) / 5) * 5;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d';
$nexttime = sub { return add_years($_[0], 5) };
} elsif ($lasttime - $firsttime > 10 * 365 * 24 * 3600) {
# more than 10 years: 1 tick/year
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = $hour = 0;
$mday = 1;
$mon = 0;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d';
$nexttime = sub { return add_years($_[0], 1) };
} elsif ($lasttime - $firsttime > 3 * 365 * 24 * 3600) {
# more than 3 years: 4 ticks/year
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = $hour = 0;
$mday = 1;
$mon = int($mon/3) * 3;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d';
$nexttime = sub { return add_months($_[0], 3) };
} elsif ($lasttime - $firsttime > 3 * 30 * 24 * 3600) {
# 3 to 36 months: 1 tick/month
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = $hour = 0;
$mday = 1;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d';
$nexttime = sub { return add_months($_[0], 1) };
} elsif ($lasttime - $firsttime > 30 * 24 * 3600) {
# 30 ... 90 days: 1 tick/week.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$firsttime -= 86400 * (($wday + 6) % 7);
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = $hour = 0;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d';
$nexttime = sub { return dstcorr($_[0] + 7 * 24 * 3600) };
} elsif ($lasttime - $firsttime > 8 * 24 * 3600) {
# 8 .. 30 days: 1 tick per day.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = $hour = 0;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d';
$nexttime = sub { return dstcorr($_[0] + 24 * 3600) };
} elsif ($lasttime - $firsttime > 2 * 24 * 3600) {
# 2 .. 8 days: 1 tick/4 hours
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = 0;
$hour = int($hour / 4) * 4;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d %H:%M';
$nexttime = sub { return dstcorr($_[0] + 4 * 3600, 4 * 3600) };
} elsif ($lasttime - $firsttime > 6 * 3600) {
# 6 hours to 2 days: 1 tick per hour.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = $min = 0;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d %H:%M';
$nexttime = sub { return $_[0] + 3600 };
} elsif ($lasttime - $firsttime > 1 * 3600) {
# 1 to 6 hours: 1 tick per 15 minutes.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = 0;
$min = int($min / 15) * 15;
$firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d %H:%M';
$nexttime = sub { return $_[0] + 15 * 60 };
} else {
# less than 1 hour: 1 tick per minute.
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime);
$sec = 0;
my $time = $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year);
$label = '%Y-%m-%d %H:%M';
$nexttime = sub { return $_[0] + 60 };
}
my $time = $nexttime->($firsttime);
my $llb;
for (;;) {
my $lb = strftime($label, localtime($time));
my $dlb = $lb;
if ($llb) {
my @lc = split(/(\d+)/, $llb);
my @c = split(/(\d+)/, $lb);
for my $i (0 .. $#c) {
if ($c[$i] eq $lc[$i]) {
$c[$i] = " " x length($c[$i]);
} else {
last;
}
}
$dlb = join("", @c);
}
push @ticks, [$time, $dlb];
if ($time > $lasttime) {last}
$time = $nexttime->($time);
$llb = $lb;
}
return @ticks;
}
sub add_months {
my ($time, $d_mon) = @_;
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
= localtime($time);
$year += 1900; # localtime/timelocal mismatch
$mon += $d_mon;
if ($mon >= 12) {
$mon -= 12; $year++;
}
$time = timelocal($sec,$min,$hour,$mday,$mon,$year);
}
sub add_years {
my ($time, $d_year) = @_;
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
= localtime($time);
$year += 1900; # localtime/timelocal mismatch
$year += $d_year;
$time = timelocal($sec,$min,$hour,$mday,$mon,$year);
}
1;