timeseries/TimeSeries.pm

547 lines
14 KiB
Perl

package TimeSeries;
=head1 NAME
TimeSeries - create plots of time series
=head1 SYNOPSIS
my $ts = TimeSeries->new(style => 'lines', output_format => 'ps');
$ts->legend('Bugs reported', 'Bugs fixed');
$ts->add(1108394622, 42, 23);
$ts->add_timestring('2005-02-01', 33, 39);
print PSFILE $ts->plot;
=head1 DESCRIPTION
This module uses Gnuplot to create plots of multiple timeseries.
Actually it should do all kinds of useful operations on timeseries,
but right now only adding data and plotting the whole thing is
implemented.
=cut
use File::Temp qw(tempfile);
use Time::Local;
use Data::Dumper;
use HTTP::Date qw(parse_date);
use Time::Local qw(timegm_nocheck);
# Derive $VERSION from the version-control "Revision" keyword string below:
# every run of digits is extracted, the first is printed as-is followed by a
# dot, and each remaining one is zero-padded to two digits ("1.11" here).
$VERSION = do { my @r=(q$Revision: 1.11 $=~/\d+/g);sprintf "%d."."%02d"x$#r,@r};
=head2 new(%opts)
Creates a new timeseries object. Possible options are:
=over
=item style
The style for the data. This must be one of the styles supported by
Gnuplot for two-column 2D data, e.g., "bargraph", "boxes", "dots",
"fsteps", "impulses", "lines", "linespoints", "points", "steps".
The default is "lines".
=item output_format
The output file format. Possible values are "ps" (Postscript), "png",
"gif" and "jpeg". In the last three cases, a postscript file is created
first, then printed to a 150dpi ppm file, rotated, scaled down (with
antialiasing) to 75 dpi and finally converted to the requested file
format. This usually results in prettier output than letting gnuplot
create the file directly.
The default is "png".
=back
=cut
sub new {
    my ($class, %opts) = @_;

    # Build the object in one go; every option falls back to its default
    # when it is missing or false.
    my $self = bless {
        data          => [],
        style         => $opts{style}         || "lines",
        output_format => $opts{output_format} || "png",
        gsresolution  => $opts{gsresolution}  || 150,
    }, $class;

    # Goes through the accessor (after gsresolution is in place) because
    # finalresolution() may adjust gsresolution as well.
    $self->finalresolution($opts{finalresolution} || 75);

    return $self;
}
=head2 add($timestamp, @data)
Adds data (one entry for each timeseries) for time $timestamp.
$timestamp is in seconds since the epoch.
=cut
sub add {
    my $self      = shift;
    my $timestamp = shift;

    # Each row is [ epoch seconds, [ one value per series ] ]; the values
    # are copied so later changes to the caller's list do not leak in.
    my $row = [ $timestamp, [ @_ ] ];

    # Like the underlying push, this returns the new number of rows.
    return push @{ $self->{data} }, $row;
}
=head2 add_timestring($timestring, @data)
Adds data (one entry for each timeseries) for time $timestring.
$timestring can be any string parseable by the parse_date function of
the HTTP::Date module.
=cut
# Parse $timestring with HTTP::Date::parse_date, convert it to epoch seconds
# and hand it to add() together with @data.
#
# - If the string carries a numeric zone offset (+hhmm / -hhmm), the offset
#   is applied and the result is computed in UTC.
# - Any other zone value (one that does not look like +hhmm / -hhmm) is
#   treated as UTC.
#   NOTE(review): presumably parse_date can return names such as "GMT" here;
#   named zones other than GMT/UTC would need a real lookup -- confirm.
# - Without a zone the time is interpreted as local time.
#
# Croaks if the string cannot be parsed at all.  Returns whatever add()
# returns.
sub add_timestring {
    my ($self, $timestring, @data) = @_;

    my ($year, $mon, $day, $hour, $min, $sec, $zone)
        = parse_date($timestring);
    unless (defined $year) {
        # Fail with a message that names the offending input instead of
        # letting Time::Local complain about undefined date fields.
        require Carp;
        Carp::croak("add_timestring: cannot parse date '"
                    . (defined $timestring ? $timestring : '') . "'");
    }

    my $timestamp;
    if (defined $zone) {
        # Offset of the zone in minutes east of UTC; stays 0 when the zone
        # is not a numeric +hhmm/-hhmm offset.
        my $offset = 0;
        if (my ($sign, $zh, $zm) = $zone =~ /([+-])(\d\d)(\d\d)/) {
            $offset = $zh * 60 + $zm;
            $offset = -$offset if $sign eq '-';
        }
        # timegm_nocheck accepts out-of-range minutes, so the offset can
        # simply be folded into the minute field.
        $timestamp = timegm_nocheck($sec, $min - $offset, $hour,
                                    $day, $mon - 1, $year);
    } else {
        $timestamp = timelocal($sec, $min, $hour, $day, $mon - 1, $year);
    }

    return $self->add($timestamp, @data);
}
=head2 legend(@legend)
Set the legend for the timeseries (One string per series).
=cut
sub legend {
    my $self = shift;

    # Remember the current legend before (possibly) replacing it.
    my $previous = $self->{legend};

    # Only a non-empty argument list replaces the legend; the strings are
    # copied into a fresh array.
    if (@_) {
        $self->{legend} = [ @_ ];
    }

    # Old legend as a list, or the empty list if none had been set.
    return $previous ? @{$previous} : ();
}
=head2 legend_position($position)
Set the position of the legend. Currently, this is simply a string passed
to gnuplot's "set key" command. Valid positions are left, right, top, bottom,
outside, below and everything else gnuplot understands.
=cut
sub legend_position {
    my $self     = shift;
    my $position = shift;

    # Swap in the new position only when a true value was supplied and
    # always report what was set before the call.
    my $previous = $self->{legend_position};
    if ($position) {
        $self->{legend_position} = $position;
    }
    return $previous;
}
=head2 style([$style])
Sets a new style if $style is given. In any case the old style is
returned.
See new() for details about styles.
=cut
sub style {
    my ($self, $style) = @_;

    # Capture the current style first so it can be returned whether or not
    # a new one is stored.
    my $previous = $self->{style};

    # A missing or false $style leaves the setting alone.
    $self->{style} = $style
        if $style;

    return $previous;
}
=head2 log_x([$log])
if $log is non-zero, the x axis is scaled logarithmically,
if it is 0, the x axis is scaled linearly.
The return value is the old value of this setting.
=cut
sub log_x {
    my $self = shift;
    my ($flag) = @_;

    my $previous = $self->{log_x};

    # Unlike the string-valued accessors this tests definedness, so an
    # explicit 0 is stored and switches the axis back to linear.
    if (defined $flag) {
        $self->{log_x} = $flag;
    }

    return $previous;
}
=head2 log_y([$log])
if $log is non-zero, the y axis is scaled logarithmically,
if it is 0, the y axis is scaled linearly.
The return value is the old value of this setting.
=cut
sub log_y {
    my $self = shift;
    my ($flag) = @_;

    my $previous = $self->{log_y};

    # Definedness, not truth, decides whether to store: passing 0 is the
    # way to switch the y axis back to linear scaling.
    if (defined $flag) {
        $self->{log_y} = $flag;
    }

    return $previous;
}
=head2 output_format([$output_format])
Sets a new output format if $output_format is given. In any case the old
output format is returned.
See new() for details about output formats.
=cut
sub output_format {
    my ($self, $format) = @_;

    # The previous format is returned in every case.
    my $previous = $self->{output_format};

    # Only a true value replaces the current format.
    if ($format) {
        $self->{output_format} = $format;
    }

    return $previous;
}
=head2 gsresolution([$gsresolution])
Sets the resolution of the ghostscript output when plotting to a pixmap
format. The previous resolution is returned.
See new() for details about output formats.
=cut
sub gsresolution {
    my $self       = shift;
    my $resolution = shift;

    my $previous = $self->{gsresolution};

    # A missing or zero resolution is ignored and the old one kept.
    $self->{gsresolution} = $resolution
        if $resolution;

    return $previous;
}
=head2 finalresolution([$finalresolution])
Sets the resolution when plotting to a pixmap
format. The previous resolution is returned.
If this resolution is higher than the gsresolution, gsresolution is set
to the same value. If it is higher than half of the gsresolution then the
gsresolution is set to twice the finalresolution.
See new() for details about output formats.
=cut
sub finalresolution {
    my ($self, $finalresolution) = @_;

    my $previous = $self->{finalresolution};

    # Pure getter call: nothing to update.
    return $previous unless defined($finalresolution);

    # A zero/false value does not replace the stored resolution, but the
    # gsresolution adjustment below is still evaluated with it.
    $self->{finalresolution} = $finalresolution if ($finalresolution);

    # Adjust the ghostscript resolution relative to the requested final one:
    # at or above it, both become equal; at or above half of it, the
    # ghostscript resolution becomes twice the final one.
    my $gs = $self->{gsresolution};
    if ($finalresolution >= $gs) {
        $self->{gsresolution} = $finalresolution;
    }
    elsif ($finalresolution >= $gs / 2) {
        $self->{gsresolution} = $finalresolution * 2;
    }

    return $previous;
}
=head2 dstcorr $time [, $period]
corrects for time shifts caused by DST switches by aligning the
time to the given period in local time.
Example:
1048978800 is 2003-03-30 00:00:00 CET. 4 hours (14400
seconds) later, the time is 2003-03-30 05:00:00 CEST. To get back to a
4 hour period starting at midnight, 1 hour needs to be subtracted, so
C<dstcorr(1048993200, 14400)> returns 1048989600, which is 2003-03-30
04:00:00 CEST.
This is an internal function which normally doesn't need to be called by
the user.
=cut
# Align $time (epoch seconds) to a multiple of $period seconds counted from
# local midnight, and return the aligned epoch value.
#
#   $time    epoch seconds to align
#   $period  alignment period in seconds; defaults to one day (86400)
#
# The local time of day of $time is reduced modulo $period.  A non-zero
# remainder is removed by moving $time to the nearer period boundary
# (backwards if the remainder is at most half a period, forwards otherwise).
# Whenever a correction is applied, a trace line is written to STDERR.
sub dstcorr {
    my ($time, $period) = @_;
    $period = 24 * 3600 unless ($period);

    # Lexical copies: these fields must not leak into package globals.
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);

    # Seconds since local midnight, reduced to the offset within the period.
    my $toff = ($hour * 3600 + $min * 60 + $sec) % $period;

    if ($toff != 0) {
        # Past the middle of the period: round up instead of down.
        if ($toff > $period / 2) {
            $toff -= $period;
        }
        print STDERR "correcting time by $toff seconds ";
        printf STDERR "from %04d-%02d-%02d %02d:%02d:%02d ", $year+1900, $mon+1, $mday, $hour, $min, $sec;
        $time -= $toff;
        ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);
        printf STDERR "to %04d-%02d-%02d %02d:%02d:%02d\n", $year+1900, $mon+1, $mday, $hour, $min, $sec;
    }
    return $time;
}
=head2 plot
Returns a string with the plot of the timeseries.
=cut
# Render all rows added so far with gnuplot and return the result as a
# string: the PostScript file itself for output_format "ps", otherwise the
# PostScript rasterised with ghostscript and piped through the netpbm tools
# (plus pnmtopng / ppmtogif / cjpeg for "png" / "gif" / "jpeg").
#
# One curve is plotted per legend entry, using data columns 2, 3, ... in
# legend order.
#
# Returns nothing (undef in scalar context) and warns if there is no data or
# the output cannot be read.  All temporary files are removed before
# returning.
sub plot {
    my ($self) = @_;

    # Bail out early on an empty series; looking at $self->{data}[0][0]
    # below would otherwise autovivify a bogus empty row.
    my $rows = $self->{data};
    unless ($rows && @$rows) {
        warn "TimeSeries::plot: no data to plot\n";
        return;
    }

    # Data file: one line per row, "time<TAB>value<TAB>value...".
    # Values are forced to numbers with "+ 0".
    my ($datafh, $datafn) = tempfile();
    for my $row (@$rows) {
        my ($time, $values) = @$row;
        print $datafh join("\t", $time, map { $_ + 0 } @$values), "\n";
    }
    close($datafh);

    my ($ctlfh, $ctlfn) = tempfile();
    my ($psfh,  $psfn)  = tempfile();
    close($psfh);    # only the name is needed; gnuplot writes the file

    # generic settings
    # NOTE(review): "set data style" is the historic spelling; newer gnuplot
    # releases expect "set style data" -- confirm against the gnuplot
    # version in use before changing it.
    print $ctlfh "set term postscript color\n";
    print $ctlfh "set output '$psfn'\n";
    print $ctlfh "set data style $self->{style}\n";
    print $ctlfh "set grid\n";
    print $ctlfh "set log x\n" if ($self->{log_x});
    print $ctlfh "set log y\n" if ($self->{log_y});
    print $ctlfh "set key $self->{legend_position}\n" if ($self->{legend_position});

    # compute ticks
    # The spacing of the ticks is a bit tricky: they should be related to
    # common time units (1 hour, 1 day, 1 week, ...), which are irregular
    # and not even of constant length (a day can be 23, 24 or 25 hours, a
    # month 28 to 31 days, a year 365 or 366 days).  Also the spacing
    # shouldn't be too tight or too sparse.  The span between the first and
    # the last row selects the tick unit.
    my $firsttime = $rows->[0][0];
    my $lasttime  = $rows->[-1][0];
    my $span      = $lasttime - $firsttime;
    my $day       = 24 * 3600;
    my @tics;
    if ($span > 3 * 365 * $day) {
        # more than 3 years: 1 tick per quarter
        @tics = _plot_month_tics($firsttime, $lasttime, 3);
    } elsif ($span > 3 * 30 * $day) {
        # 3 to 36 months: 1 tick/month
        @tics = _plot_month_tics($firsttime, $lasttime, 1);
    } elsif ($span > 30 * $day) {
        # 30 ... 90 days: 1 tick/week, starting on the Sunday on or before
        # the first row (wday 0).
        my $wday = (localtime($firsttime))[6];
        @tics = _plot_interval_tics(
            _plot_local_midnight($firsttime - $day * $wday),
            $lasttime, 7 * $day, $day, 0);
    } elsif ($span > 8 * $day) {
        # 8 .. 30 days: 1 tick per day.
        @tics = _plot_interval_tics(_plot_local_midnight($firsttime),
            $lasttime, $day, $day, 0);
    } elsif ($span > 2 * $day) {
        # 2 .. 8 days: 1 tick/4 hours
        @tics = _plot_interval_tics(_plot_local_midnight($firsttime),
            $lasttime, 4 * 3600, 4 * 3600, 1);
    } else {
        # less than 2 days: 1 tick per hour, starting at the full hour at
        # or before the first row.  No DST correction is applied here.
        my ($hour, $mday, $mon, $year) = (localtime($firsttime))[2 .. 5];
        my $start = timelocal(0, 0, $hour, $mday, $mon, $year + 1900);
        @tics = _plot_interval_tics($start, $lasttime, 3600, 0, 1);
    }
    print $ctlfh "set xtics rotate (", join(", ", @tics), ")\n";

    # what to plot: one curve per legend entry, columns 2, 3, ...
    my $col = 2;
    my @curves = map { "'$datafn' using 1:" . $col++ . " title '$_'" }
                 @{ $self->{legend} || [] };
    print $ctlfh "plot ", join(", ", @curves), "\n";
    close($ctlfh);

    my $rc = system("gnuplot", $ctlfn);
    warn "TimeSeries::plot: gnuplot exited with status $rc\n" if $rc != 0;

    # Open either the PostScript file itself or a shell pipeline that
    # converts it to the requested pixmap format.
    my $format = $self->{output_format};
    my ($in, $opened);
    if ($format eq "ps") {
        $opened = open($in, '<', $psfn);
    } else {
        my @stages = ("gs -sDEVICE=ppmraw -r" . $self->{gsresolution}
                      . " -dBATCH -sOutputFile=- -q - < $psfn");
        if ($self->{gsresolution} != $self->{finalresolution}) {
            push @stages,
                "pnmscale " . ($self->{finalresolution} / $self->{gsresolution});
        }
        push @stages, "pnmflip -cw", "pnmcrop 2> /dev/null";
        if ($format eq "png") {
            push @stages, "pnmtopng";
        } elsif ($format eq "gif") {
            # the ppm tools are noisy. Shut them up.
            push @stages, "ppmquant 256 2> /dev/null", "ppmtogif 2> /dev/null";
        } elsif ($format eq "jpeg") {
            push @stages, "cjpeg -sample 1x1,1x1,1x1";
        }
        # Any other format gets no converter stage, i.e. the raw ppm stream.
        $opened = open($in, '-|', join(" | ", @stages));
    }

    my $graph;
    if ($opened) {
        binmode($in);           # image data must not be newline-translated
        local $/ = undef;       # slurp everything in one read
        $graph = <$in>;
        close($in);
    } else {
        warn "TimeSeries::plot: cannot read plot output: $!\n";
    }

    # tempfile() does not remove its files by itself.
    unlink($datafn, $ctlfn, $psfn);

    return $graph;
}

# Return the epoch value of local midnight of the day containing $time.
sub _plot_local_midnight {
    my ($time) = @_;
    my ($mday, $mon, $year) = (localtime($time))[3 .. 5];
    # Four-digit year, so Time::Local never has to guess the century.
    return timelocal(0, 0, 0, $mday, $mon, $year + 1900);
}

# Build gnuplot xtics entries ('"YYYY-MM-DD" epoch') on the first day of
# every $step-th month, starting with the $step-month block that contains
# $firsttime and ending with the first tick later than $lasttime.
sub _plot_month_tics {
    my ($firsttime, $lasttime, $step) = @_;
    my ($mon, $year) = (localtime($firsttime))[4, 5];
    $mon = int($mon / $step) * $step;
    my @tics;
    for (;;) {
        my $time = timelocal(0, 0, 0, 1, $mon, $year + 1900);
        push @tics, sprintf(qq|"%04d-%02d-%02d" %d|,
                            $year + 1900, $mon + 1, 1, $time);
        last if $time > $lasttime;
        $mon += $step;
        if ($mon >= 12) {
            $mon -= 12;
            $year++;
        }
    }
    return @tics;
}

# Build gnuplot xtics entries every $step seconds, starting at $time and
# ending with the first tick later than $lasttime.  If $period is non-zero,
# every step is re-aligned with dstcorr($time, $period).  Labels are
# "YYYY-MM-DD", or "YYYY-MM-DD hh:mm" when $with_clock is true.
sub _plot_interval_tics {
    my ($time, $lasttime, $step, $period, $with_clock) = @_;
    my @tics;
    for (;;) {
        my ($min, $hour, $mday, $mon, $year) = (localtime($time))[1 .. 5];
        my $label = sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday);
        $label .= sprintf(" %02d:%02d", $hour, $min) if $with_clock;
        push @tics, sprintf(qq|"%s" %d|, $label, $time);
        last if $time > $lasttime;
        $time += $step;
        $time = dstcorr($time, $period) if $period;
    }
    return @tics;
}
1;