timeseries/TimeSeries.pm

561 lines
14 KiB
Perl
Raw Normal View History

#!/usr/bin/perl
2003-03-03 12:54:09 +01:00
package TimeSeries;
=head1 NAME
TimeSeries - create plots of time series
=head1 SYNOPSIS
my $ts = TimeSeries->new(style => 'lines', output_format => 'ps');
$ts->legend('Bugs reported', 'Bugs fixed');
$ts->add(1108394622, 42, 23);
$ts->add_timestring('2005-02-01', 33, 39);
print PSFILE $ts->plot;
=head1 DESCRIPTION
This module uses Gnuplot to create plots of multiple timeseries.
Actually it should do all kinds of useful operations on timeseries,
but right now only adding data and plotting the whole thing is
implemented.
=cut
use warnings;
use strict;
2003-03-03 12:54:09 +01:00
use File::Temp qw(tempfile);
use Time::Local;
use Data::Dumper;
use HTTP::Date qw(parse_date);
use Time::Local qw(timegm_nocheck);
use POSIX qw(strftime);
2003-03-03 12:54:09 +01:00
2006-12-18 13:32:10 +01:00
our $VERSION = do { my @r=(q$Revision: 1.19 $=~/\d+/g);sprintf "%d."."%02d"x$#r,@r};
our $debug;
=head2 new(%opts)
Creates a new timeseries object. Possible options are:
=over
=item style
The style for the data. This must be one of the styles supported by
Gnuplot for two-column 2D data, e.g., "bargraph", "boxes", "dots",
"fsteps", "impulses", "lines", "linespoints", "points", "steps".
The default is "lines".
=item output_format
The output file format. Possible values are "ps" (Postscript), "png",
"gif" and "jpeg". In the last three cases, a postscript file is created
first, then printed to a 150dpi ppm file, rotated, scaled down (with
antialiasing) to 75 dpi and finally converted to the requested file
format. This usually results in prettier output than letting gnuplot
create the file directly.
The default is "png".
=back
=cut
2003-03-03 12:54:09 +01:00
# Constructor.
#
# %opts may carry style, output_format, gsresolution and finalresolution;
# any option that is missing or false falls back to its default
# ("lines", "png", 150 and 75 respectively).
sub new {
    my ($class, %opts) = @_;

    my $self = bless {
        data          => [],
        style         => $opts{style}         || "lines",
        output_format => $opts{output_format} || "png",
        gsresolution  => $opts{gsresolution}  || 150,
    }, $class;

    # The final resolution is set through its accessor (and only after
    # gsresolution is in place) because the accessor may raise gsresolution.
    $self->finalresolution($opts{finalresolution} || 75);

    return $self;
}
=head2 add($timestamp, @data)
Adds data (one entry for each timeseries) for time $timestamp.
$timestamp is in seconds since the epoch.
=cut
2003-03-03 12:54:09 +01:00
# Append one sample to the series.
#
# $timestamp is stored together with a private copy of @data as
# [ $timestamp, [ @data ] ].  Returns the number of samples stored so far
# (the result of push).
sub add {
    my $self = shift;
    my ($when, @values) = @_;

    my $record = [ $when, [ @values ] ];
    return push @{ $self->{data} }, $record;
}
=head2 add_timestring($timestring, @data)
Adds data (one entry for each timeseries) for time $timestring.
$timestring can be any string parseable by the parse_date function of
the HTTP::Date module.
=cut
# Parse $timestring with HTTP::Date::parse_date and add @data at the
# resulting epoch time.
#
# If the string carries a time zone, the broken-down time is interpreted
# relative to that zone; otherwise it is interpreted as local time.
# Numeric zones may be written as +HHMM, -HHMM, +HH:MM, +HH or without a
# sign (taken as positive); GMT, UTC, UT and Z mean offset zero.  Any other
# zone name is treated as UTC after emitting a warning.
#
# Dies if $timestring cannot be parsed at all.
# Returns whatever add() returns.
sub add_timestring {
    my ($self, $timestring, @data) = @_;

    my ($year, $mon, $day, $hour, $min, $sec, $zone)
        = parse_date($timestring);

    # Without this check an unparsable string only fails later inside
    # timelocal() with a misleading "Month '-1' out of range" message.
    die "TimeSeries::add_timestring: cannot parse date '"
        . (defined $timestring ? $timestring : '') . "'\n"
        unless defined $year;

    my $timestamp;
    if (defined $zone) {
        my $offset = 0;    # minutes east of UTC
        if ($zone =~ /\A([-+])?(\d\d?):?(\d\d)?\z/) {
            $offset = $2 * 60 + (defined $3 ? $3 : 0);
            $offset = -$offset if defined $1 && $1 eq '-';
        }
        elsif ($zone !~ /\A(?:GMT|UTC?|Z)\z/i) {
            warn "TimeSeries::add_timestring: unknown time zone '$zone', assuming UTC\n";
        }
        # timegm_nocheck tolerates the out-of-range minute value that the
        # zone adjustment can produce.
        $timestamp = timegm_nocheck($sec, $min - $offset, $hour,
                                    $day, $mon - 1, $year);
    }
    else {
        $timestamp = timelocal($sec, $min, $hour, $day, $mon - 1, $year);
    }

    return $self->add($timestamp, @data);
}
2003-03-03 12:54:09 +01:00
=head2 legend(@legend)
Set the legend for the timeseries (One string per series).
=cut
2003-03-03 12:54:09 +01:00
# Get/set the legend strings (one per series).
#
# When called with at least one string the legend is replaced by a copy
# of the arguments.  The previous legend is returned as a list (empty if
# none was set).
sub legend {
    my $self = shift;

    my $previous = $self->{legend};
    if (@_) {
        $self->{legend} = [ @_ ];
    }

    return () unless $previous;
    return @$previous;
}
2005-11-27 12:16:55 +01:00
=head2 legend_position($position)
Set the position of the legend. Currently, this is simply a string passed
to gnuplot's "set key" command. Valid positions are left, right, top, bottom,
outside, below and everything else gnuplot understands.
=cut
# Get/set the legend position (the argument of gnuplot's "set key").
# A false or missing argument leaves the setting untouched.
# Returns the previous position.
sub legend_position {
    my $self     = shift;
    my $position = shift;

    my $previous = $self->{legend_position};
    if ($position) {
        $self->{legend_position} = $position;
    }
    return $previous;
}
=head2 style([$style])
Sets a new style if $style is given. In any case the old style is
returned.
See new() for details about styles.
=cut
2003-03-03 12:54:09 +01:00
# Get/set the gnuplot data style.
# The style is only replaced when a true value is passed.
# Returns the style that was in effect before the call.
sub style {
    my ($self, @args) = @_;

    my $previous = $self->{style};
    $self->{style} = $args[0] if $args[0];

    return $previous;
}
=head2 log_x([$log])
if $log is non-zero, the x axis is scaled logarithmically,
if it is 0, the x axis is scaled linearly.
The return value is the old value of this setting.
=cut
# Get/set the logarithmic-x-axis flag.
# Any defined argument (including 0) is stored; undef leaves it alone.
# Returns the previous value of the flag.
sub log_x {
    my $self = shift;
    my $flag = shift;

    my $previous = $self->{log_x};
    if (defined $flag) {
        $self->{log_x} = $flag;
    }
    return $previous;
}
=head2 log_y([$log])
if $log is non-zero, the y axis is scaled logarithmically,
if it is 0, the y axis is scaled linearly.
The return value is the old value of this setting.
=cut
# Get/set the logarithmic-y-axis flag.
# Any defined argument (including 0) is stored; undef leaves it alone.
# Returns the previous value of the flag.
sub log_y {
    my ($self, @args) = @_;

    my $previous = $self->{log_y};
    $self->{log_y} = $args[0] if defined $args[0];

    return $previous;
}
=head2 stacked([$stacked])
If $stacked is non-zero, the timeseries are stacked.
The return value is the old value of this setting.
=cut
# Get/set the "stacked" flag.
# Any defined argument (including 0) is stored; undef leaves it alone.
# Returns the previous value of the flag.
sub stacked {
    my $self     = shift;
    my $new_flag = shift;

    my $previous = $self->{stacked};
    if (defined $new_flag) {
        $self->{stacked} = $new_flag;
    }
    return $previous;
}
=head2 output_format([$output_format])
Sets a new output format if $output_format is given. In any case the old
output format is returned.
See new() for details about output formats.
=cut
# Get/set the output file format.
# The format is only replaced when a true value is passed.
# Returns the format that was in effect before the call.
sub output_format {
    my $self   = shift;
    my $format = shift;

    my $previous = $self->{output_format};
    if ($format) {
        $self->{output_format} = $format;
    }
    return $previous;
}
=head2 gsresolution([$gsresolution])
Sets the resolution of the ghostscript output when plotting to a pixmap
format. The previous resolution is returned.
See new() for details about output formats.
=cut
# Get/set the resolution handed to ghostscript ("-r") when rendering to a
# pixmap.  The value is only replaced when a true value is passed.
# Returns the resolution that was in effect before the call.
sub gsresolution {
    my ($self, @args) = @_;

    my $previous = $self->{gsresolution};
    $self->{gsresolution} = $args[0] if $args[0];

    return $previous;
}
=head2 finalresolution([$finalresolution])
Sets the resolution when plotting to a pixmap
format. The previous resolution is returned.
If this resolution is higher than the gsresolution, gsresolution is set
to the same value. If it is higher than half of the gsresolution then the
gsresolution is set to twice the finalresolution.
See new() for details about output formats.
=cut
# Get/set the resolution of the final pixmap.
#
# With a defined argument:
#   - a true value is stored as the new final resolution;
#   - if the argument is at least the current gsresolution, gsresolution
#     is set to the argument;
#   - otherwise, if it is at least half of gsresolution, gsresolution is
#     set to twice the argument.
# Returns the final resolution that was in effect before the call.
sub finalresolution {
    my $self      = shift;
    my $requested = shift;

    my $previous = $self->{finalresolution};
    return $previous unless defined $requested;

    $self->{finalresolution} = $requested if $requested;

    my $gs = $self->{gsresolution};
    if ($requested >= $gs) {
        $self->{gsresolution} = $requested;
    }
    elsif ($requested >= $gs / 2) {
        $self->{gsresolution} = $requested * 2;
    }

    return $previous;
}
2003-03-03 12:54:09 +01:00
2003-08-15 23:08:33 +02:00
=head2 dstcorr $time [, $period]
corrects for time shifts caused by DST switches by aligning the
time to the given period in local time.
Example:
1048978800 is 2003-03-30 00:00:00 CET. 4 hours (14400
seconds) later, at 1048993200, the time is 2003-03-30 05:00:00 CEST. To get back to a
4 hour period starting at midnight, 1 hour needs to be subtracted, so
C<dstcorr(1048993200, 14400)> returns 1048989600, which is 2003-03-30
04:00:00 CEST.
This is an internal function which normally doesn't need to be called by
the user.
2003-08-15 23:08:33 +02:00
=cut
2003-03-03 12:54:09 +01:00
# Align $time to a multiple of $period in local wall-clock time.
#
# $time   - seconds since the epoch
# $period - alignment period in seconds; defaults to one day when omitted
#           or false
#
# The local time of day is reduced modulo $period.  If the remainder is at
# most half a period the time is moved back to the preceding boundary,
# otherwise forward to the following one.  Returns the adjusted epoch time.
#
# When the package variable $debug is true the correction is reported on
# STDERR.
sub dstcorr {
    my ($time, $period) = @_;
    $period = 24 * 3600 unless ($period);

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);

    # Seconds since local midnight, modulo the period.  (This used to
    # multiply by $sec instead of adding it, which gave a wrong offset
    # whenever minutes or seconds were non-zero.)
    my $toff = ($hour * 3600 + $min * 60 + $sec) % $period;
    return $time if $toff == 0;

    # Closer to the next boundary than to the previous one: move forward.
    $toff -= $period if $toff > $period / 2;

    if ($debug) {
        print STDERR "correcting time by $toff seconds ";
        printf STDERR "from %04d-%02d-%02d %02d:%02d:%02d ",
            $year+1900, $mon+1, $mday, $hour, $min, $sec;
    }

    $time -= $toff;

    if ($debug) {
        ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);
        printf STDERR "to %04d-%02d-%02d %02d:%02d:%02d\n",
            $year+1900, $mon+1, $mday, $hour, $min, $sec;
    }

    return $time;
}
=head2 plot
Returns a string with the plot of the timeseries.
=cut
2003-03-03 12:54:09 +01:00
# Render the collected series and return the image (or PostScript) as a
# string.
#
# Steps:
#   1. write the samples to a tab-separated temporary data file,
#   2. write a gnuplot control file and run gnuplot to produce PostScript,
#   3. for "ps" return the PostScript as is; otherwise pipe it through
#      gs and the pnm tools (scale, rotate, crop) and the converter for
#      the requested format.  For a format other than "ps", "png", "gif"
#      or "jpeg" no converter is appended, so the cropped PPM is returned.
#
# One curve is plotted per legend entry, so legend() must have been called.
# Problems running gnuplot or opening the result are reported with warn();
# if the result cannot be opened undef is returned.
sub plot {
    my ($self) = @_;

    # --- 1. data file --------------------------------------------------
    # TMPDIR => 1 keeps the file out of the current directory, which may
    # not be writable; the other two temp files already live in the
    # temporary directory.
    my ($datafh, $datafn)
        = tempfile("tsplotXXXXXXXX", TMPDIR => 1, UNLINK => 1);

    for my $row (@{$self->{data}}) {
        my ($time, $values) = @$row;
        print $datafh $time;
        if ($self->{stacked}) {
            # Each column is the running sum of the series so far;
            # missing values count as 0.
            my $sum = 0;
            for my $v (@$values) {
                $sum += ($v || 0);
                print $datafh "\t", $sum;
            }
        }
        else {
            # '?' is declared as the missing-data marker below.
            for my $v (@$values) {
                print $datafh "\t", (defined $v ? $v : '?');
            }
        }
        print $datafh "\n";
    }
    close($datafh)
        or warn "TimeSeries::plot: error writing data file $datafn: $!\n";

    # --- 2. gnuplot control file ---------------------------------------
    my ($ctlfh, $ctlfn) = tempfile(UNLINK => 1);
    # The handle is not used; tempfile just reserves a name for gnuplot
    # to write to.
    my ($psfh, $psfn) = tempfile(UNLINK => 1);

    # generic settings
    print $ctlfh "set term postscript color\n";
    print $ctlfh "set output '$psfn'\n";
    # NOTE(review): newer gnuplot releases spell this "set style data";
    # confirm against the gnuplot version in use before changing it.
    print $ctlfh "set data style $self->{style}\n";
    print $ctlfh "set grid\n";
    print $ctlfh "set log x\n" if ($self->{log_x});
    print $ctlfh "set log y\n" if ($self->{log_y});
    print $ctlfh "set key $self->{legend_position}\n" if ($self->{legend_position});
    print $ctlfh "set datafile missing '?'\n";

    # x axis ticks, labelled with formatted local time
    my $firsttime = $self->{data}[0][0];
    my $lasttime  = $self->{data}[$#{$self->{data}}][0];
    my @tics = get_ticks($firsttime, $lasttime);
    print $ctlfh "set xtics rotate (",
        join(", ", map sprintf(qq|"%s" %d|, $_->[1], $_->[0]), @tics),
        ")\n";

    # what to plot: column 1 is the time, series start at column 2
    my $col = 2;
    my @clauses;
    for my $title (@{$self->{legend}}) {
        push @clauses, "'$datafn' using 1:(\$" . $col++ . ") title '$title'";
    }
    print $ctlfh "plot ", join(", ", @clauses), "\n";
    close($ctlfh)
        or warn "TimeSeries::plot: error writing control file $ctlfn: $!\n";

    my $rc = system("gnuplot", $ctlfn);
    warn "TimeSeries::plot: gnuplot failed (system returned $rc)\n"
        if $rc != 0;

    # --- 3. convert and slurp the result -------------------------------
    my $format = $self->{output_format};
    my $resultfh;
    my $opened;
    if ($format eq "ps") {
        $opened = open($resultfh, '<', $psfn);
    }
    else {
        my @stages = (
            "gs -sDEVICE=ppmraw -r" . $self->{gsresolution}
                . " -dBATCH -sOutputFile=- -q - < $psfn",
        );
        if ($self->{gsresolution} != $self->{finalresolution}) {
            push @stages,
                "pnmscale " . ($self->{finalresolution} / $self->{gsresolution});
        }
        # the ppm tools are noisy. Shut them up.
        push @stages, "pnmflip -cw", "pnmcrop 2> /dev/null";

        if ($format eq "png") {
            push @stages, "pnmtopng 2>/dev/null";
        }
        elsif ($format eq "gif") {
            push @stages, "ppmquant 256 2> /dev/null", "ppmtogif 2> /dev/null";
        }
        elsif ($format eq "jpeg") {
            push @stages, "cjpeg -sample 1x1,1x1,1x1";
        }

        $opened = open($resultfh, '-|', join(" | ", @stages));
    }

    unless ($opened) {
        warn "TimeSeries::plot: cannot read $format output: $!\n";
        return undef;
    }
    binmode($resultfh);

    my $graph = do { local $/ = undef; <$resultfh> };
    close($resultfh);

    return $graph;
}
=head2 get_ticks($firsttime, $lasttime)
Compute a "reasonable" set of ticks
covering the interval between $firsttime and $lasttime.
The spacing of the ticks is a bit tricky: They should be related to
common time units (1 hour, 1 day, 1 week, ...), which are
irregular and not even of constant length (a day can be 23, 24 or
25 hours, a month 28 to 31 days, a year 365 or 366 days). Also the
spacing shouldn't be too tight or too sparse.
The function returns an ordered list of [ $timestamp, $label ] pairs.
$firsttime falls into the interval between the first and second timestamp.
=cut
# Compute tick positions and labels for the x axis.
#
# $firsttime, $lasttime - epoch seconds of the first and last sample
#
# The tick spacing is chosen from the length of the interval:
#   > 3 years    one tick per quarter
#   > 90 days    one tick per month
#   > 30 days    one tick per week (starting on a Sunday)
#   > 8 days     one tick per day
#   > 2 days     one tick per 4 hours
#   > 6 hours    one tick per hour
#   > 1 hour     one tick per 15 minutes
#   otherwise    one tick per minute
#
# The first tick is $firsttime rounded down (in local time) to the tick
# unit; ticks are generated until one lies beyond $lasttime, and that one
# is included.  Returns a list of [ $timestamp, $label ] pairs where
# $label is the strftime-formatted local time.
sub get_ticks {
    my ($firsttime, $lasttime) = @_;

    my $span = $lasttime - $firsttime;
    my $day  = 24 * 3600;

    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = localtime($firsttime);
    # localtime yields years since 1900, but timelocal treats 0..99 as a
    # two-digit year in a rolling century, so always pass the full year.
    $year += 1900;

    my $label = '%Y-%m-%d %H:%M';
    my $nexttime;

    if ($span > 3 * 365 * $day) {
        # more than 3 years: 4 ticks/year, starting at the quarter
        ($sec, $min, $hour, $mday) = (0, 0, 0, 1);
        $mon      = int($mon / 3) * 3;
        $label    = '%Y-%m-%d';
        $nexttime = sub { return add_months($_[0], 3) };
    }
    elsif ($span > 3 * 30 * $day) {
        # 3 to 36 months: 1 tick/month
        ($sec, $min, $hour, $mday) = (0, 0, 0, 1);
        $label    = '%Y-%m-%d';
        $nexttime = sub { return add_months($_[0], 1) };
    }
    elsif ($span > 30 * $day) {
        # 30 ... 90 days: 1 tick/week, starting on the preceding Sunday
        ($mday, $mon, $year) = (localtime($firsttime - 86400 * $wday))[3, 4, 5];
        $year += 1900;
        $sec = $min = $hour = 0;
        $label    = '%Y-%m-%d';
        $nexttime = sub { return dstcorr($_[0] + 7 * 24 * 3600) };
    }
    elsif ($span > 8 * $day) {
        # 8 .. 30 days: 1 tick per day
        $sec = $min = $hour = 0;
        $label    = '%Y-%m-%d';
        $nexttime = sub { return dstcorr($_[0] + 24 * 3600) };
    }
    elsif ($span > 2 * $day) {
        # 2 .. 8 days: 1 tick/4 hours
        $sec = $min = 0;
        $hour     = int($hour / 4) * 4;
        $nexttime = sub { return dstcorr($_[0] + 4 * 3600, 4 * 3600) };
    }
    elsif ($span > 6 * 3600) {
        # 6 hours to 2 days: 1 tick per hour
        $sec = $min = 0;
        $nexttime = sub { return $_[0] + 3600 };
    }
    elsif ($span > 1 * 3600) {
        # 1 to 6 hours: 1 tick per 15 minutes
        $sec      = 0;
        $min      = int($min / 15) * 15;
        $nexttime = sub { return $_[0] + 15 * 60 };
    }
    else {
        # less than 1 hour: 1 tick per minute
        $sec      = 0;
        $nexttime = sub { return $_[0] + 60 };
    }

    # Start of the tick unit containing the first sample.
    $firsttime = timelocal($sec, $min, $hour, $mday, $mon, $year);

    my @ticks;
    my $time = $firsttime;
    while (1) {
        push @ticks, [ $time, strftime($label, localtime($time)) ];
        # The first tick beyond the data is kept so the axis covers it.
        last if $time > $lasttime;
        $time = $nexttime->($time);
    }

    return @ticks;
}
# Return the epoch time $d_mon calendar months after $time (local time).
#
# $time  - seconds since the epoch
# $d_mon - number of months to add; may be negative or larger than 12
#          (a fractional part is discarded)
#
# Day of month and time of day are carried over unchanged, so the result
# is only well defined if that day exists in the target month; timelocal
# rejects out-of-range days.
sub add_months {
    my ($time, $d_mon) = @_;

    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($time);

    # Normalise month/year.  With a positive right operand Perl's % never
    # yields a negative result, so this also handles negative offsets.
    my $total   = $mon + int($d_mon);
    my $new_mon = $total % 12;

    # Pass the full four-digit year: timelocal would treat 0..99 as a
    # two-digit year in a rolling century.
    my $new_year = $year + 1900 + ($total - $new_mon) / 12;

    return timelocal($sec, $min, $hour, $mday, $new_mon, $new_year);
}
2003-03-03 12:54:09 +01:00
1;