#!/usr/bin/perl package TimeSeries; =head1 NAME TimeSeries - create plots of time series =head1 SYNOPSIS my $ts = TimeSeries->new(style=>lines, output_format => ps); $ts->legend('Bugs reported', 'Bugs fixed'); $ts->add(1108394622, 42, 23); $ts->add_timestring('2005-02-01', 33, 39); print PSFILE $ts->plot; =head1 DESCRIPTION This module uses Gnuplot to create plots of multiple timeseries. Actually it should do all kinds of useful operations on timeseries, but right now only adding data and plotting the whole thing is implemented. =cut use warnings; use strict; use File::Temp qw(tempfile); use Time::Local; use Data::Dumper; use HTTP::Date qw(parse_date); use Time::Local qw(timegm_nocheck); use POSIX qw(floor strftime); our $VERSION = do { my @r=(q$Revision: 1.24 $=~/\d+/g);sprintf "%d."."%02d"x$#r,@r}; our $debug; =head2 new(%opts) Creates a new timeseries object. Possible options are: =over =item style The style for the data. This must be one of the styles supported by Gnuplot for two-column 2D data, e.g, "bargraph", "boxes", "dots", "fsteps", "impulses", "lines", "linespoints", "points", "steps". The default is "lines". =item output_format The output file format. Possible values are "ps" (Postscript), "png", "gif" and "jpeg". In the last three cases, a postscript file is created first, then printed to a 150dpi ppm file, rotated, scaled down (with antialiasing) to 75 dpi and finally converted to the requested file format. This usually results in prettier output than letting gnuplot create the file directly. The default is "png". =back =cut sub new { my ($class, %opts) = @_; my $self = {}; bless ($self, $class); $self->{data} = []; $self->{style} = $opts{style} || "lines"; $self->{output_format} = $opts{output_format} || "png"; $self->{gsresolution} = $opts{gsresolution} || 150; $self->finalresolution($opts{finalresolution} || 75); return $self; } =item add($timestamp, @data) Adds data (one entry for each timeseries) for time $timestamp. $timestamp is in seconds since the epoch. =cut sub add { my ($self, $timestamp, @data) = @_; push(@{$self->{data}}, [ $timestamp, [ @data ] ]); #print Dumper($self); } =item add_timestring($timestring, @data) Adds data (one entry for each timeseries) for time $timestring. $timestring can be any string parseable by the parse_date function of the HTTP::Date module. =cut sub add_timestring { my ($self, $timestring, @data) = @_; my ($year, $mon, $day, $hour, $min, $sec, $zone) = parse_date($timestring); # print STDERR "date = ($year, $mon, $day, $hour, $min, $sec, $zone)\n"; my $timestamp; if (defined ($zone)) { # adjust for timezone my ($zs, $zh, $zm) = $zone =~ /([+-])(\d\d)(\d\d)?/; $zm //= 0; $min -= ($zs eq '-' ? -1 : +1) * ($zh * 60 + $zm); $timestamp = timegm_nocheck($sec, $min, $hour, $day, $mon-1, $year); } else { $timestamp = timelocal($sec, $min, $hour, $day, $mon-1, $year); } # print STDERR "\$timestamp = $timestamp\n"; $self->add($timestamp, @data); } =head2 legend(@legend) Set the legend for the timeseries (One string per series). =cut sub legend { my ($self, @legend) = @_; my $oldlegend = $self->{legend}; $self->{legend} = [@legend] if (@legend); return $oldlegend ? @$oldlegend : (); } =head2 legend_position($position) Set the position of the legend. Currently, this is simply a string passed to gnuplot's "set key" command. Valid positions are left, right, top, bottom, outside, below and everything else gnuplot understands. =cut sub legend_position { my ($self, $legend_position) = @_; my $oldlegend_position = $self->{legend_position}; $self->{legend_position} = $legend_position if ($legend_position); return $oldlegend_position; } =head2 style([$style]) Sets a new style if $style is given. In any case the old style is returned. See new() for details about styles. =cut sub style { my ($self, $style) = @_; my $oldstyle = $self->{style}; $self->{style} = $style if ($style); return $oldstyle; } =head2 log_x([$log]) if $log is non-zero, the x axis is scaled logarithmically, if it is 0, the x axis is scaled linearly. The return value is the old value of this setting. =cut sub log_x { my ($self, $log_x) = @_; my $oldlog_x = $self->{log_x}; $self->{log_x} = $log_x if (defined($log_x)); return $oldlog_x; } =head2 log_y([$log]) if $log is non-zero, the y axis is scaled logarithmically, if it is 0, the y axis is scaled linearly. The return value is the old value of this setting. =cut sub log_y { my ($self, $log_y) = @_; my $oldlog_y = $self->{log_y}; $self->{log_y} = $log_y if (defined($log_y)); return $oldlog_y; } =head2 stacked([$stacked]) If $stacked is non-zero, the timeseries are stacked. The return value is the old value of this setting. =cut sub stacked { my ($self, $stacked) = @_; my $oldstacked = $self->{stacked}; $self->{stacked} = $stacked if (defined($stacked)); return $oldstacked; } =head2 output_format([$output_format]) Sets a new output format if $output_format is given. In any case the old output format is returned. See new() for details about output formats. =cut sub output_format { my ($self, $output_format) = @_; my $oldoutput_format = $self->{output_format}; $self->{output_format} = $output_format if ($output_format); return $oldoutput_format; } =head2 gsresolution([$gsresolution]) Sets the resolution of the ghostscript output when plotting to a pixmap format. The previous resolution is returned. See new() for details about output formats. =cut sub gsresolution { my ($self, $gsresolution) = @_; my $oldgsresolution = $self->{gsresolution}; $self->{gsresolution} = $gsresolution if ($gsresolution); return $oldgsresolution; } =head2 finalresolution([$finalresolution]) Sets the resolution when plotting to a pixmap format. The previous resolution is returned. If this resolution is higher than the gsresolution, gsresolution is set the same value. If it is higher than half of the gsresolution then the gsresolution is set to twice the finalresolution. See new() for details about output formats. =cut sub finalresolution { my ($self, $finalresolution) = @_; my $oldfinalresolution = $self->{finalresolution}; if (defined($finalresolution)) { $self->{finalresolution} = $finalresolution if ($finalresolution); if ($finalresolution >= $self->{gsresolution}) { $self->{gsresolution} = $finalresolution; } elsif ($finalresolution >= $self->{gsresolution} / 2) { $self->{gsresolution} = $finalresolution * 2; } } return $oldfinalresolution; } =head2 yrange([$ymin, $ymax]) Set the y range. If either $ymin or $ymax is undef it is automatically determined. =cut sub yrange { my ($self, $ymin, $ymax) = @_; if (@_ == 3) { $self->{yrange} = [ $ymin, $ymax ]; } return $self->{yrange} ? @{ $self->{yrange} } : (); } =head2 colors(@colors) Get or set the color palette. =cut sub colors { my ($self, @colors) = @_; if (@colors) { $self->{colors} = [@colors]; } return @colors; } =head2 pointtype($pointtype) Get or set the point type. This sets the point type for all the timeseries. There probably should be a way to set the point type for each timeseries, like for the colors. =cut sub pointtype { my ($self, $pointtype) = @_; if (defined $pointtype) { $self->{pointtype} = $pointtype; } return $self->{pointtype}; } =head2 pointsize($pointsize) Get or set the point size. This sets the point size for all the timeseries. I think that's what we usually want. Different point sizes for different timeseries are probably not a good idea. =cut sub pointsize { my ($self, $pointsize) = @_; if (defined $pointsize) { $self->{pointsize} = $pointsize; } return $self->{pointsize}; } =head2 dstcorr $time [, $period] corrects for time shifts caused by DST switches by aligning the time to the given period in local time. Example: 1048989600 is 2003-03-30 00:00:00 CET. 4 hours (14400 seconds) later, the time is 2003-03-30 05:00:00 CEST. To get back to a 4 hour period starting at midnight, 1 hour needs to be subtracted, so C returns 1048989600, which is 2003-03-30 04:00:00 CEST. This is an internal function which normally doesn't need to be called by the user. =cut sub dstcorr { my ($time, $period) = @_; $period = 24 * 3600 unless ($period); my ($sec,$min,$hour,$mday,$mon,$year) = localtime($time); my $toff = ($hour * 3600 + $min * 60 * $sec) % $period; if ($toff != 0) { if ($toff > $period/2) { $toff -= $period; } print STDERR "correcting time by $toff seconds " if ($debug); printf STDERR "from %04d-%02d-%02d %02d:%02d:%02d ", $year+1900, $mon+1, $mday, $hour, $min, $sec if ($debug); $time -= $toff; ($sec,$min,$hour,$mday,$mon,$year) = localtime($time); printf STDERR "to %04d-%02d-%02d %02d:%02d:%02d\n", $year+1900, $mon+1, $mday, $hour, $min, $sec if ($debug); } return $time; } =head2 plot Returns a string with the plot of the timeseries. =cut sub plot { my ($self) = @_; #print Dumper($self); my ($datafh, $datafn) = tempfile("tsplotXXXXXXXX", UNLINK => !$self->{keeptempfiles}); for my $i (@{$self->{data}}) { my $time = $i->[0]; my $data = $i->[1]; print $datafh $time; if ($self->{stacked}) { my $v = 0; my @stackeddata; for (my $j = $#$data; $j >= 0; $j--) { $v += ($data->[$j] || 0); $stackeddata[$j] = $v; } for my $v (@stackeddata) { print $datafh "\t", $v; } } else { for my $j (@$data) { print $datafh "\t", (defined $j ? $j : '?'); } } print $datafh "\n"; } close($datafh); my ($ctlfh, $ctlfn) = tempfile(UNLINK => !$self->{keeptempfiles}); my ($psfh, $psfn) = tempfile(UNLINK => !$self->{keeptempfiles}); # generic settings if ($self->{output_format} eq 'svg') { print $ctlfh "set term $self->{output_format}\n"; } else { print $ctlfh "set term postscript color solid 10\n"; } print $ctlfh "set output '$psfn'\n"; print $ctlfh "set style data $self->{style}\n"; print $ctlfh "set grid\n"; print $ctlfh "set log x\n" if ($self->{log_x}); print $ctlfh "set log y\n" if ($self->{log_y}); print $ctlfh "set key $self->{legend_position}\n" if ($self->{legend_position}); print $ctlfh "set datafile missing '?'\n"; print $ctlfh "set yrange [", $self->{yrange}[0] // "*", ":", $self->{yrange}[1] // "*", "]\n"; my $firsttime = $self->{data}[0][0]; my $lasttime = $self->{data}[$#{$self->{data}}][0]; my @tics = get_ticks($firsttime, $lasttime); # force tick values to look like fp numbers to avoid warnings about # exceeding the int range from gnuplot print $ctlfh "set xtics rotate (", join(", ", map sprintf(qq|"%s" %.16e|, $_->[1], $_->[0]), @tics), ")\n"; # what to plot print $ctlfh "plot "; my $comma = 0; my $col = 2; for my $i (0 .. $#{$self->{legend}}) { if ($comma) { print $ctlfh ", "; } else { $comma = 1; } my $legend = $self->{legend}[$i]; my $escaped_legend = $legend =~ s/_/\\_/gr; if ($self->{style} eq 'filledcurves') { print $ctlfh "'$datafn' using 1:(\$", $col++, "):(0) title '$escaped_legend'"; } else { print $ctlfh "'$datafn' using 1:(\$", $col++, ") title '$escaped_legend'"; } if (my $color = $self->{colors}[$i]) { print $ctlfh " linecolor rgbcolor '$color'"; } if (defined $self->{pointtype}) { print $ctlfh " pointtype $self->{pointtype}"; } if (defined $self->{pointsize}) { print $ctlfh " pointsize $self->{pointsize}"; } } print $ctlfh "\n"; close ($ctlfh); my $rc = system("gnuplot", $ctlfn); #print STDERR "system returned $rc\n"; my $pipe; if ($self->{output_format} eq "ps") { $pipe = "< $psfn"; } elsif ($self->{output_format} eq "svg") { $pipe = "< $psfn"; } else { $pipe = "gs -sDEVICE=ppmraw -r" . $self->{gsresolution} . " -dBATCH -sOutputFile=- -q - < $psfn |"; if ($self->{gsresolution} != $self->{finalresolution}) { $pipe .= "pnmscale " . ($self->{finalresolution} / $self->{gsresolution}) . " |"; } $pipe .= "pnmflip -cw |"; if ($self->{crop}) { $pipe .= "pnmcrop 2> /dev/null |"; } } if ($self->{output_format} eq "png") { $pipe .= "pnmtopng 2>/dev/null |"; } if ($self->{output_format} eq "gif") { # the ppm tools are noisy. Shut them up. $pipe .= "ppmquant 256 2> /dev/null |" . "ppmtogif 2> /dev/null |"; } if ($self->{output_format} eq "jpeg") { $pipe .= "cjpeg -sample 1x1,1x1,1x1 |"; } open(PNG, $pipe); my $graph; { local $/ = undef; $graph = ; } close(PNG); return $graph; } =head2 get_ticks($firsttime, $lasttime) Compute a "reasonable" set of ticks covering the interval between $firsttime and $lasttime. The spacing of the ticks a bit tricky: They should be related to common time units (1 hour, 1 day, 1 week, ...), which are irregular and not even of constant length (a day can be 23, 24 or 25 hours, a month 28 to 31 days, a year 365 or 366 days). Also the spacing shouldn't be too tight or too sparse. The function returns an ordered list of [ $timestamp, $label ] pairs. $firsttime falls into the interval between the first and second timestamp. =cut sub get_ticks { my ($firsttime, $lasttime) = @_; my @ticks = (); my $label; my $nexttime; if ($lasttime - $firsttime > 50 * 365 * 24 * 3600) { # more than 50 years: 1 tick/5 years my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = $hour = 0; $mday = 1; $mon = 0; $year = floor(($year + 1900) / 5) * 5; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d'; $nexttime = sub { return add_years($_[0], 5) }; } elsif ($lasttime - $firsttime > 10 * 365 * 24 * 3600) { # more than 10 years: 1 tick/year my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = $hour = 0; $mday = 1; $mon = 0; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d'; $nexttime = sub { return add_years($_[0], 1) }; } elsif ($lasttime - $firsttime > 3 * 365 * 24 * 3600) { # more than 3 years: 4 ticks/year my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = $hour = 0; $mday = 1; $mon = int($mon/3) * 3; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d'; $nexttime = sub { return add_months($_[0], 3) }; } elsif ($lasttime - $firsttime > 3 * 30 * 24 * 3600) { # 3 to 36 months: 1 tick/month my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = $hour = 0; $mday = 1; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d'; $nexttime = sub { return add_months($_[0], 1) }; } elsif ($lasttime - $firsttime > 30 * 24 * 3600) { # 30 ... 90 days: 1 tick/week. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $firsttime -= 86400 * $wday; ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = $hour = 0; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d'; $nexttime = sub { return dstcorr($_[0] + 7 * 24 * 3600) }; } elsif ($lasttime - $firsttime > 8 * 24 * 3600) { # 8 .. 30 days: 1 tick per day. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = $hour = 0; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d'; $nexttime = sub { return dstcorr($_[0] + 24 * 3600) }; } elsif ($lasttime - $firsttime > 2 * 24 * 3600) { # 2 .. 8 days: 1 tick/4 hours my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = 0; $hour = int($hour / 4) * 4; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d %H:%M'; $nexttime = sub { return dstcorr($_[0] + 4 * 3600, 4 * 3600) }; } elsif ($lasttime - $firsttime > 6 * 3600) { # 6 hours to 2 days: 1 tick per hour. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = $min = 0; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d %H:%M'; $nexttime = sub { return $_[0] + 3600 }; } elsif ($lasttime - $firsttime > 1 * 3600) { # 1 to 6 hours: 1 tick per 15 minutes. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = 0; $min = int($min / 15) * 15; $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d %H:%M'; $nexttime = sub { return $_[0] + 15 * 60 }; } else { # less than 1 hour: 1 tick per minute. my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($firsttime); $sec = 0; my $time = $firsttime = timelocal($sec,$min,$hour,$mday,$mon,$year); $label = '%Y-%m-%d %H:%M'; $nexttime = sub { return $_[0] + 60 }; } my $time = $nexttime->($firsttime); my $llb; for (;;) { my $lb = strftime($label, localtime($time)); my $dlb = $lb; if ($llb) { my @lc = split(/(\d+)/, $llb); my @c = split(/(\d+)/, $lb); for my $i (0 .. $#c) { if ($c[$i] eq $lc[$i]) { $c[$i] = " " x length($c[$i]); } else { last; } } $dlb = join("", @c); } push @ticks, [$time, $dlb]; if ($time > $lasttime) {last} $time = $nexttime->($time); $llb = $lb; } return @ticks; } sub add_months { my ($time, $d_mon) = @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time); $year += 1900; # localtime/timelocal mismatch $mon += $d_mon; if ($mon >= 12) { $mon -= 12; $year++; } $time = timelocal($sec,$min,$hour,$mday,$mon,$year); } sub add_years { my ($time, $d_year) = @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($time); $year += 1900; # localtime/timelocal mismatch $year += $d_year; $time = timelocal($sec,$min,$hour,$mday,$mon,$year); } 1;