timeseries/top_n

43 lines
728 B
Plaintext
Raw Normal View History

#!/usr/bin/perl -w
use strict;
# top_n N KEY_COLUMN VALUE_COLUMN [FILE ...]
#
# Reads tab-separated rows from the named files (or STDIN when none are
# given), sums VALUE_COLUMN per distinct value of KEY_COLUMN, and keeps the
# N keys with the largest sums.  Rows belonging to any other key are
# relabelled 'OTHER'; OTHER rows that are identical in every column except
# the value column are merged into one row whose value is their sum.  Rows
# are printed in input order (a merged row appears where its first
# contributing row was).
#
# Column numbers are 0-based array indices; negative indices count from the
# end of the row, as usual for Perl arrays.
my $usage = "usage: $0 N KEY_COLUMN VALUE_COLUMN [FILE ...]\n";

my $n = shift;    # how many keys to keep
my $f = shift;    # index of the key column
my $v = shift;    # index of the value column

# All three arguments must be integers; otherwise a forgotten argument would
# silently swallow a filename and the script would block reading STDIN.
for my $arg ($n, $f, $v) {
    die $usage unless defined $arg && $arg =~ /\A-?\d+\z/;
}

my @rows;         # every usable input row, as an array ref of columns
my %total_for;    # key => sum of the value column over all rows with that key

LINE:
while (my $line = <>) {
    chomp $line;
    next LINE unless length $line;

    # NOTE: split without a limit drops trailing empty fields, so those are
    # not reproduced in the output.
    my @cols = split /\t/, $line;

    # A row without the key or value column cannot be ranked or merged.
    unless (defined $cols[$f] && defined $cols[$v]) {
        warn "$0: skipping line $.: column $f or $v is missing\n";
        next LINE;
    }

    push @rows, \@cols;
    $total_for{ $cols[$f] } += $cols[$v];
}

# Rank keys by descending total; break ties on the key itself so that the
# selection is reproducible instead of depending on hash ordering.
my @ranked = sort { $total_for{$b} <=> $total_for{$a} or $a cmp $b }
             keys %total_for;

# Never slice past the end of @ranked: out-of-range slice elements are undef
# and would put a bogus '' key into the top-N set.
my $keep = $n < @ranked ? $n : scalar @ranked;
$keep = 0 if $keep < 0;

my %is_top;
$is_top{$_} = 1 for @ranked[ 0 .. $keep - 1 ];

my @output;         # rows to print, in first-occurrence order
my %other_row_for;  # OTHER row with value zeroed, tab-joined => surviving row

for my $row (@rows) {
    if ($is_top{ $row->[$f] }) {
        push @output, $row;
        next;
    }

    # Build the merge signature with the value column neutralised so that
    # rows differing only in their value collapse onto the same entry.
    my $val = $row->[$v];
    $row->[$f] = 'OTHER';
    $row->[$v] = 0;
    my $signature = join "\t", @$row;

    if (exists $other_row_for{$signature}) {
        $other_row_for{$signature}[$v] += $val;
    }
    else {
        $row->[$v] = $val;
        $other_row_for{$signature} = $row;
        push @output, $row;
    }
}

for my $row (@output) {
    print join("\t", @$row), "\n";
}