#!/usr/bin/perl

# This script removes all data associated with the given sessions.
# For each session it first removes all instances of that session and
# then cleans up any orphans.

use warnings;
use strict;

use Simba::CA;
use Bit::Vector::Judy;
use Getopt::Long;

$| = 1;    # unbuffered STDOUT so progress messages show up immediately

# The DBI connection description is read from the file named in
# $SIMBA_DB_CONN, falling back to ~/.dbi/simba.
my $ca = Simba::CA->new({
			    dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba",
			});

my $dbh = $ca->{dbh};

# Command line: [--age=<number><unit>] [session-id ...]
#
# Every session id given as an argument is removed. With --age, all sessions
# whose start_date is older than the given age are removed as well. The unit
# is y (365 days), m (30 days), w (7 days) or d (1 day); the number may have
# a fractional part, e.g. "30d" or "1.5y".
my %opt;
GetOptions(
    \%opt,
    "age=s")
    or die "Usage: $0 [--age=<number>(y|m|w|d)] [session-id ...]\n";

if (defined $opt{age}) {
    # The pattern must be anchored and must accept more than one leading
    # digit: an unanchored /(\d(?:.\d+)?)(y|m|w|d)/ parses "30d" as "0d",
    # which would expire every session in the database.
    my ($num, $unit) = $opt{age} =~ /\A(\d+(?:\.\d+)?)(y|m|w|d)\z/
        or die "invalid age '$opt{age}': expected <number> followed by y, m, w or d (e.g. 30d or 1.5y)\n";

    my %seconds_per = ( y => 365 * 86400,
                        m =>  30 * 86400,
                        w =>   7 * 86400,
                        d =>   1 * 86400 );
    my $cutoff = time() - $num * $seconds_per{$unit};

    my $expired_sessions
        = $dbh->selectcol_arrayref("select id from sessions where start_date < ? order by id",
                                   {},
                                   $cutoff);
    # Expired sessions are handled exactly like explicitly named ones.
    push @ARGV, @$expired_sessions;
}

# Remove the instances of each session, committing per session so that an
# interrupted run keeps the work already done.
for my $session (@ARGV) {
    print "deleting instances of session $session\n";
    my $n_instances = $dbh->do("delete from instances where session=?", {}, $session);
    # DBI reports "no rows affected" as the true-but-zero string "0E0";
    # numify it so the message reads "0 instances deleted".
    $n_instances += 0 if defined $n_instances;
    print "\t$n_instances instances deleted\n";
    $dbh->commit();
}

# With the instances gone, drop everything nothing refers to any more.
remove_orphaned_sessions();
remove_orphaned_files();
remove_orphaned_versions();
$dbh->disconnect();
exit();

sub remove_orphaned_sessions {
    # Deletes every row of sessions that no row of instances refers to,
    # reporting each deleted id, and commits once at the end.
    print "deleting orphaned sessions\n";

    # The outer join keeps all sessions; those without a matching instance
    # come back with i.id null.
    my $find_orphans_sql
        = q{select s.id from instances i right outer join sessions s  on i.session=s.id where i.id is null};
    my $delete_sql = q{delete from sessions where id=?};

    my $orphan_ids = $dbh->selectcol_arrayref($find_orphans_sql);
    foreach my $session (@{$orphan_ids}) {
        $dbh->do($delete_sql, {}, $session);
        print "\tsession $session deleted\n";
    }

    $dbh->commit();
}

sub remove_orphaned_files {
    # Deletes every row of files that no row of instances refers to,
    # reporting each deleted id, and commits once at the end.
    print "deleting orphaned files\n";

    # The outer join keeps all files; those without a matching instance
    # come back with i.id null.
    my $find_orphans_sql
        = q{select f.id from instances i right outer join files f  on i.file=f.id where i.id is null};
    my $delete_sql = q{delete from files where id=?};

    my $orphan_ids = $dbh->selectcol_arrayref($find_orphans_sql);
    foreach my $file (@{$orphan_ids}) {
        $dbh->do($delete_sql, {}, $file);
        print "\tfile $file deleted\n";
    }

    $dbh->commit();
}

sub remove_orphaned_versions {
    # Deletes every row of versions2 whose id does not occur in
    # instances.version, reporting each deleted id, and commits at the end.
    #
    # This differs from the other two because mysql doesn't find a good plan for
    # the outer join: It does an index lookup on instances for every row of
    # versions2. For the other tables that's good because sessions and files are
    # much smaller than instances, but there is only about a factor of 10
    # between versions2 and instances, so reading both sequentially is much
    # better. Surprisingly, perl is also faster at eliminating duplicates than
    # mysql, so just doing two selects and doing all the work in perl is faster
    # than “select distinct … minus …” though not much.
    #
    # Method: set one bit per id found in versions2, clear the bit of every
    # version referenced by instances; the bits still set are the orphans.
    print "deleting orphaned versions\n";
    my $sth;

    # Switched on only for the two big scans and off again before the deletes.
    $dbh->{'mysql_use_result'} = 1;
    my $versions = Bit::Vector::Judy->new;
    $sth = $dbh->prepare("select id from versions2");
    $sth->execute;
    my $i = 0;
    # fetchrow_array is called in list context on purpose: in scalar context
    # a NULL or 0 column value is indistinguishable from "no more rows" and
    # would silently end the scan early.
    while (my ($version) = $sth->fetchrow_array) {
	if ($i % 1_000_000 == 0) {
	    print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n";
	}
	$i++;
	next unless defined $version;
	$versions->set($version);
    }

    $sth = $dbh->prepare("select version from instances");
    $sth->execute;
    $i = 0;
    # List context matters even more here: stopping early at a NULL version
    # would leave versions that are still in use marked as orphans, and they
    # would then be deleted below.
    while (my ($version) = $sth->fetchrow_array) {
	if ($i % 1_000_000 == 0) {
	    print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
	}
	$i++;
	next unless defined $version;    # an instance without a version references nothing
	$versions->unset($version);
    }
    $dbh->{'mysql_use_result'} = 0;
    print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";

    # NOTE(review): the loop ends on any false value, so a version id of 0
    # would never be deleted. Presumably ids start at 1 and first/next return
    # a false value when no further bit is set -- confirm against
    # Bit::Vector::Judy before changing the condition.
    for (my $version = $versions->first(0); $version; $version = $versions->next($version)) {
	$dbh->do(q{delete from versions2 where id=?}, {}, $version);
	print "\tversion $version deleted\n";
    }
    $dbh->commit();
}

# vim: tw=132