#!/usr/bin/perl
# This script removes all data associated with the given sessions.
# For each session it first removes all instances of that session and
# then cleans up any orphans.
#
# Usage: script [--age=<number><unit>] [session ...]
#
# Session ids are taken from the command line. With --age, every session
# whose start_date is older than the given age is processed as well. The
# unit is one of y (365 days), m (30 days), w (7 days) or d (1 day); the
# number may have a fractional part (e.g. 1.5y).
use warnings;
use strict;

use Simba::CA;
use Bit::Vector::Judy;
use Getopt::Long;

# Unbuffered output so progress messages appear immediately.
$| = 1;

my $ca = Simba::CA->new({
    dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba",
});
my $dbh = $ca->{dbh};

# Refuse to continue on a mistyped option: its value would otherwise stay in
# @ARGV and be treated as a session id to delete.
my %opt;
GetOptions(
    \%opt,
    "age=s",
) or die "Usage: $0 [--age=<number>(y|m|w|d)] [session ...]\n";

if (defined $opt{age}) {
    my $max_age = age_to_seconds($opt{age});
    my $expired_sessions = $dbh->selectcol_arrayref(
        "select id from sessions where start_date < ? order by id",
        {},
        time() - $max_age,
    );
    push @ARGV, @$expired_sessions;
}

for my $session (@ARGV) {
    print "deleting instances of session $session\n";
    my $n_instances = $dbh->do("delete from instances where session=?", {}, $session);
    # DBI reports "no rows affected" as the true-but-zero string "0E0";
    # convert it to a plain number for display.
    $n_instances += 0 if defined $n_instances;
    print "\t$n_instances instances deleted\n";
    $dbh->commit();
}

remove_orphaned_sessions();
remove_orphaned_files();
remove_orphaned_versions();
$dbh->disconnect();
exit();

# Convert an age specification such as "30d", "2w" or "1.5y" into seconds.
#
# Parameters:
#   $age - string of the form <number><unit>, unit one of y, m, w, d.
# Returns:
#   the age in seconds.
# Dies:
#   if $age does not have exactly that form.
sub age_to_seconds {
    my ($age) = @_;

    my %seconds_per = (
        y => 365 * 86400,
        m =>  30 * 86400,
        w =>   7 * 86400,
        d =>   1 * 86400,
    );

    # The whole string must match: a partial match (e.g. reading "30d" as
    # "0d") would silently expire far more sessions than requested.
    my ($num, $unit) = $age =~ m{ \A ( \d+ (?: \. \d+ )? ) ( [ymwd] ) \z }x
        or die "invalid age '$age': expected <number><unit> with unit y, m, w or d\n";

    return $num * $seconds_per{$unit};
}

# Delete every row of sessions that is not referenced by any instance.
sub remove_orphaned_sessions {
    print "deleting orphaned sessions\n";
    my $sessions = $dbh->selectcol_arrayref(
        q{select s.id
          from instances i right outer join sessions s on i.session=s.id
          where i.id is null}
    );
    for my $session (@$sessions) {
        $dbh->do(q{delete from sessions where id=?}, {}, $session);
        print "\tsession $session deleted\n";
    }
    $dbh->commit();
}

# Delete every row of files that is not referenced by any instance.
sub remove_orphaned_files {
    print "deleting orphaned files\n";
    my $files = $dbh->selectcol_arrayref(
        q{select f.id
          from instances i right outer join files f on i.file=f.id
          where i.id is null}
    );
    for my $file (@$files) {
        $dbh->do(q{delete from files where id=?}, {}, $file);
        print "\tfile $file deleted\n";
    }
    $dbh->commit();
}

# Delete every row of versions2 that is not referenced by any instance.
sub remove_orphaned_versions {
    # This differs from the other two because mysql doesn't find a good plan for
    # the outer join: It does an index lookup on instances for every row of
    # versions2. For the other tables that's good because sessions and files are
    # much smaller than instances, but there is only about a factor of 10
    # between versions2 and instances, so reading both sequentially is much
    # better. Surprisingly, perl is also faster at eliminating duplicates than
    # mysql, so just doing two selects and doing all the work in perl is faster
    # than “select distinct … minus …” though not much.
    print "deleting orphaned versions\n";

    my $sth;
    # Have DBD::mysql fetch rows with mysql_use_result for the two full
    # table scans below.
    $dbh->{'mysql_use_result'} = 1;
    my $versions = Bit::Vector::Judy->new;

    # Pass 1: mark every existing version id.
    $sth = $dbh->prepare("select id from versions2");
    $sth->execute;
    my $i = 0;
    # Fetch in list context: the loop must end only when there are no more
    # rows, not when a column value happens to be false (0 or NULL).
    while (my ($version) = $sth->fetchrow_array) {
        if ($i % 1_000_000 == 0) {
            print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n";
        }
        $versions->set($version) if defined $version;
        $i++;
    }

    # Pass 2: clear every version id that is still used by an instance.
    $sth = $dbh->prepare("select version from instances");
    $sth->execute;
    $i = 0;
    while (my ($version) = $sth->fetchrow_array) {
        if ($i % 1_000_000 == 0) {
            print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
        }
        # A NULL version references nothing; skip it but keep scanning, or
        # versions used by later rows would wrongly be treated as orphans.
        $versions->unset($version) if defined $version;
        $i++;
    }
    $dbh->{'mysql_use_result'} = 0;
    print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";

    # Whatever is still marked is unreferenced.
    # NOTE(review): this loop stops on a false value, so a version id of 0
    # would never be deleted -- presumably ids start at 1; confirm.
    for (my $version = $versions->first(0); $version; $version = $versions->next($version)) {
        $dbh->do(q{delete from versions2 where id=?}, {}, $version);
        print "\tversion $version deleted\n";
    }
    $dbh->commit();
}

# vim: tw=132