#!/usr/bin/perl # This script removes all data associated with the given sessions. # For each session it first removes all instances of that session and # then cleans up any orphans. use warnings; use strict; use Simba::CA; use Bit::Vector::Judy; use Getopt::Long; $| = 1; my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba", }); my $dbh = $ca->{dbh}; my $partition_size = $ca->{instances_part_size}; my %opt; GetOptions( \%opt, "age=s"); if ($opt{age}) { my ($num, $unit) = $opt{age} =~ /(\d(?:.\d+)?)(y|m|w|d)/; my $scale = { y => 365 * 86400, m => 30 * 86400, w => 7 * 86400, d => 1 * 86400 }->{$unit}; die "unknown time unit $unit" unless $scale; my $expired_sessions = $dbh->selectcol_arrayref("select id from sessions where start_date < ? order by id", {}, time() - $num * $scale); push @ARGV, @$expired_sessions; } for my $session (@ARGV) { print "deleting instances of session $session\n"; my $old_min_id = $dbh->selectrow_array("select min(id) from instances"); my $n_instances = $dbh->do("delete from instances where session=?", {}, $session); print "\t$n_instances instances deleted\n"; # Check if we just crossed into a new partition, if so, the old one is empty # and should be shrunk to minimum size. # # Note: This will not shrink partitions if we delete a session somewhere # in the middle, but I expect to do that rarely, and rebuilding a partition # after expiring a single session isn't worthwhile anyway. If I delete lots # of instances in the middle, I can always rebuild the affected partitions # manually. my $new_min_id = $dbh->selectrow_array("select min(id) from instances"); if (int($new_min_id/$partition_size) > int($old_min_id/$partition_size)) { my $partition = sprintf("p%03d", int($new_min_id/$partition_size)); $dbh->do("alter table instances rebuild partition $partition"); print "\trebuilt partition $partition\n"; } $dbh->commit(); } remove_orphaned_sessions(); remove_orphaned_files(); remove_orphaned_versions(); $dbh->disconnect(); exit(); sub remove_orphaned_sessions { print "deleting orphaned sessions\n"; my $sessions = $dbh->selectcol_arrayref( q{select s.id from instances i right outer join sessions s on i.session=s.id where i.id is null} ); for my $session (@$sessions) { $dbh->do(q{delete from sessions where id=?}, {}, $session); print "\tsession $session deleted\n"; } $dbh->commit(); } sub remove_orphaned_files { print "deleting orphaned files\n"; my $files = $dbh->selectcol_arrayref( q{select f.id from instances i right outer join files f on i.file=f.id where i.id is null} ); for my $file (@$files) { $dbh->do(q{delete from files where id=?}, {}, $file); print "\tfile $file deleted\n"; } $dbh->commit(); } sub remove_orphaned_versions { # This differs from the other two because mysql doesn't find a good plan for # the outer join: It does an index lookup on instances for every row of # versions2. For the other tables that's good because sessions and files are # much smaller than instances, but there is only about a factor of 10 # between versions2 and instances, so reading both sequentally is much # better. Surprisingly, perl is also faster at eliminating duplicates than # mysql, so just doing two selects and doing all the work in perl is faster # than “select distinct … minus …” though not much. print "deleting orphaned versions\n"; my $sth; $dbh->{'mysql_use_result'} = 1; my $versions = Bit::Vector::Judy->new; $sth = $dbh->prepare("select id from versions2"); $sth->execute; my $i = 0; while (my $version = $sth->fetchrow_array) { if ($i % 1_000_000 == 0) { print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n"; } $versions->set($version); $i++; } $sth = $dbh->prepare("select version from instances"); $sth->execute; $i = 0; while (my $version = $sth->fetchrow_array) { if ($i % 1_000_000 == 0) { print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n"; } $versions->unset($version); $i++; } $dbh->{'mysql_use_result'} = 0; print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n"; for (my $version = $versions->first(0); $version; $version = $versions->next($version)) { $dbh->do(q{delete from versions2 where id=?}, {}, $version); print "\tversion $version deleted\n"; } $dbh->commit(); } # vim: tw=132