diff --git a/scripts/remove_session b/scripts/remove_session index 1216c46..3ebb018 100755 --- a/scripts/remove_session +++ b/scripts/remove_session @@ -8,6 +8,7 @@ use warnings; use strict; use Simba::CA; +use Bit::Vector::Judy; $| = 1; @@ -18,18 +19,19 @@ my $ca = Simba::CA->new({ my $dbh = $ca->{dbh}; for my $session (@ARGV) { - print "deleting session $session\n"; + print "deleting instances of session $session\n"; my $n_instances = $dbh->do("delete from instances where session=?", {}, $session); print "\t$n_instances instances deleted\n"; $dbh->commit(); } remove_orphaned_sessions(); -remove_orphaned_files(); +#remove_orphaned_files(); remove_orphaned_versions(); $dbh->disconnect(); exit(); sub remove_orphaned_sessions { + print "deleting orphaned sessions\n"; my $sessions = $dbh->selectcol_arrayref( q{select s.id from instances i right outer join sessions s on i.session=s.id where i.id is null} @@ -43,6 +45,7 @@ sub remove_orphaned_sessions { } sub remove_orphaned_files { + print "deleting orphaned files\n"; my $files = $dbh->selectcol_arrayref( q{select f.id from instances i right outer join files f on i.file=f.id where i.id is null} @@ -56,14 +59,49 @@ sub remove_orphaned_files { } sub remove_orphaned_versions { - my $versions - = $dbh->selectcol_arrayref( - q{select v.id from instances i right outer join versions2 v on i.version=v.id where i.id is null} - ); + # This differs from the other two because mysql doesn't find a good plan for + # the outer join: It does an index lookup on instances for every row of + # versions2. For the other tables that's good because sessions and files are + # much smaller than instances, but there is only about a factor of 10 + # between versions2 and instances, so reading both sequentially is much + # better. Surprisingly, perl is also faster at eliminating duplicates than + # mysql, so just doing two selects and doing all the work in perl is faster + # than “select distinct … minus …” though not much. 
+ print "deleting orphaned versions\n"; + my $sth; - for my $version (@$versions) { + $dbh->{'mysql_use_result'} = 1; + my $versions = Bit::Vector::Judy->new; + $sth = $dbh->prepare("select id from versions2"); + $sth->execute; + my $i = 0; + # Fetch in list context: a scalar-context fetchrow_array returns undef both for a NULL column and at end of data. + while (my ($version) = $sth->fetchrow_array) { + if ($i % 1_000_000 == 0) { + print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n"; + } + $versions->set($version) if defined $version; + $i++; + } + + $sth = $dbh->prepare("select version from instances"); + $sth->execute; + $i = 0; + # A NULL version references nothing, so skip it rather than letting it end the scan early and leave live versions marked. + while (my ($version) = $sth->fetchrow_array) { + if ($i % 1_000_000 == 0) { + print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n"; + } + $versions->unset($version) if defined $version; + $i++; + } + $dbh->{'mysql_use_result'} = 0; + + for (my $version = $versions->first; $version; $version = $versions->next($version)) { $dbh->do(q{delete from versions2 where id=?}, {}, $version); print "\tversion $version deleted\n"; } $dbh->commit(); } + +# vim: tw=132