simba/scripts/remove_session

107 lines
3.2 KiB
Plaintext
Raw Normal View History

#!/usr/bin/perl
# This script removes all data associated with the given sessions.
# For each session it first removes all instances of that session and
# then cleans up any orphans.
use warnings;
use strict;
use Simba::CA;
use Bit::Vector::Judy;
2013-12-01 10:01:58 +01:00
$| = 1;
my $ca = Simba::CA->new({
dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba",
});
my $dbh = $ca->{dbh};
2013-11-26 22:51:49 +01:00
for my $session (@ARGV) {
print "deleting instances of session $session\n";
my $n_instances = $dbh->do("delete from instances where session=?", {}, $session);
print "\t$n_instances instances deleted\n";
$dbh->commit();
}
remove_orphaned_sessions();
2013-12-01 13:17:15 +01:00
remove_orphaned_files();
remove_orphaned_versions();
$dbh->disconnect();
exit();
sub remove_orphaned_sessions {
print "deleting orphaned sessions\n";
2013-11-26 22:51:49 +01:00
my $sessions
= $dbh->selectcol_arrayref(
q{select s.id from instances i right outer join sessions s on i.session=s.id where i.id is null}
);
for my $session (@$sessions) {
$dbh->do(q{delete from sessions where id=?}, {}, $session);
print "\tsession $session deleted\n";
}
$dbh->commit();
}
sub remove_orphaned_files {
print "deleting orphaned files\n";
my $files
= $dbh->selectcol_arrayref(
q{select f.id from instances i right outer join files f on i.file=f.id where i.id is null}
);
for my $file (@$files) {
$dbh->do(q{delete from files where id=?}, {}, $file);
print "\tfile $file deleted\n";
}
$dbh->commit();
}
sub remove_orphaned_versions {
# This differs from the other two because mysql doesn't find a good plan for
# the outer join: It does an index lookup on instances for every row of
# versions2. For the other tables that's good because sessions and files are
# much smaller than instances, but there is only about a factor of 10
# between versions2 and instances, so reading both sequentally is much
2014-01-11 19:44:43 +01:00
# better. Surprisingly, perl is also faster at eliminating duplicates than
# mysql, so just doing two selects and doing all the work in perl is faster
# than “select distinct … minus …” though not much.
print "deleting orphaned versions\n";
my $sth;
$dbh->{'mysql_use_result'} = 1;
my $versions = Bit::Vector::Judy->new;
$sth = $dbh->prepare("select id from versions2");
$sth->execute;
my $i = 0;
while (my $version = $sth->fetchrow_array) {
if ($i % 1_000_000 == 0) {
print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n";
}
$versions->set($version);
$i++;
}
$sth = $dbh->prepare("select version from instances");
$sth->execute;
$i = 0;
while (my $version = $sth->fetchrow_array) {
if ($i % 1_000_000 == 0) {
print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
}
$versions->unset($version);
$i++;
}
$dbh->{'mysql_use_result'} = 0;
print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
for (my $version = $versions->first(0); $version; $version = $versions->next($version)) {
$dbh->do(q{delete from versions2 where id=?}, {}, $version);
print "\tversion $version deleted\n";
}
2013-12-01 10:01:58 +01:00
$dbh->commit();
}
# vim: tw=132