simba/scripts/remove_session

#!/usr/bin/perl

# This script removes all data associated with the given sessions.
# For each session it first removes all instances of that session and
# then cleans up any orphans.

use warnings;
use strict;

use Simba::CA;
use Bit::Vector::Judy;

$| = 1;

# Main script: open the database handle, delete the instances of every
# session named on the command line (one commit per session), then clean up
# the sessions, files and versions that no instance refers to any more.

# The connection description file comes from SIMBA_DB_CONN if set, otherwise
# from ~/.dbi/simba.
my $ca = Simba::CA->new({
    dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba",
});

my $dbh = $ca->{dbh};

for my $session (@ARGV) {
    print "deleting instances of session $session\n";
    my $n_instances = $dbh->do("delete from instances where session=?", {}, $session);

    # do() returns undef on failure (when the handle does not raise
    # exceptions itself); never report or commit a failed delete as a success.
    die "deleting instances of session $session failed: " . $dbh->errstr
        unless defined $n_instances;

    # DBI reports "no rows affected" as the true-but-zero string "0E0";
    # numify it so the message reads "0 instances deleted".
    $n_instances += 0;
    print "\t$n_instances instances deleted\n";
    $dbh->commit();
}
remove_orphaned_sessions();
remove_orphaned_files();
remove_orphaned_versions();
$dbh->disconnect();
exit();

sub remove_orphaned_sessions {
    # Deletes every row of sessions that has no matching row in instances
    # (found via an outer join on instances.session), printing one line per
    # deleted id, and commits once at the end.
    print "deleting orphaned sessions\n";

    my $find_orphans_sql
        = q{select s.id from instances i right outer join sessions s  on i.session=s.id where i.id is null};
    my $delete_sql = q{delete from sessions where id=?};

    my $orphan_ids = $dbh->selectcol_arrayref($find_orphans_sql);
    foreach my $orphan_id (@{$orphan_ids}) {
        $dbh->do($delete_sql, {}, $orphan_id);
        print "\tsession $orphan_id deleted\n";
    }

    $dbh->commit();
}

sub remove_orphaned_files {
    # Deletes every row of files that has no matching row in instances
    # (found via an outer join on instances.file), printing one line per
    # deleted id, and commits once at the end.
    print "deleting orphaned files\n";

    my $find_orphans_sql
        = q{select f.id from instances i right outer join files f  on i.file=f.id where i.id is null};
    my $delete_sql = q{delete from files where id=?};

    my $orphan_ids = $dbh->selectcol_arrayref($find_orphans_sql);
    foreach my $orphan_id (@{$orphan_ids}) {
        $dbh->do($delete_sql, {}, $orphan_id);
        print "\tfile $orphan_id deleted\n";
    }

    $dbh->commit();
}

sub remove_orphaned_versions {
    # Deletes every row of versions2 whose id does not occur in
    # instances.version, printing progress while reading and one line per
    # deleted id, and commits once at the end.
    #
    # This differs from the other two because mysql doesn't find a good plan for
    # the outer join: It does an index lookup on instances for every row of
    # versions2. For the other tables that's good because sessions and files are
    # much smaller than instances, but there is only about a factor of 10
    # between versions2 and instances, so reading both sequentially is much
    # better. Surprisingly, perl is also faster at eliminating duplicates than
    # mysql, so just doing two selects and doing all the work in perl is faster
    # than “select distinct … minus …” though not much.
    print "deleting orphaned versions\n";
    my $sth;

    # Have DBD::mysql use mysql_use_result for the two big scans below;
    # switched off again before the deletes.
    $dbh->{'mysql_use_result'} = 1;

    # Pass 1: set one bit per existing version id.
    my $versions = Bit::Vector::Judy->new;
    $sth = $dbh->prepare("select id from versions2");
    $sth->execute;
    my $i = 0;
    # List assignment: the loop ends only when no row is returned, not when
    # the fetched value itself happens to be NULL or 0.
    while (my ($version) = $sth->fetchrow_array) {
        if ($i % 1_000_000 == 0) {
            print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n";
        }
        $versions->set($version) if defined $version;
        $i++;
    }
    die "reading versions2 failed: " . $sth->errstr if $sth->err;

    # Pass 2: clear the bit of every version that is still referenced by an
    # instance. Stopping early here would leave referenced versions in the
    # bitmap and get them deleted below, so a NULL value must not end the loop
    # and a fetch error must abort.
    $sth = $dbh->prepare("select version from instances");
    $sth->execute;
    $i = 0;
    while (my ($version) = $sth->fetchrow_array) {
        if ($i % 1_000_000 == 0) {
            print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
        }
        $versions->unset($version) if defined $version;
        $i++;
    }
    die "reading instances failed: " . $sth->errstr if $sth->err;
    $dbh->{'mysql_use_result'} = 0;
    print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";

    # Whatever is still set is unreferenced. The statement is prepared once
    # and re-executed per id.
    # NOTE(review): the loop stops at the first false index, so an id of 0
    # would never be deleted -- presumably ids start at 1; confirm.
    my $delete = $dbh->prepare(q{delete from versions2 where id=?});
    for (my $version = $versions->first(0); $version; $version = $versions->next($version)) {
        $delete->execute($version);
        print "\tversion $version deleted\n";
    }
    $dbh->commit();
}

# vim: tw=132
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00			`#!/usr/bin/perl`

			`# This script removes all data associated with the given sessions.`
			`# For each session it first removes all instances of that session and`
			`# then cleans up any orphans.`

			`use warnings;`
			`use strict;`

			`use Simba::CA;`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`use Bit::Vector::Judy;`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00
Commit. Flush stdout. 2013-12-01 10:01:58 +01:00			`$\| = 1;`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00
			`my $ca = Simba::CA->new({`
			`dbi_file => $ENV{SIMBA_DB_CONN} \|\| "$ENV{HOME}/.dbi/simba",`
			`});`

			`my $dbh = $ca->{dbh};`

fixed trivial errors 2013-11-26 22:51:49 +01:00			`for my $session (@ARGV) {`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`print "deleting instances of session $session\n";`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00			`my $n_instances = $dbh->do("delete from instances where session=?", {}, $session);`
			`print "\t$n_instances instances deleted\n";`
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`$dbh->commit();`
			`}`
			`remove_orphaned_sessions();`
reactivated remove_orphaned_files. 2013-12-01 13:17:15 +01:00			`remove_orphaned_files();`
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`remove_orphaned_versions();`
			`$dbh->disconnect();`
			`exit();`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`sub remove_orphaned_sessions {`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`print "deleting orphaned sessions\n";`
fixed trivial errors 2013-11-26 22:51:49 +01:00			`my $sessions`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00			`= $dbh->selectcol_arrayref(`
			`q{select s.id from instances i right outer join sessions s on i.session=s.id where i.id is null}`
			`);`

			`for my $session (@$sessions) {`
			`$dbh->do(q{delete from sessions where id=?}, {}, $session);`
			`print "\tsession $session deleted\n";`
			`}`
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`$dbh->commit();`
			`}`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`sub remove_orphaned_files {`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`print "deleting orphaned files\n";`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00			`my $files`
			`= $dbh->selectcol_arrayref(`
			`q{select f.id from instances i right outer join files f on i.file=f.id where i.id is null}`
			`);`

			`for my $file (@$files) {`
			`$dbh->do(q{delete from files where id=?}, {}, $file);`
			`print "\tfile $file deleted\n";`
			`}`
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`$dbh->commit();`
			`}`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00
moved removal of orphaned records into their own functions and after the loop. 2013-12-01 10:08:38 +01:00			`sub remove_orphaned_versions {`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`# This differs from the other two because mysql doesn't find a good plan for`
			`# the outer join: It does an index lookup on instances for every row of`
			`# versions2. For the other tables that's good because sessions and files are`
			`# much smaller than instances, but there is only about a factor of 10`
			`# between versions2 and instances, so reading both sequentially is much`
typo 2014-01-11 19:44:43 +01:00			`# better. Surprisingly, perl is also faster at eliminating duplicates than`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`# mysql, so just doing two selects and doing all the work in perl is faster`
			`# than “select distinct … minus …” though not much.`
			`print "deleting orphaned versions\n";`
			`my $sth;`

			`$dbh->{'mysql_use_result'} = 1;`
			`my $versions = Bit::Vector::Judy->new;`
			`$sth = $dbh->prepare("select id from versions2");`
			`$sth->execute;`
			`my $i = 0;`
			`while (my $version = $sth->fetchrow_array) {`
			`if ($i % 1_000_000 == 0) {`
			`print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n";`
			`}`
			`$versions->set($version);`
			`$i++;`
			`}`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00			`$sth = $dbh->prepare("select version from instances");`
			`$sth->execute;`
			`$i = 0;`
			`while (my $version = $sth->fetchrow_array) {`
			`if ($i % 1_000_000 == 0) {`
			`print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";`
			`}`
			`$versions->unset($version);`
			`$i++;`
			`}`
			`$dbh->{'mysql_use_result'} = 0;`
Added missing parameter to $versions->first(). How could this have worked before? 2013-12-01 19:49:32 +01:00			`print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00
Added missing parameter to $versions->first(). How could this have worked before? 2013-12-01 19:49:32 +01:00			`for (my $version = $versions->first(0); $version; $version = $versions->next($version)) {`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00			`$dbh->do(q{delete from versions2 where id=?}, {}, $version);`
			`print "\tversion $version deleted\n";`
			`}`
Commit. Flush stdout. 2013-12-01 10:01:58 +01:00			`$dbh->commit();`
script to remove session(s) from database. 2013-11-26 22:50:50 +01:00			`}`
reimplemented the outer join in perl with two separate queries and a judy bitmap. This is about 20 times faster (1 hour instead of 20 for 1.3E9 instances and 125E6 versions). 2013-12-01 13:10:35 +01:00
			`# vim: tw=132`