#!/usr/bin/perl

=head1 SYNOPSIS

    remove_session [--age time] [session_ids ...]

=head1 DESCRIPTION

The C<remove_session> script removes the specified sessions from the
simba database and then cleans up orphaned entries.

The sessions can be specified by minimum age or by listing their ids (or
both, but that is probably not very useful).

The age is specified as a fractional number followed by a unit: "y",
"m", "w", or "d". So C<remove_session --age 2.5y> removes all sessions
older than 2.5 years (actually 2.5 * 365 * 86400 seconds -- leap years
and DST are ignored), and C<remove_session --age 52w> removes all
sessions older than 52 weeks.
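
For example (the session ids below are hypothetical, shown only for
illustration):

    remove_session --age 2.5y       # sessions older than about 2.5 years
    remove_session --age 52w        # sessions older than 52 weeks
    remove_session 1042 1043        # two specific sessions, by id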

If neither the age nor a list of sessions is specified, no sessions are
removed, but the cleanup phase is still run, which may take considerable
time (and might be considered a bug).

=cut

# This script removes all data associated with the given sessions.
# For each session it first removes all instances of that session and
# then cleans up any orphans.

use warnings;
use strict;

use Simba::CA;
use Bit::Vector::Judy;
use Getopt::Long;
use Pod::Usage;
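
# Autoflush STDOUT so the progress messages below appear immediately.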
$| = 1;

my $ca = Simba::CA->new({
    dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba",
});
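
# Database handle and the partition size of the instances table, both
# provided by the Simba::CA object.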
my $dbh = $ca->{dbh};
my $partition_size = $ca->{instances_part_size};

my %opt;
GetOptions(
    \%opt,
    "age=s",
    "help",
) or pod2usage(verbose => 0);

if ($opt{help}) {
    pod2usage(verbose => 2);
}
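
# Translate --age into a cutoff timestamp and append every session older than
# that cutoff to the list of session ids given on the command line.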
if ($opt{age}) {
    my ($num, $unit) = $opt{age} =~ /(\d+(?:\.\d+)?)(y|m|w|d)/;
    my $scale = { y => 365 * 86400,
                  m =>  30 * 86400,
                  w =>   7 * 86400,
                  d =>   1 * 86400 }->{$unit};
    die "unknown time unit $unit" unless $scale;
    my $expired_sessions
        = $dbh->selectcol_arrayref("select id from sessions where start_date < ? order by id",
                                   {},
                                   time() - $num * $scale);
    push @ARGV, @$expired_sessions;
}
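
# First pass: delete all instances belonging to each session in @ARGV.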
for my $session (@ARGV) {
    print "deleting instances of session $session\n";
    my $old_min_id = $dbh->selectrow_array("select min(id) from instances");
    my $n_instances = $dbh->do("delete from instances where session=?", {}, $session);
    print "\t$n_instances instances deleted\n";

    # Check if we just crossed into a new partition. If so, the old one is
    # empty and should be shrunk to minimum size.
    #
    # Note: This will not shrink partitions if we delete a session somewhere
    # in the middle, but I expect to do that rarely, and rebuilding a partition
    # after expiring a single session isn't worthwhile anyway. If I delete lots
    # of instances in the middle, I can always rebuild the affected partitions
    # manually.
    my $new_min_id = $dbh->selectrow_array("select min(id) from instances");
    if (int($new_min_id/$partition_size) > int($old_min_id/$partition_size)) {
        my $partition = sprintf("p%03d", int($new_min_id/$partition_size));
        $dbh->do("alter table instances rebuild partition $partition");
        print "\trebuilt partition $partition\n";
    }
    $dbh->commit();
}
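
# Second pass: remove rows in sessions, files, and versions2 that are no
# longer referenced by any row in instances.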
remove_orphaned_sessions();
remove_orphaned_files();
remove_orphaned_versions();
$dbh->disconnect();
exit();
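
# A session is orphaned when no row in instances refers to it; the right outer
# join below yields exactly those session ids.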
sub remove_orphaned_sessions {
    print "deleting orphaned sessions\n";
    my $sessions
        = $dbh->selectcol_arrayref(
              q{select s.id from instances i right outer join sessions s on i.session=s.id where i.id is null}
          );

    for my $session (@$sessions) {
        $dbh->do(q{delete from sessions where id=?}, {}, $session);
        print "\tsession $session deleted\n";
    }
    $dbh->commit();
}

sub remove_orphaned_files {
    print "deleting orphaned files\n";
    my $files
        = $dbh->selectcol_arrayref(
              q{select f.id from instances i right outer join files f on i.file=f.id where i.id is null}
          );

    for my $file (@$files) {
        $dbh->do(q{delete from files where id=?}, {}, $file);
        print "\tfile $file deleted\n";
    }
    $dbh->commit();
}

sub remove_orphaned_versions {
    # This differs from the other two because mysql doesn't find a good plan
    # for the outer join: It does an index lookup on instances for every row of
    # versions2. For the other tables that's good because sessions and files
    # are much smaller than instances, but there is only about a factor of 10
    # between versions2 and instances, so reading both sequentially is much
    # better. Surprisingly, perl is also faster at eliminating duplicates than
    # mysql, so just doing two selects and doing all the work in perl is faster
    # than “select distinct … minus …”, though not much.
    print "deleting orphaned versions\n";
    my $sth;
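
    # mysql_use_result makes DBD::mysql stream rows from the server instead of
    # buffering the whole result set in memory, which matters for tables of
    # this size.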
    $dbh->{'mysql_use_result'} = 1;
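
    # Collect the ids of all rows in versions2 in a bitmap, then clear every id
    # that is still referenced from instances; whatever remains set is orphaned.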
    my $versions = Bit::Vector::Judy->new;
    $sth = $dbh->prepare("select id from versions2");
    $sth->execute;
    my $i = 0;
    while (my $version = $sth->fetchrow_array) {
        if ($i % 1_000_000 == 0) {
            print "\t$i records from versions processed, ", $versions->count(0, -1), " versions found\n";
        }
        $versions->set($version);
        $i++;
    }

    $sth = $dbh->prepare("select version from instances");
    $sth->execute;
    $i = 0;
    while (my $version = $sth->fetchrow_array) {
        if ($i % 1_000_000 == 0) {
            print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
        }
        $versions->unset($version);
        $i++;
    }
    $dbh->{'mysql_use_result'} = 0;
    print "\t$i records from instances processed, ", $versions->count(0, -1), " versions left\n";
    for (my $version = $versions->first(0); $version; $version = $versions->next($version)) {
        $dbh->do(q{delete from versions2 where id=?}, {}, $version);
        print "\tversion $version deleted\n";
    }
    $dbh->commit();
}
# vim: tw=132