Lock the fileset during backup to avoid concurrent backups of the same dataset. In linkdup, only search the last few (currently 30) sessions with a backup on the same device for matching files. Started to work on support for Tokyo Cabinet.
This commit is contained in:
parent 944d7f3be9
commit 4f4e5540ff
@@ -104,7 +104,12 @@ sub new {
 				RaiseError => 1
 			}
 		);
+	} elsif ($opt->{tokyocabinet}) {
+		my $tdb = $self->{tdb} = TokyoCabinet::TDB->new();
+		$tdb->open($opt->{tokyocabinet}, $tdb->OWRITER | $tdb->OCREAT)
+			or die "open $opt->{tokyocabinet} failed: " . $tdb->errmsg($tdb->ecode());
 	}
+	# XXX - DBI
 	$self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets", { Slice => {} });
 	if ($opt->{filesets}) {
 		$self->{targets} =
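The tokyocabinet branch above only opens the handle so far (the commit message says support is just getting started). For orientation, a minimal standalone sketch of the TokyoCabinet::TDB table-database calls the new code would build on; the file name, key scheme, and column names here are made up for illustration, only the TDB methods themselves are the library's real API:

	use TokyoCabinet;

	my $tdb = TokyoCabinet::TDB->new();
	$tdb->open("filesets.tct", $tdb->OWRITER | $tdb->OCREAT)
		or die "open failed: " . $tdb->errmsg($tdb->ecode());

	# one record = a primary key plus a hash of named columns
	$tdb->put("fileset:1", { host => "alice", dir => "/home" })
		or warn "put failed: " . $tdb->errmsg($tdb->ecode());

	my $cols = $tdb->get("fileset:1");	# hashref of columns, or undef
	print "$cols->{host} $cols->{dir}\n" if $cols;

	$tdb->close();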
@@ -148,6 +153,12 @@ sub run {
 sub backup2disk {
 	my ($self, $target) = @_;
 
+	# XXX - lock the fileset here, we don't want to back up the same fileset twice concurrently.
+	# theoretically something like
+	$self->{dbh}->selectall_arrayref(q{select * from filesets where id = ?}, {}, $target->{id});
+	# should suffice, but I'm not sure if that blocks too much (does that block reads? block the whole table?)
+	# Just try it and we will see ...
+
 	$self->log(3, "starting backup for target host " . $target->{host} . " dir " . $target->{dir});
 	$self->{target} = $target;
 
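The comment block above asks whether the plain SELECT blocks anything. Under MVCC databases such as PostgreSQL it does not: a plain SELECT takes no row locks, so two backup2disk runs could still proceed concurrently. A hedged sketch, not the commit's code, of one way to get the intended lock, assuming the backend supports SELECT ... FOR UPDATE (PostgreSQL, MySQL/InnoDB); it locks only the matching row, not the whole table, and plain readers are not blocked:

	$self->{dbh}->begin_work();		# FOR UPDATE only locks within a transaction
	my $row = $self->{dbh}->selectrow_hashref(
		q{select * from filesets where id = ? for update},
		{}, $target->{id},
	);
	# ... run the backup while the row lock is held ...
	$self->{dbh}->commit();			# commit (or rollback) releases the lock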
@@ -570,6 +581,27 @@ sub linkdup {
 	my ($self, $f, $backup_filename) = @_;
 	my $t0 = gettimeofday();
 	# XXX - this seems to be slow
+	# XXX - creates huge temp files. Restrict to the last few sessions, or at least to sessions on the same device?
+	# XXX - that's not quite as simple: We only have the prefix, but there are many prefixes on the same
+	# device. We can create a list of them at first call, though, and then pass the list
+	# to the query. Maybe even shorten the list ($n newest sessions only).
+	# XXX - another possible optimization is to check the last few files we've written: .svn/prop-base
+	# normally contains a lot of identical files.
+
+	unless ($self->{sessions_on_same_device}) {
+		my $st = stat($backup_filename);
+		my $my_dev = defined $st ? $st->dev : ""; # can this happen?
+		my $sth = $self->{dbh}->prepare("select * from sessions order by id desc");
+		$sth->execute();
+		while (my $r = $sth->fetchrow_hashref()) {
+			my $st = lstat $r->{prefix};
+			my $dev = defined $st ? $st->dev : "";
+			next if $dev ne $my_dev;
+			last if @{ $self->{sessions_on_same_device} //= [] } > 30;
+			push @{ $self->{sessions_on_same_device} }, $r;
+		}
+
+	}
 	my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions
 		where file_type=? and file_size=? and file_mtime=?
 		and file_owner=? and file_group=? and file_acl=?
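The loop above compares st_dev fields via $st->dev, the object interface from the core File::stat module (the plain stat/lstat builtins return lists); presumably the file already imports it elsewhere. As a standalone illustration of the same-device test that decides whether a hard link is possible, with placeholder paths:

	use File::stat;

	my $a = stat("/backup/current/somefile");	# follows symlinks
	my $b = lstat("/backup/prefix");		# does not follow symlinks
	if ($a && $b && $a->dev == $b->dev) {
		print "same filesystem, link() can be used\n";
	}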
@@ -578,19 +610,22 @@ sub linkdup {
 		and instances.file=files.id
 		and versions2.id=instances.version
 		and instances.session=sessions.id
-		order by instances.session desc
+		and sessions.id in (" . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")" .
+		" order by instances.session desc
 	");
 	$sth->execute(
 		$f->{t}, $f->{s}, $f->{m},
 		$f->{o}, $f->{g}, $f->{acl},
 		join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
-		$f->{checksum}
+		$f->{checksum},
+		map({ $_->{id} } @{ $self->{sessions_on_same_device} }),
 	);
 	my $st = stat($backup_filename);
 	my $my_dev = defined $st ? $st->dev : "";
 	while (my $r = $sth->fetchrow_hashref()) {
 
 		# check if the old file is on the same device. If it isn't, skip it.
+		# XXX - this should now be obsolete because we already selected only matching sessions above.
 		unless ($self->{prefix_device}{$r->{prefix}}) {
 			my $st = lstat $r->{prefix};
 			$self->{prefix_device}{$r->{prefix}}
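The hunk above builds the IN list by joining one "?" per cached session and binding the session ids, which keeps the values out of the SQL string so no quoting is needed. The same pattern in isolation, using table and column names from the query above; note that an empty session list would produce "in ()", which is invalid SQL and would need a guard:

	my @ids = map { $_->{id} } @{ $self->{sessions_on_same_device} };
	my $in  = join(", ", ("?") x @ids);
	my $sth = $self->{dbh}->prepare("select * from instances where session in ($in)");
	$sth->execute(@ids);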