Only try to hardlink to old versions on the same device.

This should significantly speed up the first backup to a new device.
Added some timing info to the log.
This commit is contained in:
hjp 2009-09-10 16:14:21 +00:00
parent 8334de1d7b
commit 74da594a66
1 changed file with 34 additions and 2 deletions

View File

@ -70,6 +70,7 @@ use IO::Handle;
use File::stat;
use Scalar::Util qw(tainted);
use DBI;
use Time::HiRes qw(gettimeofday);
Readonly my $BUFSIZE => 128 * 1024;
@ -135,6 +136,13 @@ sub run {
for my $target (@{$self->{targets}}) {
$self->backup2disk($target);
}
$self->log(3, "statistics:");
for (sort keys %{ $self->{counts} }) {
$self->log(3, " $_: $self->{counts}{$_}");
}
for (sort keys %{ $self->{times} }) {
$self->log(3, " $_: $self->{times}{$_} s");
}
}
sub backup2disk {
@ -195,6 +203,7 @@ sub backup2disk {
}
$self->close_session();
$self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
$self->{counts}{objects} += $count;
}
sub parse {
@ -376,6 +385,8 @@ sub log_level {
sub db_record_version {
my ($self, $target, $f) = @_;
my $t0 = gettimeofday();
my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
{ Slice => {} },
$target->{id}, $f->{name});
@ -463,6 +474,9 @@ sub db_record_version {
time(), 1,
$self->{session_id}, $version_id
);
my $t1 = gettimeofday();
$self->{times}{db_record_version} += $t1 - $t0;
}
sub new_session {
@ -503,7 +517,7 @@ sub get_last_session_id {
return $sessions->[0]{id};
}
=head2 finddup
=head2 linkdup
Try to find a duplicate of the current file in the database and replace the
current file with a hardlink to it. This is useful if you
@ -518,7 +532,8 @@ and it is more likely that we can link to new copies than to old ones.
sub linkdup {
my ($self, $f, $backup_filename) = @_;
# XXX
my $t0 = gettimeofday();
# XXX - this seems to be slow
my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions
where file_type=? and file_size=? and file_mtime=?
and file_owner=? and file_group=? and file_acl=?
@ -535,7 +550,18 @@ sub linkdup {
join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
$f->{checksum}
);
my $st = stat($backup_filename);
my $my_dev = defined $st ? $st->dev : "";
while (my $r = $sth->fetchrow_hashref()) {
# check if old file is on same device. If it isn't, skip it.
unless ($self->{prefix_device}{$r->{prefix}}) {
my $st = lstat $r->{prefix};
$self->{prefix_device}{$r->{prefix}}
= defined $st ? $st->dev : "";
}
next unless $self->{prefix_device}{$r->{prefix}} eq $my_dev;
my $oldfile = "$r->{prefix}/$r->{path}";
if (my $st = lstat($oldfile)) {
if ($st->mtime == $f->{m} &&
@ -549,6 +575,9 @@ sub linkdup {
$self->log(10, "linked (dup)");
unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
$sth->finish();
my $t1 = gettimeofday();
$self->{counts}{dup2}++;
$self->{times}{linkdup} += $t1 - $t0;
return $oldfile;
} else {
$self->log(5, "cannot link $oldfile to $backup_filename");
@ -557,6 +586,8 @@ sub linkdup {
}
}
}
my $t1 = gettimeofday();
$self->{times}{linkdup} += $t1 - $t0;
return;
}
@ -578,6 +609,7 @@ sub store_file {
if($self->present($f)) {
if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
$self->log(10, "linked");
$self->{counts}{dup1}++;
return $success;
} else {
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");