Only try to hardlink to old versions on the same device.

This should significantly speed up the first backup to a new device.
Added some timing info to the log.
This commit is contained in:
hjp 2009-09-10 16:14:21 +00:00
parent 8334de1d7b
commit 74da594a66
1 changed file with 34 additions and 2 deletions

View File

@ -70,6 +70,7 @@ use IO::Handle;
use File::stat; use File::stat;
use Scalar::Util qw(tainted); use Scalar::Util qw(tainted);
use DBI; use DBI;
use Time::HiRes qw(gettimeofday);
Readonly my $BUFSIZE => 128 * 1024; Readonly my $BUFSIZE => 128 * 1024;
@ -135,6 +136,13 @@ sub run {
for my $target (@{$self->{targets}}) { for my $target (@{$self->{targets}}) {
$self->backup2disk($target); $self->backup2disk($target);
} }
$self->log(3, "statistics:");
for (sort keys %{ $self->{counts} }) {
$self->log(3, " $_: $self->{counts}{$_}");
}
for (sort keys %{ $self->{times} }) {
$self->log(3, " $_: $self->{times}{$_} s");
}
} }
sub backup2disk { sub backup2disk {
@ -195,6 +203,7 @@ sub backup2disk {
} }
$self->close_session(); $self->close_session();
$self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files"); $self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
$self->{counts}{objects} += $count;
} }
sub parse { sub parse {
@ -376,6 +385,8 @@ sub log_level {
sub db_record_version { sub db_record_version {
my ($self, $target, $f) = @_; my ($self, $target, $f) = @_;
my $t0 = gettimeofday();
my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?", my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
{ Slice => {} }, { Slice => {} },
$target->{id}, $f->{name}); $target->{id}, $f->{name});
@ -463,6 +474,9 @@ sub db_record_version {
time(), 1, time(), 1,
$self->{session_id}, $version_id $self->{session_id}, $version_id
); );
my $t1 = gettimeofday();
$self->{times}{db_record_version} += $t1 - $t0;
} }
sub new_session { sub new_session {
@ -503,7 +517,7 @@ sub get_last_session_id {
return $sessions->[0]{id}; return $sessions->[0]{id};
} }
=head2 finddup =head2 linkdup
Try to find a duplicate of the current file in the database and replace the Try to find a duplicate of the current file in the database and replace the
current file with a hardlink to it. This is useful if you current file with a hardlink to it. This is useful if you
@ -518,7 +532,8 @@ and it is more likely that we can link to new copies than to old ones.
sub linkdup { sub linkdup {
my ($self, $f, $backup_filename) = @_; my ($self, $f, $backup_filename) = @_;
# XXX my $t0 = gettimeofday();
# XXX - this seems to be slow
my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions
where file_type=? and file_size=? and file_mtime=? where file_type=? and file_size=? and file_mtime=?
and file_owner=? and file_group=? and file_acl=? and file_owner=? and file_group=? and file_acl=?
@ -535,7 +550,18 @@ sub linkdup {
join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)), join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
$f->{checksum} $f->{checksum}
); );
my $st = stat($backup_filename);
my $my_dev = defined $st ? $st->dev : "";
while (my $r = $sth->fetchrow_hashref()) { while (my $r = $sth->fetchrow_hashref()) {
# check if old file is on same device. If it isn't, skip it.
unless ($self->{prefix_device}{$r->{prefix}}) {
my $st = lstat $r->{prefix};
$self->{prefix_device}{$r->{prefix}}
= defined $st ? $st->dev : "";
}
next unless $self->{prefix_device}{$r->{prefix}} eq $my_dev;
my $oldfile = "$r->{prefix}/$r->{path}"; my $oldfile = "$r->{prefix}/$r->{path}";
if (my $st = lstat($oldfile)) { if (my $st = lstat($oldfile)) {
if ($st->mtime == $f->{m} && if ($st->mtime == $f->{m} &&
@ -549,6 +575,9 @@ sub linkdup {
$self->log(10, "linked (dup)"); $self->log(10, "linked (dup)");
unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!"; unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
$sth->finish(); $sth->finish();
my $t1 = gettimeofday();
$self->{counts}{dup2}++;
$self->{times}{linkdup} += $t1 - $t0;
return $oldfile; return $oldfile;
} else { } else {
$self->log(5, "cannot link $oldfile to $backup_filename"); $self->log(5, "cannot link $oldfile to $backup_filename");
@ -557,6 +586,8 @@ sub linkdup {
} }
} }
} }
my $t1 = gettimeofday();
$self->{times}{linkdup} += $t1 - $t0;
return; return;
} }
@ -578,6 +609,7 @@ sub store_file {
if($self->present($f)) { if($self->present($f)) {
if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) { if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
$self->log(10, "linked"); $self->log(10, "linked");
$self->{counts}{dup1}++;
return $success; return $success;
} else { } else {
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!"); $self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");