Only try to hardlink to old versions on the same device.
This should significantly speed up the first backup to a new device. Added some timing info to the log.
This commit is contained in:
parent
8334de1d7b
commit
74da594a66
|
@ -70,6 +70,7 @@ use IO::Handle;
|
||||||
use File::stat;
|
use File::stat;
|
||||||
use Scalar::Util qw(tainted);
|
use Scalar::Util qw(tainted);
|
||||||
use DBI;
|
use DBI;
|
||||||
|
use Time::HiRes qw(gettimeofday);
|
||||||
|
|
||||||
Readonly my $BUFSIZE => 128 * 1024;
|
Readonly my $BUFSIZE => 128 * 1024;
|
||||||
|
|
||||||
|
@ -135,6 +136,13 @@ sub run {
|
||||||
for my $target (@{$self->{targets}}) {
|
for my $target (@{$self->{targets}}) {
|
||||||
$self->backup2disk($target);
|
$self->backup2disk($target);
|
||||||
}
|
}
|
||||||
|
$self->log(3, "statistics:");
|
||||||
|
for (sort keys %{ $self->{counts} }) {
|
||||||
|
$self->log(3, " $_: $self->{counts}{$_}");
|
||||||
|
}
|
||||||
|
for (sort keys %{ $self->{times} }) {
|
||||||
|
$self->log(3, " $_: $self->{times}{$_} s");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sub backup2disk {
|
sub backup2disk {
|
||||||
|
@ -195,6 +203,7 @@ sub backup2disk {
|
||||||
}
|
}
|
||||||
$self->close_session();
|
$self->close_session();
|
||||||
$self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
|
$self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
|
||||||
|
$self->{counts}{objects} += $count;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub parse {
|
sub parse {
|
||||||
|
@ -376,6 +385,8 @@ sub log_level {
|
||||||
sub db_record_version {
|
sub db_record_version {
|
||||||
my ($self, $target, $f) = @_;
|
my ($self, $target, $f) = @_;
|
||||||
|
|
||||||
|
my $t0 = gettimeofday();
|
||||||
|
|
||||||
my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
|
my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
|
||||||
{ Slice => {} },
|
{ Slice => {} },
|
||||||
$target->{id}, $f->{name});
|
$target->{id}, $f->{name});
|
||||||
|
@ -463,6 +474,9 @@ sub db_record_version {
|
||||||
time(), 1,
|
time(), 1,
|
||||||
$self->{session_id}, $version_id
|
$self->{session_id}, $version_id
|
||||||
);
|
);
|
||||||
|
|
||||||
|
my $t1 = gettimeofday();
|
||||||
|
$self->{times}{db_record_version} += $t1 - $t0;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub new_session {
|
sub new_session {
|
||||||
|
@ -503,7 +517,7 @@ sub get_last_session_id {
|
||||||
return $sessions->[0]{id};
|
return $sessions->[0]{id};
|
||||||
}
|
}
|
||||||
|
|
||||||
=head2 finddup
|
=head2 linkdup
|
||||||
|
|
||||||
Try to find a duplicate of the current file in the database and replace the
|
Try to find a duplicate of the current file in the database and replace the
|
||||||
current file with a hardlink to it. This is useful if you
|
current file with a hardlink to it. This is useful if you
|
||||||
|
@ -518,7 +532,8 @@ and it is more likely that we can link to new copies than to old ones.
|
||||||
|
|
||||||
sub linkdup {
|
sub linkdup {
|
||||||
my ($self, $f, $backup_filename) = @_;
|
my ($self, $f, $backup_filename) = @_;
|
||||||
# XXX
|
my $t0 = gettimeofday();
|
||||||
|
# XXX - this seems to be slow
|
||||||
my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions
|
my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions
|
||||||
where file_type=? and file_size=? and file_mtime=?
|
where file_type=? and file_size=? and file_mtime=?
|
||||||
and file_owner=? and file_group=? and file_acl=?
|
and file_owner=? and file_group=? and file_acl=?
|
||||||
|
@ -535,7 +550,18 @@ sub linkdup {
|
||||||
join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
|
join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
|
||||||
$f->{checksum}
|
$f->{checksum}
|
||||||
);
|
);
|
||||||
|
my $st = stat($backup_filename);
|
||||||
|
my $my_dev = defined $st ? $st->dev : "";
|
||||||
while (my $r = $sth->fetchrow_hashref()) {
|
while (my $r = $sth->fetchrow_hashref()) {
|
||||||
|
|
||||||
|
# check if old file is on same device. If it isn't, skip it.
|
||||||
|
unless ($self->{prefix_device}{$r->{prefix}}) {
|
||||||
|
my $st = lstat $r->{prefix};
|
||||||
|
$self->{prefix_device}{$r->{prefix}}
|
||||||
|
= defined $st ? $st->dev : "";
|
||||||
|
}
|
||||||
|
next unless $self->{prefix_device}{$r->{prefix}} eq $my_dev;
|
||||||
|
|
||||||
my $oldfile = "$r->{prefix}/$r->{path}";
|
my $oldfile = "$r->{prefix}/$r->{path}";
|
||||||
if (my $st = lstat($oldfile)) {
|
if (my $st = lstat($oldfile)) {
|
||||||
if ($st->mtime == $f->{m} &&
|
if ($st->mtime == $f->{m} &&
|
||||||
|
@ -549,6 +575,9 @@ sub linkdup {
|
||||||
$self->log(10, "linked (dup)");
|
$self->log(10, "linked (dup)");
|
||||||
unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
|
unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
|
||||||
$sth->finish();
|
$sth->finish();
|
||||||
|
my $t1 = gettimeofday();
|
||||||
|
$self->{counts}{dup2}++;
|
||||||
|
$self->{times}{linkdup} += $t1 - $t0;
|
||||||
return $oldfile;
|
return $oldfile;
|
||||||
} else {
|
} else {
|
||||||
$self->log(5, "cannot link $oldfile to $backup_filename");
|
$self->log(5, "cannot link $oldfile to $backup_filename");
|
||||||
|
@ -557,6 +586,8 @@ sub linkdup {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
my $t1 = gettimeofday();
|
||||||
|
$self->{times}{linkdup} += $t1 - $t0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,6 +609,7 @@ sub store_file {
|
||||||
if($self->present($f)) {
|
if($self->present($f)) {
|
||||||
if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
|
if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
|
||||||
$self->log(10, "linked");
|
$self->log(10, "linked");
|
||||||
|
$self->{counts}{dup1}++;
|
||||||
return $success;
|
return $success;
|
||||||
} else {
|
} else {
|
||||||
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
|
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
|
||||||
|
|
Loading…
Reference in New Issue