Try to be more intelligent in linking to old instances of the same file.

Especially recover from various errors (e.g. link count exceeded) by
creating a new file.

Note: This code is extremely slow and should not be used. A
reorganisation of the database layout is in progress.
This commit is contained in:
hjp 2008-04-13 10:36:46 +00:00
parent 87ab219c31
commit 066869e9c2
1 changed files with 82 additions and 68 deletions

View File

@ -455,19 +455,27 @@ sub get_last_session_id {
=head2 finddup
Find a duplicate of the current file in the database. This is useful if you
Try to find a duplicate of the current file in the database and replace the
current file with a hardlink to it. This is useful if you
have multiple copies of a file stored in different locations.
The search starts from the newest session and continues into the past until
either linking is successful or we run out of duplicates.
This is done because creating a hard link may not always be possible (duplicate is
on a different file system or has already reached the maximum link count)
and it is more likely that we can link to new copies than to old ones.
=cut
sub finddup {
my ($self, $f) = @_;
sub linkdup {
my ($self, $f, $backup_filename) = @_;
my $sth = $self->{dbh}->prepare("select * from versions, files, sessions
where file_type=? and file_size=? and file_mtime=?
and file_owner=? and file_group=? and file_acl=?
and file_unix_bits=?
and checksum=? and online=1
and versions.file=files.id and versions.session=sessions.id");
and versions.file=files.id and versions.session=sessions.id
order by sessions.id desc
");
$sth->execute(
$f->{t}, $f->{s}, $f->{m},
$f->{o}, $f->{g}, $f->{acl},
@ -483,8 +491,16 @@ sub finddup {
$st->gid == $self->name2gid($f->{g}) &&
($st->mode & 07777) == $self->acl2mode($f)
) {
$sth->finish();
return $oldfile;
rename($backup_filename, "$backup_filename.$$.simba_backup") or die "cannot save $backup_filename to $backup_filename.$$.simba_backup: $!";
if (link($oldfile, $backup_filename)) {
$self->log(10, "linked (dup)");
unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
$sth->finish();
return $oldfile;
} else {
$self->log(5, "cannot link $oldfile to $backup_filename");
rename("$backup_filename.$$.simba_backup", $backup_filename) or die "cannot restore $backup_filename from $backup_filename.$$.simba_backup: $!";
}
}
}
}
@ -505,71 +521,69 @@ sub store_file {
my ($self, $f) = @_;
if($self->present($f)) {
link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
$self->log(10, "linked");
} else {
# else request from da
unless ($self->{file_pid}) {
$self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
"/usr/bin/ssh",
"-l", "simba_da",
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
$self->{target}->{host}, "da");
}
$self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
if ($header =~ /^data (.*)/) {
my $f2 = $self->parse($1);
my $backup_filename = "$self->{this_backup}/$f->{name}";
open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
my $size = $f2->{s};
my $err;
my $sha1 = Digest::SHA1->new;
while ($size > 0) {
my $buffer;
my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
if (!defined($rc)) {
# I/O error
$self->log(5, "error reading from data socket: $!");
last;
} elsif ($rc == 0) {
# premature EOF.
$self->log(5, "unexpected EOF reading from data socket");
last;
}
$file_bfd->print($buffer) or die "write to backup failed: $!";
$size -= length($buffer);
$sha1->add($buffer);
}
close($file_bfd) or die "write to backup failed: $!";
my $trailer = $self->{file_dfd}->getline; # should be empty line
$trailer = $self->{file_dfd}->getline;
if ($trailer =~ /^fail /) {
$self->log(5, $trailer);
} elsif ($trailer =~ /^chk sha1 (\w+)/) {
my $checksum = $sha1->hexdigest;
if ($checksum ne $1) {
$self->log(5, "checksum error\n");
}
$f->{checksum} = $checksum;
} else {
$self->log(5, "unexpected trailer $trailer\n");
}
my $oldfile = $self->finddup($f);
if ($oldfile) {
unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
$self->log(10, "linked (dup)");
} else {
$self->setmeta($f);
$self->log(10, "stored");
}
if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
$self->log(10, "linked");
return;
} else {
$self->log(5, "unexpected header $header\n");
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
}
}
# else request from da
unless ($self->{file_pid}) {
$self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
"/usr/bin/ssh",
"-l", "simba_da",
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
$self->{target}->{host}, "da");
}
$self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
if ($header =~ /^data (.*)/) {
my $f2 = $self->parse($1);
my $backup_filename = "$self->{this_backup}/$f->{name}";
open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
my $size = $f2->{s};
my $err;
my $sha1 = Digest::SHA1->new;
while ($size > 0) {
my $buffer;
my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
if (!defined($rc)) {
# I/O error
$self->log(5, "error reading from data socket: $!");
last;
} elsif ($rc == 0) {
# premature EOF.
$self->log(5, "unexpected EOF reading from data socket");
last;
}
$file_bfd->print($buffer) or die "write to backup failed: $!";
$size -= length($buffer);
$sha1->add($buffer);
}
close($file_bfd) or die "write to backup failed: $!";
my $trailer = $self->{file_dfd}->getline; # should be empty line
$trailer = $self->{file_dfd}->getline;
if ($trailer =~ /^fail /) {
$self->log(5, $trailer);
} elsif ($trailer =~ /^chk sha1 (\w+)/) {
my $checksum = $sha1->hexdigest;
if ($checksum ne $1) {
$self->log(5, "checksum error\n");
}
$f->{checksum} = $checksum;
} else {
$self->log(5, "unexpected trailer $trailer\n");
}
unless ($self->linkdup($f, $backup_filename)) {
$self->setmeta($f);
$self->log(10, "stored");
}
} else {
$self->log(5, "unexpected header $header\n");
}
}
sub DESTROY {