Try to be more intelligent in linking to old instances of the same file.
In particular, recover from various errors (e.g. link count exceeded) by creating a new file. Note: This code is extremely slow and should not be used. A reorganisation of the database layout is in progress.
This commit is contained in:
parent
87ab219c31
commit
066869e9c2
150
lib/Simba/CA.pm
150
lib/Simba/CA.pm
|
@ -455,19 +455,27 @@ sub get_last_session_id {
|
|||
|
||||
=head2 finddup
|
||||
|
||||
Find a duplicate of the current file in the database. This is useful if you
|
||||
Try to find a duplicate of the current file in the database and replace the
|
||||
current file with a hardlink to it. This is useful if you
|
||||
have multiple copies of a file stored in different locations.
|
||||
The search starts from the newest session and continues into the past until
|
||||
either linking is successful or we run out of duplicates.
|
||||
This is done because creating a hard link may not always be possible (the duplicate is
|
||||
on a different file system or has already reached the maximum link count)
|
||||
and it is more likely that we can link to new copies than to old ones.
|
||||
|
||||
=cut
|
||||
|
||||
sub finddup {
|
||||
my ($self, $f) = @_;
|
||||
sub linkdup {
|
||||
my ($self, $f, $backup_filename) = @_;
|
||||
my $sth = $self->{dbh}->prepare("select * from versions, files, sessions
|
||||
where file_type=? and file_size=? and file_mtime=?
|
||||
and file_owner=? and file_group=? and file_acl=?
|
||||
and file_unix_bits=?
|
||||
and checksum=? and online=1
|
||||
and versions.file=files.id and versions.session=sessions.id");
|
||||
and versions.file=files.id and versions.session=sessions.id
|
||||
order by sessions.id desc
|
||||
");
|
||||
$sth->execute(
|
||||
$f->{t}, $f->{s}, $f->{m},
|
||||
$f->{o}, $f->{g}, $f->{acl},
|
||||
|
@ -483,8 +491,16 @@ sub finddup {
|
|||
$st->gid == $self->name2gid($f->{g}) &&
|
||||
($st->mode & 07777) == $self->acl2mode($f)
|
||||
) {
|
||||
$sth->finish();
|
||||
return $oldfile;
|
||||
rename($backup_filename, "$backup_filename.$$.simba_backup") or die "cannot save $backup_filename to $backup_filename.$$.simba_backup: $!";
|
||||
if (link($oldfile, $backup_filename)) {
|
||||
$self->log(10, "linked (dup)");
|
||||
unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
|
||||
$sth->finish();
|
||||
return $oldfile;
|
||||
} else {
|
||||
$self->log(5, "cannot link $oldfile to $backup_filename");
|
||||
rename("$backup_filename.$$.simba_backup", $backup_filename) or die "cannot restore $backup_filename from $backup_filename.$$.simba_backup: $!";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -505,71 +521,69 @@ sub store_file {
|
|||
my ($self, $f) = @_;
|
||||
|
||||
if($self->present($f)) {
|
||||
link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
|
||||
$self->log(10, "linked");
|
||||
} else {
|
||||
|
||||
# else request from da
|
||||
unless ($self->{file_pid}) {
|
||||
$self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
|
||||
"/usr/bin/ssh",
|
||||
"-l", "simba_da",
|
||||
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
|
||||
$self->{target}->{host}, "da");
|
||||
}
|
||||
$self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
|
||||
my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
|
||||
if ($header =~ /^data (.*)/) {
|
||||
my $f2 = $self->parse($1);
|
||||
my $backup_filename = "$self->{this_backup}/$f->{name}";
|
||||
open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
|
||||
my $size = $f2->{s};
|
||||
my $err;
|
||||
my $sha1 = Digest::SHA1->new;
|
||||
|
||||
while ($size > 0) {
|
||||
my $buffer;
|
||||
my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
|
||||
if (!defined($rc)) {
|
||||
# I/O error
|
||||
$self->log(5, "error reading from data socket: $!");
|
||||
last;
|
||||
} elsif ($rc == 0) {
|
||||
# premature EOF.
|
||||
$self->log(5, "unexpected EOF reading from data socket");
|
||||
last;
|
||||
}
|
||||
$file_bfd->print($buffer) or die "write to backup failed: $!";
|
||||
$size -= length($buffer);
|
||||
$sha1->add($buffer);
|
||||
}
|
||||
close($file_bfd) or die "write to backup failed: $!";
|
||||
my $trailer = $self->{file_dfd}->getline; # should be empty line
|
||||
$trailer = $self->{file_dfd}->getline;
|
||||
if ($trailer =~ /^fail /) {
|
||||
$self->log(5, $trailer);
|
||||
} elsif ($trailer =~ /^chk sha1 (\w+)/) {
|
||||
my $checksum = $sha1->hexdigest;
|
||||
if ($checksum ne $1) {
|
||||
$self->log(5, "checksum error\n");
|
||||
}
|
||||
$f->{checksum} = $checksum;
|
||||
} else {
|
||||
$self->log(5, "unexpected trailer $trailer\n");
|
||||
}
|
||||
my $oldfile = $self->finddup($f);
|
||||
if ($oldfile) {
|
||||
unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
|
||||
link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
|
||||
$self->log(10, "linked (dup)");
|
||||
} else {
|
||||
$self->setmeta($f);
|
||||
$self->log(10, "stored");
|
||||
}
|
||||
if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
|
||||
$self->log(10, "linked");
|
||||
return;
|
||||
} else {
|
||||
$self->log(5, "unexpected header $header\n");
|
||||
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
|
||||
}
|
||||
}
|
||||
|
||||
# else request from da
|
||||
unless ($self->{file_pid}) {
|
||||
$self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
|
||||
"/usr/bin/ssh",
|
||||
"-l", "simba_da",
|
||||
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
|
||||
$self->{target}->{host}, "da");
|
||||
}
|
||||
$self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
|
||||
my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
|
||||
if ($header =~ /^data (.*)/) {
|
||||
my $f2 = $self->parse($1);
|
||||
my $backup_filename = "$self->{this_backup}/$f->{name}";
|
||||
open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
|
||||
my $size = $f2->{s};
|
||||
my $err;
|
||||
my $sha1 = Digest::SHA1->new;
|
||||
|
||||
while ($size > 0) {
|
||||
my $buffer;
|
||||
my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
|
||||
if (!defined($rc)) {
|
||||
# I/O error
|
||||
$self->log(5, "error reading from data socket: $!");
|
||||
last;
|
||||
} elsif ($rc == 0) {
|
||||
# premature EOF.
|
||||
$self->log(5, "unexpected EOF reading from data socket");
|
||||
last;
|
||||
}
|
||||
$file_bfd->print($buffer) or die "write to backup failed: $!";
|
||||
$size -= length($buffer);
|
||||
$sha1->add($buffer);
|
||||
}
|
||||
close($file_bfd) or die "write to backup failed: $!";
|
||||
my $trailer = $self->{file_dfd}->getline; # should be empty line
|
||||
$trailer = $self->{file_dfd}->getline;
|
||||
if ($trailer =~ /^fail /) {
|
||||
$self->log(5, $trailer);
|
||||
} elsif ($trailer =~ /^chk sha1 (\w+)/) {
|
||||
my $checksum = $sha1->hexdigest;
|
||||
if ($checksum ne $1) {
|
||||
$self->log(5, "checksum error\n");
|
||||
}
|
||||
$f->{checksum} = $checksum;
|
||||
} else {
|
||||
$self->log(5, "unexpected trailer $trailer\n");
|
||||
}
|
||||
unless ($self->linkdup($f, $backup_filename)) {
|
||||
$self->setmeta($f);
|
||||
$self->log(10, "stored");
|
||||
}
|
||||
} else {
|
||||
$self->log(5, "unexpected header $header\n");
|
||||
}
|
||||
}
|
||||
|
||||
sub DESTROY {
|
||||
|
|
Loading…
Reference in New Issue