Try to be more intelligent in linking to old instances of the same file.
Especially recover from various errors (e.g. link count exceeded) by creating a new file.

Note: This code is extremely slow and should not be used. A reorganisation of the database layout is in progress.
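In essence the change makes store_file prefer hardlinking over storing: it first tries to link the new backup path to an existing copy (newest sessions first) and only keeps the freshly transferred file when every link attempt fails, e.g. because the duplicate lives on another file system or its link count is exhausted. A minimal standalone sketch of that pattern (the helper name, the candidate list and the simplified error handling are illustrative only, not the Simba::CA code):

    use strict;
    use warnings;

    # Try to replace $new_copy with a hard link to one of the candidate paths
    # (assumed to be ordered newest first).  Returns the path we linked to, or
    # undef if every attempt failed, in which case the fresh copy is kept.
    sub link_to_duplicate {
        my ($new_copy, @candidates) = @_;
        my $saved = "$new_copy.$$.bak";                 # park the fresh copy
        rename($new_copy, $saved) or die "cannot save $new_copy: $!";
        for my $old (@candidates) {
            if (link($old, $new_copy)) {                # hard link succeeded
                unlink($saved) or die "cannot unlink $saved: $!";
                return $old;
            }
            warn "cannot link $old to $new_copy: $!";   # e.g. EXDEV, EMLINK
        }
        rename($saved, $new_copy) or die "cannot restore $new_copy: $!";
        return;                                         # keep the new file
    }

In the commit itself the candidates come from the versions/files/sessions query (now ordered by sessions.id desc), and the save/link/restore steps live in the renamed linkdup method shown in the diff below.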
This commit is contained in:
parent 87ab219c31
commit 066869e9c2

lib/Simba/CA.pm (150 changed lines)
@@ -455,19 +455,27 @@ sub get_last_session_id {
 
 =head2 finddup
 
-Find a duplicate of the current file in the database. This is useful if you
+Try to find a duplicate of the current file in the database and replace the
+current file with a hardlink to it. This is useful if you
 have multiple copies of a file stored in different locations.
+The search starts from the newest session and continues into the past until
+either linking successful or we run out of duplicates.
+This is done because creating a hard link may not always be possible (duplicate is
+on a different file system or has already reached the maximum link count)
+and it is more likely that we can link to new copies than to old ones.
 
 =cut
 
-sub finddup {
-    my ($self, $f) = @_;
+sub linkdup {
+    my ($self, $f, $backup_filename) = @_;
     my $sth = $self->{dbh}->prepare("select * from versions, files, sessions
                                      where file_type=? and file_size=? and file_mtime=?
                                      and file_owner=? and file_group=? and file_acl=?
                                      and file_unix_bits=?
                                      and checksum=? and online=1
-                                     and versions.file=files.id and versions.session=sessions.id");
+                                     and versions.file=files.id and versions.session=sessions.id
+                                     order by sessions.id desc
+                                     ");
     $sth->execute(
         $f->{t}, $f->{s}, $f->{m},
         $f->{o}, $f->{g}, $f->{acl},
@@ -483,8 +491,16 @@ sub finddup {
            $st->gid == $self->name2gid($f->{g}) &&
            ($st->mode & 07777) == $self->acl2mode($f)
           ) {
-            $sth->finish();
-            return $oldfile;
+            rename($backup_filename, "$backup_filename.$$.simba_backup") or die "cannot save $backup_filename to $backup_filename.$$.simba_backup: $!";
+            if (link($oldfile, $backup_filename)) {
+                $self->log(10, "linked (dup)");
+                unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!";
+                $sth->finish();
+                return $oldfile;
+            } else {
+                $self->log(5, "cannot link $oldfile to $backup_filename");
+                rename("$backup_filename.$$.simba_backup", $backup_filename) or die "cannot restore $backup_filename from $backup_filename.$$.simba_backup: $!";
+            }
         }
     }
 }
@@ -505,71 +521,69 @@ sub store_file {
     my ($self, $f) = @_;
 
     if($self->present($f)) {
-        link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
+        if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
             $self->log(10, "linked");
-    } else {
-        # else request from da
-        unless ($self->{file_pid}) {
-            $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
-                "/usr/bin/ssh",
-                "-l", "simba_da",
-                $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
-                $self->{target}->{host}, "da");
-        }
-        $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
-        my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
-        if ($header =~ /^data (.*)/) {
-            my $f2 = $self->parse($1);
-            my $backup_filename = "$self->{this_backup}/$f->{name}";
-            open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
-            my $size = $f2->{s};
-            my $err;
-            my $sha1 = Digest::SHA1->new;
-
-            while ($size > 0) {
-                my $buffer;
-                my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
-                if (!defined($rc)) {
-                    # I/O error
-                    $self->log(5, "error reading from data socket: $!");
-                    last;
-                } elsif ($rc == 0) {
-                    # premature EOF.
-                    $self->log(5, "unexpected EOF reading from data socket");
-                    last;
-                }
-                $file_bfd->print($buffer) or die "write to backup failed: $!";
-                $size -= length($buffer);
-                $sha1->add($buffer);
-            }
-            close($file_bfd) or die "write to backup failed: $!";
-            my $trailer = $self->{file_dfd}->getline; # should be empty line
-            $trailer = $self->{file_dfd}->getline;
-            if ($trailer =~ /^fail /) {
-                $self->log(5, $trailer);
-            } elsif ($trailer =~ /^chk sha1 (\w+)/) {
-                my $checksum = $sha1->hexdigest;
-                if ($checksum ne $1) {
-                    $self->log(5, "checksum error\n");
-                }
-                $f->{checksum} = $checksum;
-            } else {
-                $self->log(5, "unexpected trailer $trailer\n");
-            }
-            my $oldfile = $self->finddup($f);
-            if ($oldfile) {
-                unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
-                link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
-                $self->log(10, "linked (dup)");
-            } else {
-                $self->setmeta($f);
-                $self->log(10, "stored");
-            }
+            return;
         } else {
-            $self->log(5, "unexpected header $header\n");
+            $self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
         }
     }
 
+    # else request from da
+    unless ($self->{file_pid}) {
+        $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
+            "/usr/bin/ssh",
+            "-l", "simba_da",
+            $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
+            $self->{target}->{host}, "da");
+    }
+    $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
+    my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
+    if ($header =~ /^data (.*)/) {
+        my $f2 = $self->parse($1);
+        my $backup_filename = "$self->{this_backup}/$f->{name}";
+        open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
+        my $size = $f2->{s};
+        my $err;
+        my $sha1 = Digest::SHA1->new;
+
+        while ($size > 0) {
+            my $buffer;
+            my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
+            if (!defined($rc)) {
+                # I/O error
+                $self->log(5, "error reading from data socket: $!");
+                last;
+            } elsif ($rc == 0) {
+                # premature EOF.
+                $self->log(5, "unexpected EOF reading from data socket");
+                last;
+            }
+            $file_bfd->print($buffer) or die "write to backup failed: $!";
+            $size -= length($buffer);
+            $sha1->add($buffer);
+        }
+        close($file_bfd) or die "write to backup failed: $!";
+        my $trailer = $self->{file_dfd}->getline; # should be empty line
+        $trailer = $self->{file_dfd}->getline;
+        if ($trailer =~ /^fail /) {
+            $self->log(5, $trailer);
+        } elsif ($trailer =~ /^chk sha1 (\w+)/) {
+            my $checksum = $sha1->hexdigest;
+            if ($checksum ne $1) {
+                $self->log(5, "checksum error\n");
+            }
+            $f->{checksum} = $checksum;
+        } else {
+            $self->log(5, "unexpected trailer $trailer\n");
+        }
+        unless ($self->linkdup($f, $backup_filename)) {
+            $self->setmeta($f);
+            $self->log(10, "stored");
+        }
+    } else {
+        $self->log(5, "unexpected header $header\n");
+    }
 }
 
 sub DESTROY {