diff --git a/lib/Simba/CA.pm b/lib/Simba/CA.pm index 2924b40..c6624ec 100644 --- a/lib/Simba/CA.pm +++ b/lib/Simba/CA.pm @@ -455,19 +455,27 @@ sub get_last_session_id { =head2 finddup -Find a duplicate of the current file in the database. This is useful if you +Try to find a duplicate of the current file in the database and replace the +current file with a hardlink to it. This is useful if you have multiple copies of a file stored in different locations. +The search starts from the newest session and continues into the past until +either linking is successful or we run out of duplicates. +This is done because creating a hard link may not always be possible (duplicate is +on a different file system or has already reached the maximum link count) +and it is more likely that we can link to new copies than to old ones. =cut -sub finddup { - my ($self, $f) = @_; +sub linkdup { + my ($self, $f, $backup_filename) = @_; my $sth = $self->{dbh}->prepare("select * from versions, files, sessions where file_type=? and file_size=? and file_mtime=? and file_owner=? and file_group=? and file_acl=? and file_unix_bits=? and checksum=? 
and online=1 - and versions.file=files.id and versions.session=sessions.id"); + and versions.file=files.id and versions.session=sessions.id + order by sessions.id desc + "); $sth->execute( $f->{t}, $f->{s}, $f->{m}, $f->{o}, $f->{g}, $f->{acl}, @@ -483,8 +491,16 @@ sub finddup { $st->gid == $self->name2gid($f->{g}) && ($st->mode & 07777) == $self->acl2mode($f) ) { - $sth->finish(); - return $oldfile; + rename($backup_filename, "$backup_filename.$$.simba_backup") or die "cannot save $backup_filename to $backup_filename.$$.simba_backup: $!"; + if (link($oldfile, $backup_filename)) { + $self->log(10, "linked (dup)"); + unlink("$backup_filename.$$.simba_backup") or die "cannot unlink $backup_filename.$$.simba_backup: $!"; + $sth->finish(); + return $oldfile; + } else { + $self->log(5, "cannot link $oldfile to $backup_filename"); + rename("$backup_filename.$$.simba_backup", $backup_filename) or die "cannot restore $backup_filename from $backup_filename.$$.simba_backup: $!"; + } } } } @@ -505,71 +521,69 @@ sub store_file { my ($self, $f) = @_; if($self->present($f)) { - link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX - $self->log(10, "linked"); - } else { - - # else request from da - unless ($self->{file_pid}) { - $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd}, - "/usr/bin/ssh", - "-l", "simba_da", - $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (), - $self->{target}->{host}, "da"); - } - $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode! - my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check? 
- if ($header =~ /^data (.*)/) { - my $f2 = $self->parse($1); - my $backup_filename = "$self->{this_backup}/$f->{name}"; - open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!"; - my $size = $f2->{s}; - my $err; - my $sha1 = Digest::SHA1->new; - - while ($size > 0) { - my $buffer; - my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE)); - if (!defined($rc)) { - # I/O error - $self->log(5, "error reading from data socket: $!"); - last; - } elsif ($rc == 0) { - # premature EOF. - $self->log(5, "unexpected EOF reading from data socket"); - last; - } - $file_bfd->print($buffer) or die "write to backup failed: $!"; - $size -= length($buffer); - $sha1->add($buffer); - } - close($file_bfd) or die "write to backup failed: $!"; - my $trailer = $self->{file_dfd}->getline; # should be empty line - $trailer = $self->{file_dfd}->getline; - if ($trailer =~ /^fail /) { - $self->log(5, $trailer); - } elsif ($trailer =~ /^chk sha1 (\w+)/) { - my $checksum = $sha1->hexdigest; - if ($checksum ne $1) { - $self->log(5, "checksum error\n"); - } - $f->{checksum} = $checksum; - } else { - $self->log(5, "unexpected trailer $trailer\n"); - } - my $oldfile = $self->finddup($f); - if ($oldfile) { - unlink($backup_filename) or die "cannot unlink $backup_filename: $!"; - link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!"; - $self->log(10, "linked (dup)"); - } else { - $self->setmeta($f); - $self->log(10, "stored"); - } + if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) { + $self->log(10, "linked"); + return; } else { - $self->log(5, "unexpected header $header\n"); + $self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!"); } } + + # else request from da + unless ($self->{file_pid}) { + $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd}, + "/usr/bin/ssh", + "-l", "simba_da", + $self->{ssh_id_file} ? 
("-i", $self->{ssh_id_file}) : (), + $self->{target}->{host}, "da"); + } + $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode! + my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check? + if ($header =~ /^data (.*)/) { + my $f2 = $self->parse($1); + my $backup_filename = "$self->{this_backup}/$f->{name}"; + open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!"; + my $size = $f2->{s}; + my $err; + my $sha1 = Digest::SHA1->new; + + while ($size > 0) { + my $buffer; + my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE)); + if (!defined($rc)) { + # I/O error + $self->log(5, "error reading from data socket: $!"); + last; + } elsif ($rc == 0) { + # premature EOF. + $self->log(5, "unexpected EOF reading from data socket"); + last; + } + $file_bfd->print($buffer) or die "write to backup failed: $!"; + $size -= length($buffer); + $sha1->add($buffer); + } + close($file_bfd) or die "write to backup failed: $!"; + my $trailer = $self->{file_dfd}->getline; # should be empty line + $trailer = $self->{file_dfd}->getline; + if ($trailer =~ /^fail /) { + $self->log(5, $trailer); + } elsif ($trailer =~ /^chk sha1 (\w+)/) { + my $checksum = $sha1->hexdigest; + if ($checksum ne $1) { + $self->log(5, "checksum error\n"); + } + $f->{checksum} = $checksum; + } else { + $self->log(5, "unexpected trailer $trailer\n"); + } + unless ($self->linkdup($f, $backup_filename)) { + $self->setmeta($f); + $self->log(10, "stored"); + } + } else { + $self->log(5, "unexpected header $header\n"); + } } sub DESTROY {