Refactoring: Moved code to store a regular file in a sub of its own,

but didn't change the logic yet. Put info about file-related subprocesses 
into $self so that they can be cleaned up after the session ends.
Added a few log messages.
This commit is contained in:
hjp 2008-04-05 11:21:28 +00:00
parent 446c3a6fdc
commit c42aeb5aee
1 changed files with 157 additions and 68 deletions

View File

@ -1,4 +1,60 @@
#!/usr/bin/perl
=head1 NAME
Simba::CA
=head1 DESCRIPTION
Collecting Agent of the Simba backup system.
This class represents one instance of a running collecting agent.
The only user-callable methods are the constructor new and the instance
method run, which collects all the files from various disk agents.
A Simba::CA object is a hashref with the following keys:
=over
=item basedir
=item unknown_uid
=item unknown_gid
=item fh_log
=item log_level
=item dbh
=item targets
=item ssh_id_file
=item target
=item last_backup
=item last_backup_id
=item timestamp
=item this_backup
=item session_id
=item file_pid
=item file_cfd
=item file_dfd
=back
=cut
package Simba::CA;
use strict;
use warnings;
@ -84,6 +140,9 @@ sub run {
sub backup2disk {
my ($self, $target) = @_;
$self->log(3, "starting backup for target host " . $target->{host} . " dir " . $target->{dir});
$self->{target} = $target;
# get previous generation
my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' . $target->{host} . '/' . $target->{dir});
@ -96,7 +155,6 @@ sub backup2disk {
$self->new_session();
my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files
my ($file_pid, $file_cfd, $file_dfd); # connection to get content of files
$list_pid = open2($list_dfd, $list_cfd,
"/usr/bin/ssh",
"-l", "simba_da",
@ -104,80 +162,17 @@ sub backup2disk {
$target->{host}, "da");
$list_cfd->printflush("list $target->{dir}\n"); # XXX - encode!
close($list_cfd);
my $count = 0;
while (<$list_dfd>) {
$count++;
chomp;
$self->log(10, "file: $_");
# split into fields
chomp;
my $f = $self->parse($_);
# if file is already present
if ($f->{t} eq 'f') {
if($self->present($f)) {
link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
$self->log(10, "linked");
} else {
# else request from da
unless ($file_pid) {
$file_pid = open2($file_dfd, $file_cfd,
"/usr/bin/ssh",
"-l", "simba_da",
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
$target->{host}, "da");
}
$file_cfd->printflush("get $target->{dir}/$f->{name}\n"); # XXX - encode!
my $header = <$file_dfd>; # this should be the same as $_ - check?
if ($header =~ /^data (.*)/) {
my $f2 = $self->parse($1);
my $backup_filename = "$self->{this_backup}/$f->{name}";
open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
my $size = $f2->{s};
my $err;
my $sha1 = Digest::SHA1->new;
while ($size > 0) {
my $buffer;
my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE));
if (!defined($rc)) {
# I/O error
$self->log(5, "error reading from data socket: $!");
last;
} elsif ($rc == 0) {
# premature EOF.
$self->log(5, "unexpected EOF reading from data socket");
last;
}
$file_bfd->print($buffer) or die "write to backup failed: $!";
$size -= length($buffer);
$sha1->add($buffer);
}
close($file_bfd) or die "write to backup failed: $!";
my $trailer = <$file_dfd>; # should be empty line
$trailer = <$file_dfd>;
if ($trailer =~ /^fail /) {
$self->log(5, $trailer);
} elsif ($trailer =~ /^chk sha1 (\w+)/) {
my $checksum = $sha1->hexdigest;
if ($checksum ne $1) {
$self->log(5, "checksum error\n");
}
$f->{checksum} = $checksum;
} else {
$self->log(5, "unexpected trailer $trailer\n");
}
my $oldfile = $self->finddup($f);
if ($oldfile) {
unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
$self->log(10, "linked (dup)");
} else {
$self->setmeta($f);
$self->log(10, "stored");
}
} else {
$self->log(5, "unexpected header $header\n");
}
}
$self->store_file($f);
} elsif ($f->{t} eq 'd') {
my $d = "$self->{this_backup}/$f->{name}";
$d =~ s,//+,/,g;
@ -198,6 +193,7 @@ sub backup2disk {
$self->db_record_version($target, $f);
}
$self->close_session();
$self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
}
sub parse {
@ -433,6 +429,18 @@ sub new_session {
# Finish the current backup session.
#
# Records the session's end time in the sessions table. If a subprocess
# for fetching file contents was started (file_pid is set in $self), both
# of its handles are closed, the process is reaped, and the three file_*
# entries are dropped from $self. Finally the current target is forgotten.
sub close_session {
    my ($self) = @_;

    my $sql = "update sessions set end_date=? where id=?";
    $self->{dbh}->do($sql, {}, time(), $self->{session_id});

    if (my $pid = $self->{file_pid}) {
        # Close the command handle first, then the data handle, before
        # waiting for the subprocess.
        close($_) for @{$self}{qw(file_cfd file_dfd)};

        $self->log(3, "waiting for $pid");
        waitpid($pid, 0);
        $self->log(3, "$pid terminated with status $?");

        delete @{$self}{qw(file_cfd file_dfd file_pid)};
    }

    delete $self->{target};
}
sub get_last_session_id {
@ -483,6 +491,87 @@ sub finddup {
return;
}
=head2 store_file
store a file in the local filesystem. If the file appears to be unchanged since
the last backup, try to create a hard link. Otherwise, get the contents of the
file from the DA, and search for a file with the same contents (i.e., checksum)
and metadata, but possibly different name and try to link to that. If no link
can be created to an existing file, create a new one.
=cut
# Store one regular file in the current backup generation.
#
# Parameters:
#   $self - the collecting agent; reads this_backup, last_backup, target,
#           ssh_id_file and the file_pid/file_cfd/file_dfd connection slots.
#   $f    - hashref describing the file as parsed from the DA's list output;
#           the name field is used here, and checksum is set on success.
#
# If present($f) is true, the file is hard-linked from the previous backup.
# Otherwise the contents are requested from the DA over an ssh subprocess,
# which is started on first use and kept in $self for later files. The
# exchange on that connection, as read by this code, is:
#   ->  "get <dir>/<name>"
#   <-  "data <metadata>" header line
#   <-  <s> bytes of content (s taken from the parsed header)
#   <-  one line expected to be empty, then "chk sha1 <hex>" or "fail ..."
# After the transfer, finddup() is asked for an existing identical file; if
# one is found the new copy is replaced by a hard link, otherwise setmeta()
# is applied to the new file.
#
# Dies if a link, unlink, open, write or close on the backup file fails.
# Protocol problems (EOF, unexpected header or trailer, checksum mismatch)
# are logged at level 5 and do not die.
sub store_file {
    my ($self, $f) = @_;

    my $backup_filename = "$self->{this_backup}/$f->{name}";

    if ($self->present($f)) {
        my $previous = "$self->{last_backup}/$f->{name}";
        link($previous, $backup_filename)
            or die "cannot link $previous to $backup_filename: $!";
        $self->log(10, "linked");
        return;
    }

    # Not present in the last backup: request the contents from the DA.
    unless ($self->{file_pid}) {
        $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
            "/usr/bin/ssh",
            "-l", "simba_da",
            $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
            $self->{target}->{host}, "da");
    }
    my $cfd = $self->{file_cfd};
    my $dfd = $self->{file_dfd};

    $cfd->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!

    my $header = $dfd->getline; # this should be the same as the list entry - check?
    unless (defined $header) {
        # getline returns undef on EOF or error; nothing more can be read.
        $self->log(5, "unexpected EOF reading header from data socket");
        return;
    }
    my ($meta) = $header =~ /^data (.*)/;
    unless (defined $meta) {
        $self->log(5, "unexpected header $header\n");
        return;
    }
    my $f2 = $self->parse($meta);

    open(my $file_bfd, '>:raw', $backup_filename)
        or die "cannot open backup file $backup_filename: $!";
    my $size = $f2->{s};
    my $sha1 = Digest::SHA1->new;
    while ($size > 0) {
        my $buffer;
        my $rc = read($dfd, $buffer, min($size, $BUFSIZE));
        if (!defined($rc)) {
            # I/O error
            $self->log(5, "error reading from data socket: $!");
            last;
        }
        elsif ($rc == 0) {
            # premature EOF.
            $self->log(5, "unexpected EOF reading from data socket");
            last;
        }
        $file_bfd->print($buffer) or die "write to backup failed: $!";
        $size -= length($buffer);
        $sha1->add($buffer);
    }
    close($file_bfd) or die "write to backup failed: $!";

    my $trailer = $dfd->getline; # should be empty line
    $trailer = $dfd->getline;
    if (!defined $trailer) {
        $self->log(5, "unexpected EOF reading trailer from data socket");
    }
    elsif ($trailer =~ /^fail /) {
        $self->log(5, $trailer);
    }
    elsif (my ($expected) = $trailer =~ /^chk sha1 (\w+)/) {
        # Capture the expected digest right away instead of relying on $1
        # surviving the method call below.
        my $checksum = $sha1->hexdigest;
        if ($checksum ne $expected) {
            $self->log(5, "checksum error\n");
        }
        $f->{checksum} = $checksum;
    }
    else {
        $self->log(5, "unexpected trailer $trailer\n");
    }

    # NOTE(review): as before, a short or failed transfer still falls through
    # to the dup search and metadata step below - confirm this is intended.
    my $oldfile = $self->finddup($f);
    if ($oldfile) {
        unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
        link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
        $self->log(10, "linked (dup)");
    }
    else {
        $self->setmeta($f);
        $self->log(10, "stored");
    }
    return;
}
sub DESTROY {
my ($self) = @_;
$self->{dbh}->disconnect();