Refactoring: Moved the code that stores a regular file into a sub of its own,
but didn't change the logic yet. Put info about file-related subprocesses into $self so that they can be cleaned up after the session ends. Added a few log messages.
parent 446c3a6fdc
commit c42aeb5aee

lib/Simba/CA.pm: 225 lines changed
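The cleanup pattern the commit message describes, in outline (a sketch only, using the field names this diff introduces; not a verbatim excerpt of the module, and @ssh_command is a placeholder):

    # keep the pid and the pipe handles of the file-transfer child in $self ...
    $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd}, @ssh_command);

    # ... so that close_session() can tear the connection down when the session ends
    close($self->{file_cfd});
    close($self->{file_dfd});
    waitpid $self->{file_pid}, 0;
    delete @$self{qw(file_cfd file_dfd file_pid)};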
--- a/lib/Simba/CA.pm
+++ b/lib/Simba/CA.pm
@@ -1,4 +1,60 @@
 #!/usr/bin/perl
+
+=head1 NAME
+
+Simba::CA
+
+=head1 DESCRIPTION
+
+Collecting Agent of the Simba backup system.
+
+This class represents one instance of a running collecting agent.
+The only user-callable methods are the constructor new and the instance
+method run, which collects all the files from various disk agents.
+
+The Simba::CA package is a hashref with the following keys:
+
+=over
+
+=item basedir
+
+=item unknown_uid
+
+=item unknown_gid
+
+=item fh_log
+
+=item log_level
+
+=item dbh
+
+=item targets
+
+=item ssh_id_file
+
+=item target
+
+=item last_backup
+
+=item last_backup_id
+
+=item timestamp
+
+=item this_backup
+
+=item session_id
+
+=item file_pid
+
+=item file_cfd
+
+=item file_dfd
+
+=back
+
+=cut
+
+
 package Simba::CA;
 use strict;
 use warnings;
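The POD added above says new and run are the only user-callable methods; typical use would look roughly like this (the constructor arguments are assumptions for illustration, not taken from this commit):

    use Simba::CA;

    my $ca = Simba::CA->new({ basedir => '/backup', log_level => 5 });  # hypothetical arguments
    $ca->run();   # back up all configured targets via their disk agents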
@@ -84,6 +140,9 @@ sub backup2disk {
 sub backup2disk {
     my ($self, $target) = @_;
 
+    $self->log(3, "starting backup for target host " . $target->{host} . " dir " . $target->{dir});
+    $self->{target} = $target;
+
     # get previous generation
     my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' . $target->{host} . '/' . $target->{dir});
 
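The new messages use the existing $self->log($level, $message) convention. The log method itself is not part of this diff; given the fh_log and log_level keys it presumably behaves roughly like this sketch (an assumption, not repository code):

    sub log {
        my ($self, $level, $msg) = @_;
        return if $level > $self->{log_level};   # assumed: larger level = more verbose detail
        $self->{fh_log}->print("[$level] $msg\n");
    }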
@@ -96,7 +155,6 @@ sub backup2disk {
     $self->new_session();
 
     my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files
-    my ($file_pid, $file_cfd, $file_dfd); # connection to get content of files
     $list_pid = open2($list_dfd, $list_cfd,
                       "/usr/bin/ssh",
                       "-l", "simba_da",
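open2 here comes from IPC::Open2: it forks the given command and returns its pid, with the first handle reading from the child's stdout and the second writing to its stdin. A minimal, self-contained illustration of the same call pattern (the host name is a placeholder):

    use IPC::Open2 qw(open2);
    use IO::Handle;

    my ($from_da, $to_da);
    # reader first, writer second, then the command and its arguments
    my $pid = open2($from_da, $to_da,
                    '/usr/bin/ssh', '-l', 'simba_da', 'da.example.org', 'da');
    $to_da->printflush("list /etc\n");   # same request format as the code above
    print while <$from_da>;              # one line per file, as consumed by backup2disk
    waitpid $pid, 0;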
@@ -104,80 +162,17 @@ sub backup2disk {
                       $target->{host}, "da");
     $list_cfd->printflush("list $target->{dir}\n"); # XXX - encode!
     close($list_cfd);
+    my $count = 0;
     while (<$list_dfd>) {
+        $count++;
         chomp;
         $self->log(10, "file: $_");
         # split into fields
         chomp;
         my $f = $self->parse($_);
-        # if file is already present
         if ($f->{t} eq 'f') {
-            if($self->present($f)) {
+            $self->store_file($f);
-                link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
-                $self->log(10, "linked");
-            } else {
-
-                # else request from da
-                unless ($file_pid) {
-                    $file_pid = open2($file_dfd, $file_cfd,
-                                      "/usr/bin/ssh",
-                                      "-l", "simba_da",
-                                      $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
-                                      $target->{host}, "da");
-                }
-                $file_cfd->printflush("get $target->{dir}/$f->{name}\n"); # XXX - encode!
-                my $header = <$file_dfd>; # this should be the same as $_ - check?
-                if ($header =~ /^data (.*)/) {
-                    my $f2 = $self->parse($1);
-                    my $backup_filename = "$self->{this_backup}/$f->{name}";
-                    open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
-                    my $size = $f2->{s};
-                    my $err;
-                    my $sha1 = Digest::SHA1->new;
-
-                    while ($size > 0) {
-                        my $buffer;
-                        my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE));
-                        if (!defined($rc)) {
-                            # I/O error
-                            $self->log(5, "error reading from data socket: $!");
-                            last;
-                        } elsif ($rc == 0) {
-                            # premature EOF.
-                            $self->log(5, "unexpected EOF reading from data socket");
-                            last;
-                        }
-                        $file_bfd->print($buffer) or die "write to backup failed: $!";
-                        $size -= length($buffer);
-                        $sha1->add($buffer);
-                    }
-                    close($file_bfd) or die "write to backup failed: $!";
-                    my $trailer = <$file_dfd>; # should be empty line
-                    $trailer = <$file_dfd>;
-                    if ($trailer =~ /^fail /) {
-                        $self->log(5, $trailer);
-                    } elsif ($trailer =~ /^chk sha1 (\w+)/) {
-                        my $checksum = $sha1->hexdigest;
-                        if ($checksum ne $1) {
-                            $self->log(5, "checksum error\n");
-                        }
-                        $f->{checksum} = $checksum;
-                    } else {
-                        $self->log(5, "unexpected trailer $trailer\n");
-                    }
-                    my $oldfile = $self->finddup($f);
-                    if ($oldfile) {
-                        unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
-                        link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
-                        $self->log(10, "linked (dup)");
-                    } else {
-                        $self->setmeta($f);
-                        $self->log(10, "stored");
-                    }
-                } else {
-                    $self->log(5, "unexpected header $header\n");
-                }
-            }
         } elsif ($f->{t} eq 'd') {
             my $d = "$self->{this_backup}/$f->{name}";
             $d =~ s,//+,/,g;
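backup2disk (and, after this change, store_file) only ever looks at a few keys of the hashref returned by $self->parse. Inferred from those uses (parse itself is unchanged and not shown here; example values are made up):

    my $f = {
        t    => 'f',            # entry type: 'f' regular file, 'd' directory
        name => 'etc/passwd',   # path relative to the target directory
        s    => 1234,           # size in bytes, drives the read loop as $f2->{s}
    };
    # store_file later adds $f->{checksum} once the SHA-1 of the received data is known.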
@@ -198,6 +193,7 @@ sub backup2disk {
         $self->db_record_version($target, $f);
     }
     $self->close_session();
+    $self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
 }
 
 sub parse {
@@ -433,6 +429,18 @@ sub new_session {
 sub close_session {
     my ($self) = @_;
     $self->{dbh}->do("update sessions set end_date=? where id=?", {}, time(), $self->{session_id});
+    if ($self->{file_pid}) {
+        close($self->{file_cfd});
+        close($self->{file_dfd});
+
+        $self->log(3, "waiting for $self->{file_pid}");
+        waitpid $self->{file_pid}, 0;
+        $self->log(3, "$self->{file_pid} terminated with status $?");
+        delete $self->{file_cfd};
+        delete $self->{file_dfd};
+        delete $self->{file_pid};
+    }
+    delete $self->{target};
 }
 
 sub get_last_session_id {
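Closing both pipe handles first lets the ssh child see EOF and exit; waitpid then reaps it so it does not linger as a zombie and so that $? is meaningful. The $? logged above is the raw wait status; splitting it into an exit code and a signal number would look like this (not part of the commit):

    waitpid $self->{file_pid}, 0;
    my $exit   = $? >> 8;     # exit code of the disk-agent ssh process
    my $signal = $? & 127;    # signal number, if it was killed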
@@ -483,6 +491,87 @@ sub finddup {
     return;
 }
 
+=head2 store_file
+
+store a file in the local filesystem. If the file appears to be unchanged since
+the last backup, try to create a hard link. Otherwise, get the contents of the
+file from the DA, and search for a file with the same contents (i.e., checksum)
+and metadata, but possibly different name and try to link to that. If no link
+can be created to an existing file, create a new one.
+
+=cut
+
+sub store_file {
+    my ($self, $f) = @_;
+
+    if($self->present($f)) {
+        link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
+        $self->log(10, "linked");
+    } else {
+
+        # else request from da
+        unless ($self->{file_pid}) {
+            $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
+                                      "/usr/bin/ssh",
+                                      "-l", "simba_da",
+                                      $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
+                                      $self->{target}->{host}, "da");
+        }
+        $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
+        my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
+        if ($header =~ /^data (.*)/) {
+            my $f2 = $self->parse($1);
+            my $backup_filename = "$self->{this_backup}/$f->{name}";
+            open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
+            my $size = $f2->{s};
+            my $err;
+            my $sha1 = Digest::SHA1->new;
+
+            while ($size > 0) {
+                my $buffer;
+                my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
+                if (!defined($rc)) {
+                    # I/O error
+                    $self->log(5, "error reading from data socket: $!");
+                    last;
+                } elsif ($rc == 0) {
+                    # premature EOF.
+                    $self->log(5, "unexpected EOF reading from data socket");
+                    last;
+                }
+                $file_bfd->print($buffer) or die "write to backup failed: $!";
+                $size -= length($buffer);
+                $sha1->add($buffer);
+            }
+            close($file_bfd) or die "write to backup failed: $!";
+            my $trailer = $self->{file_dfd}->getline; # should be empty line
+            $trailer = $self->{file_dfd}->getline;
+            if ($trailer =~ /^fail /) {
+                $self->log(5, $trailer);
+            } elsif ($trailer =~ /^chk sha1 (\w+)/) {
+                my $checksum = $sha1->hexdigest;
+                if ($checksum ne $1) {
+                    $self->log(5, "checksum error\n");
+                }
+                $f->{checksum} = $checksum;
+            } else {
+                $self->log(5, "unexpected trailer $trailer\n");
+            }
+            my $oldfile = $self->finddup($f);
+            if ($oldfile) {
+                unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
+                link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
+                $self->log(10, "linked (dup)");
+            } else {
+                $self->setmeta($f);
+                $self->log(10, "stored");
+            }
+        } else {
+            $self->log(5, "unexpected header $header\n");
+        }
+    }
+}
+
 sub DESTROY {
     my ($self) = @_;
     $self->{dbh}->disconnect();
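The effect of the hard-link deduplication described in the store_file POD can be verified from outside: an unchanged (or duplicate) file ends up sharing one inode between the old and the new backup generation. A small sketch, with placeholder paths:

    my $old_inode = (stat('/backup/2007-01-01T00.00.00/host/etc/passwd'))[1];
    my $new_inode = (stat('/backup/2007-01-02T00.00.00/host/etc/passwd'))[1];
    print "stored as a hard link\n"
        if defined $old_inode && defined $new_inode && $old_inode == $new_inode;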