From c42aeb5aee04f1b66ef0c4aa7fa414d6b19ea38b Mon Sep 17 00:00:00 2001 From: hjp Date: Sat, 5 Apr 2008 11:21:28 +0000 Subject: [PATCH] Refactoring: Moved code to store a regular file in a sub of its own, but don't change the logic yet. Put info about file related subprocesses into $self so that this can be cleaned up after the session ends. Added a few log messages. --- lib/Simba/CA.pm | 225 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 157 insertions(+), 68 deletions(-) diff --git a/lib/Simba/CA.pm b/lib/Simba/CA.pm index 2c5c8a9..2924b40 100644 --- a/lib/Simba/CA.pm +++ b/lib/Simba/CA.pm @@ -1,4 +1,60 @@ #!/usr/bin/perl + +=head1 NAME + +Simba::CA + +=head1 DESCRIPTION + +Collecting Agent of the Simba backup system. + +This class represents one instance of a running collecting agent. +The only user-callable methods are the constructor new and the instance +method run, which collects all the files from various disk agents. + +A Simba::CA object is a hashref with the following keys: + +=over + +=item basedir + +=item unknown_uid + +=item unknown_gid + +=item fh_log + +=item log_level + +=item dbh + +=item targets + +=item ssh_id_file + +=item target + +=item last_backup + +=item last_backup_id + +=item timestamp + +=item this_backup + +=item session_id + +=item file_pid + +=item file_cfd + +=item file_dfd + +=back + +=cut + + package Simba::CA; use strict; use warnings; @@ -84,6 +140,9 @@ sub run { sub backup2disk { my ($self, $target) = @_; + $self->log(3, "starting backup for target host " . $target->{host} . " dir " . $target->{dir}); + $self->{target} = $target; + # get previous generation my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' . $target->{host} . '/' . 
$target->{dir}); @@ -96,7 +155,6 @@ sub backup2disk { $self->new_session(); my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files - my ($file_pid, $file_cfd, $file_dfd); # connection to get content of files $list_pid = open2($list_dfd, $list_cfd, "/usr/bin/ssh", "-l", "simba_da", @@ -104,80 +162,17 @@ sub backup2disk { $target->{host}, "da"); $list_cfd->printflush("list $target->{dir}\n"); # XXX - encode! close($list_cfd); + my $count = 0; while (<$list_dfd>) { + $count++; chomp; $self->log(10, "file: $_"); # split into fields chomp; my $f = $self->parse($_); - # if file is already present + if ($f->{t} eq 'f') { - if($self->present($f)) { - link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX - $self->log(10, "linked"); - } else { - - # else request from da - unless ($file_pid) { - $file_pid = open2($file_dfd, $file_cfd, - "/usr/bin/ssh", - "-l", "simba_da", - $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (), - $target->{host}, "da"); - } - $file_cfd->printflush("get $target->{dir}/$f->{name}\n"); # XXX - encode! - my $header = <$file_dfd>; # this should be the same as $_ - check? - if ($header =~ /^data (.*)/) { - my $f2 = $self->parse($1); - my $backup_filename = "$self->{this_backup}/$f->{name}"; - open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!"; - my $size = $f2->{s}; - my $err; - my $sha1 = Digest::SHA1->new; - - while ($size > 0) { - my $buffer; - my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE)); - if (!defined($rc)) { - # I/O error - $self->log(5, "error reading from data socket: $!"); - last; - } elsif ($rc == 0) { - # premature EOF. 
- $self->log(5, "unexpected EOF reading from data socket"); - last; - } - $file_bfd->print($buffer) or die "write to backup failed: $!"; - $size -= length($buffer); - $sha1->add($buffer); - } - close($file_bfd) or die "write to backup failed: $!"; - my $trailer = <$file_dfd>; # should be empty line - $trailer = <$file_dfd>; - if ($trailer =~ /^fail /) { - $self->log(5, $trailer); - } elsif ($trailer =~ /^chk sha1 (\w+)/) { - my $checksum = $sha1->hexdigest; - if ($checksum ne $1) { - $self->log(5, "checksum error\n"); - } - $f->{checksum} = $checksum; - } else { - $self->log(5, "unexpected trailer $trailer\n"); - } - my $oldfile = $self->finddup($f); - if ($oldfile) { - unlink($backup_filename) or die "cannot unlink $backup_filename: $!"; - link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!"; - $self->log(10, "linked (dup)"); - } else { - $self->setmeta($f); - $self->log(10, "stored"); - } - } else { - $self->log(5, "unexpected header $header\n"); - } - } + $self->store_file($f); } elsif ($f->{t} eq 'd') { my $d = "$self->{this_backup}/$f->{name}"; $d =~ s,//+,/,g; @@ -198,6 +193,7 @@ sub backup2disk { $self->db_record_version($target, $f); } $self->close_session(); + $self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files"); } sub parse { @@ -433,6 +429,18 @@ sub new_session { sub close_session { my ($self) = @_; $self->{dbh}->do("update sessions set end_date=? 
where id=?", {}, time(), $self->{session_id}); + if ($self->{file_pid}) { + close($self->{file_cfd}); + close($self->{file_dfd}); + + $self->log(3, "waiting for $self->{file_pid}"); + waitpid $self->{file_pid}, 0; + $self->log(3, "$self->{file_pid} terminated with status $?"); + delete $self->{file_cfd}; + delete $self->{file_dfd}; + delete $self->{file_pid}; + } + delete $self->{target}; } sub get_last_session_id { @@ -483,6 +491,87 @@ sub finddup { return; } +=head2 store_file + +Store a file in the local filesystem. If the file appears to be unchanged since +the last backup, try to create a hard link. Otherwise, get the contents of the +file from the DA, and search for a file with the same contents (i.e., checksum) +and metadata, but possibly a different name, and try to link to that. If no link +can be created to an existing file, create a new one. + +=cut + +sub store_file { + my ($self, $f) = @_; + + if($self->present($f)) { + link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX + $self->log(10, "linked"); + } else { + + # else request from da + unless ($self->{file_pid}) { + $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd}, + "/usr/bin/ssh", + "-l", "simba_da", + $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (), + $self->{target}->{host}, "da"); + } + $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode! + my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check? 
+ if ($header =~ /^data (.*)/) { + my $f2 = $self->parse($1); + my $backup_filename = "$self->{this_backup}/$f->{name}"; + open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!"; + my $size = $f2->{s}; + my $err; + my $sha1 = Digest::SHA1->new; + + while ($size > 0) { + my $buffer; + my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE)); + if (!defined($rc)) { + # I/O error + $self->log(5, "error reading from data socket: $!"); + last; + } elsif ($rc == 0) { + # premature EOF. + $self->log(5, "unexpected EOF reading from data socket"); + last; + } + $file_bfd->print($buffer) or die "write to backup failed: $!"; + $size -= length($buffer); + $sha1->add($buffer); + } + close($file_bfd) or die "write to backup failed: $!"; + my $trailer = $self->{file_dfd}->getline; # should be empty line + $trailer = $self->{file_dfd}->getline; + if ($trailer =~ /^fail /) { + $self->log(5, $trailer); + } elsif ($trailer =~ /^chk sha1 (\w+)/) { + my $checksum = $sha1->hexdigest; + if ($checksum ne $1) { + $self->log(5, "checksum error\n"); + } + $f->{checksum} = $checksum; + } else { + $self->log(5, "unexpected trailer $trailer\n"); + } + my $oldfile = $self->finddup($f); + if ($oldfile) { + unlink($backup_filename) or die "cannot unlink $backup_filename: $!"; + link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!"; + $self->log(10, "linked (dup)"); + } else { + $self->setmeta($f); + $self->log(10, "stored"); + } + } else { + $self->log(5, "unexpected header $header\n"); + } + } +} + sub DESTROY { my ($self) = @_; $self->{dbh}->disconnect();