From 6cc428c1c9236e0e4b0b1b8dfa4604d1585ae07b Mon Sep 17 00:00:00 2001 From: hjp Date: Sun, 19 Nov 2006 23:00:08 +0000 Subject: [PATCH] Skeleton of rsync-like backup to disk is working. Relative paths are wrong, and permissions are missing. Database support is missing. --- Build.PL | 2 +- backup | 10 ++++ da | 4 +- lib/Simba/CA.pm | 137 ++++++++++++++++++++++++++++++++++++++++++---- lib/Simba/DA.pm | 105 +++++++++++++++++++---------------- lib/Simba/Util.pm | 35 ++++++++++++ t/00_da.t | 16 +++++- t/01_ca.t | 11 ++++ 8 files changed, 259 insertions(+), 61 deletions(-) create mode 100755 backup create mode 100644 lib/Simba/Util.pm create mode 100644 t/01_ca.t diff --git a/Build.PL b/Build.PL index caa8cea..184a15a 100644 --- a/Build.PL +++ b/Build.PL @@ -4,7 +4,7 @@ my $build = Module::Build->new module_name => 'Simba', license => 'perl', requires => { - Readonly => 0, + #Readonly => 0, Digest::SHA1 => 0, }, script_files => [ diff --git a/backup b/backup new file mode 100755 index 0000000..33239eb --- /dev/null +++ b/backup @@ -0,0 +1,10 @@ +#!/usr/bin/perl -T +use warnings; +use strict; +use Simba::CA; + +$ENV{PATH} = "/usr/bin"; + +my $ca = Simba::CA->new(); + +$ca->run(); diff --git a/da b/da index 9eaee0a..1794346 100755 --- a/da +++ b/da @@ -3,6 +3,8 @@ use warnings; use strict; use Simba::DA; -my $da = Simba::DA->new(); +open(my $log, '>>', '/var/log/simba/da.log'); +$log->autoflush(1); +my $da = Simba::DA->new({fh_log => $log}); $da->run(); diff --git a/lib/Simba/CA.pm b/lib/Simba/CA.pm index 027dce4..b63f4fb 100644 --- a/lib/Simba/CA.pm +++ b/lib/Simba/CA.pm @@ -1,7 +1,17 @@ +#!/usr/bin/perl package Simba::CA; +use strict; +use warnings; use Encode; use IPC::Open2; +use POSIX qw(strftime); +use Simba::Util qw(quote unquote typestr); +use Readonly; +use Digest::SHA1; +use List::Util qw(min); + +Readonly my $BUFSIZE => 128 * 1024; sub new { my ($class) = @_; @@ -11,8 +21,8 @@ sub new { $self->{basedir} = '/home/hjp/backup'; $self->{targets} = [ - { host => 'bernon.wsr.ac.at', dir => '/', } - { host => 'users.wsr.ac.at', dir => '/shares/user', } + { host => 'localhost', dir => '/home/hjp/test/', }, + # { host => 'users.wsr.ac.at', dir => '/shares/user', }, ]; return $self; @@ -32,13 +42,17 @@ sub backup2disk { my ($self, $target) = @_; # get previous generation - $self->{last_backup} = "..."; - $self->{this_backup} = "..."; + my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' . $target->{host} . '/' . $target->{dir}); + + $self->{last_backup} = $dirs[-1]; + + my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime); + $self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' . $target->{dir}; my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files my ($file_pid, $file_cfd, $file_dfd); # connection to get content of files - $list_pid = open2(my $list_cfd, my $list_dfd, "ssh", "-l", "simba_da", $host, "da"); - $list_cfd->printflush("list $dir\n"); # XXX - encode! + $list_pid = open2($list_dfd, $list_cfd, "/usr/bin/ssh", "-l", "simba_da", $target->{host}, "da"); + $list_cfd->printflush("list $target->{dir}\n"); # XXX - encode! while (<$list_dfd>) { # split into fields my $f = $self->parse($_); @@ -48,14 +62,117 @@ sub backup2disk { link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX } else { - # else request from da - unless ($file_pid) { - $file_pid = open2(my $file_cfd, my $file_dfd, "ssh", "-l", "simba_da", $host, "da"); + # else request from da + unless ($file_pid) { + $file_pid = open2($file_dfd, $file_cfd, "/usr/bin/ssh", "-l", "simba_da", $target->{host}, "da"); + } + $file_cfd->printflush("get $target->{dir}/$f->{name}\n"); # XXX - encode! + my $header = <$file_dfd>; # this should be the same as $_ - check? + if ($header =~ /^data (.*)/) { + my $f2 = $self->parse($1); + open(my $file_bfd, '>:raw', "$self->{this_backup}/$f->{name}") or die; # XXX + my $size = $f2->{s}; + my $err; + my $sha1 = Digest::SHA1->new; + + while ($size > 0) { + my $buffer; + my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE)); + if (!defined($rc)) { + # I/O error + print STDERR "error reading from data socket: $!"; + last; + } elsif ($rc == 0) { + # premature EOF. + print STDERR "unexpected EOF reading from data socket"; + last; + } + $file_bfd->print($buffer); + $size -= length($buffer); + $sha1->add($buffer); + } + my $trailer = <$file_dfd>; # should be empty line + $trailer = <$file_dfd>; + if ($trailer =~ /^fail /) { + print STDERR $trailer; + } elsif ($trailer =~ /^chk sha1 (\w+)/) { + if ($sha1->hexdigest ne $1) { + print STDERR "checksum error\n"; + } + } else { + print STDERR "unexpected trailer $trailer\n"; + } + } else { + print STDERR "unexpected header $header\n"; + } } - $file_cfd->printflush("get $dir\n"); # XXX - encode! + } elsif ($f->{t} eq 'd') { + my $d = "$self->{this_backup}/$f->{name}"; + $d =~ s,//+,/,g; + mkdir_p($d, 0700) or die "cannot mkdir $d: $!"; # XXX } else { # create local copy (or insert into DB only?) + print STDERR "ignored $_\n"; } # insert into DB. } } + +sub parse { + my ($self, $s) = @_; + + my @s = split(/ +/, $s); + my $f = {}; + $f->{name} = shift @s; + $f->{name} = $1 if ($f->{name} =~ /(.*)/); # detaint XXX + for (@s) { + my ($k, $v) = split(/=/, $_, 2); + $f->{$k} = $v; + # special processing for permissions etc, here? + } + $f->{o} = unquote($f->{o}); + $f->{g} = unquote($f->{g}); + $f->{acl} = unquote($f->{acl}); + return $f; + +} + +sub present { + my ($self, $f) = @_; + my $st; + if ($self->{last_backup} && + ($st = lstat("$self->{last_backup}/$f->{name}")) && + $st->mtime == $f->{m} && + $st->size == $f->{s} && + uid2name($st->uid) eq $f->{o} && + uid2name($st->gid) eq $f->{g} && + mode2acl($st->mode) eq $f->{acl} && + (($st->mode & 04000) == ($f->{setuid} || 0) * 04000) && + (($st->mode & 02000) == ($f->{setgid} || 0) * 02000) && + (($st->mode & 01000) == ($f->{sticky} || 0) * 01000) + ) { + return 1; + } else { + return 0; + } +} + +sub mkdir_p { + my ($dir, $perm) = @_; + $perm = 0777 unless(defined($perm)); + + if (-d $dir) { + return 1; + } elsif (mkdir($dir, $perm)) { + return 1; + } elsif ($!{ENOENT}) { + my $parentdir = $dir; + $parentdir =~ s|(.*)/.+|$1|; + mkdir_p($parentdir, $perm); + return mkdir($dir, $perm); + } else { + return undef; + } +} +# vim: tw=0 expandtab +1; diff --git a/lib/Simba/DA.pm b/lib/Simba/DA.pm index 900e466..dbde8ca 100644 --- a/lib/Simba/DA.pm +++ b/lib/Simba/DA.pm @@ -2,17 +2,21 @@ package Simba::DA; use strict; use warnings; -use Encode qw(decode encode from_to); +use Encode qw(decode encode); use File::Find; use File::stat; use Readonly; use Digest::SHA1; use List::Util qw(min); +use IO::Handle; +use Simba::Util qw(quote unquote typestr); +use POSIX qw(strftime); Readonly my $BUFSIZE => 128 * 1024; +#my $BUFSIZE = 128 * 1024; sub new { - my ($class) = @_; + my ($class, $opt) = @_; my $self = {}; bless $self, $class; @@ -38,13 +42,19 @@ sub new { '/tmp/hyre_be_dragones' => 1, }; $self->{charset} = 'utf-8'; + $self->{fh_out} = exists($opt->{fh_out}) ? $opt->{fh_out} : \*STDOUT; + $self->{fh_log} = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR; + $self->{log_level} = 99; + + die "PERL_UNICODE must not be set!" if $ENV{PERL_UNICODE}; return $self; } my %dispatch = ( - list => \&list, + list => \&list, + get => \&get, default => \&no_such_command, ); @@ -58,6 +68,7 @@ sub run { while (<>) { chomp; + $self->log(10, "received: $_"); my @cmd = split(); my $sub = $dispatch{$cmd[0]} || $dispatch{default}; $self->$sub(@cmd); @@ -67,6 +78,8 @@ sub run { sub list { my ($self, $cmd, $path) = @_; + my $fh_out = $self->{fh_out}; + find({ preprocess => sub { @@ -85,17 +98,18 @@ sub list { my $fn = decode($self->{charset}, $File::Find::name); $fn = quote($fn); - print $fn; + $fh_out->print($fn); - print metastr($fn, $st); + $fh_out->print(metastr($fn, $st)); - print "\n"; + $fh_out->print("\n"); }, no_chdir => 1, }, $path ); - + $fh_out->flush(); + $self->log(10, "$cmd done"); } sub permstr { @@ -133,33 +147,11 @@ sub gid2name { } } -sub quote { - my ($s) = @_; - $s =~ s{[\000-\040&=]}{sprintf("&#%d;", ord($&))}eg; - return encode('utf-8', $s); -} - -sub unquote { - my ($s) = @_; - $s = decode('utf-8', $s); - $s =~ s{&#(\d+);}{chr($1)}eg; - return $s; -} - -my @typestr = ( - #0 1 2 3 4 5 6 7 - '?', 'p', 'c', '?', 'd', '?', 'b', '?', - 'f', '?', 'l', '?', 's', '?', '?', '?', -); -sub typestr { - my ($mode) = @_; - $mode >>= 12; - return $typestr[$mode] || '?'; -} - sub get { my ($self, $cmd, $path) = @_; + my $fh_out = $self->{fh_out}; + # sanity checks on $path here? my $fn = encode($self->{charset}, unquote($path)); @@ -168,16 +160,16 @@ sub get { my $st = lstat($fn); if (!$st) { - print "fail $path ($!)\n"; + $fh_out->printflush("fail $path ($!)\n"); return; } if (typestr($st->mode) eq 'f') { my $fh; unless (open($fh, '<:raw', $fn)) { - print "fail $path ($!)\n"; + $fh_out->printflush("fail $path ($!)\n"); return; } - print "data $path ", metastr($fn, $st), "\n"; + $fh_out->print("data $path ", metastr($fn, $st), "\n"); my $size = $st->size; my $err; my $sha1 = Digest::SHA1->new; @@ -188,35 +180,37 @@ sub get { if (!defined($rc)) { # I/O error $err = $!; - print "\0" for (1 .. $size); + $fh_out->print("\0") for (1 .. $size); last; } elsif ($rc == 0) { # premature EOF. $err = "file shrunk by $size bytes"; - print "\0" for (1 .. $size); + $fh_out->print("\0") for (1 .. $size); last; } - print $buffer; + $fh_out->print($buffer); $size -= length($buffer); $sha1->add($buffer); } - print "\n"; + $fh_out->print("\n"); if ($err) { - print "fail ($err)\n"; + $fh_out->print("fail ($err)\n"); } else { - print "chk sha1 ", $sha1->hexdigest, "\n"; + $fh_out->print("chk sha1 ", $sha1->hexdigest, "\n"); } + $fh_out->flush(); + $self->log(10, "$fn done"); } elsif (typestr($st->mode) eq 'l') { my $target = readlink($fn); if (length($target) == $st->size) { - print "data $path ", metastr($fn, $st), "\n"; - print "$target\n"; - print "chk sha1 ", sha1_hex($target), "\n"; + $fh_out->print("data $path ", metastr($fn, $st), "\n"); + $fh_out->print("$target\n"); + $fh_out->print("chk sha1 ", sha1_hex($target), "\n"); } else { - print "fail ($!)\n"; + $fh_out->print("fail ($!)\n"); } } else { - print "nodata $path ", metastr($fn, $st), "\n"; + $fh_out->print("nodata $path ", metastr($fn, $st), "\n"); } } @@ -255,6 +249,25 @@ sub metastr { return $s; } +sub fh_out { + my ($self, $fh_out) = @_; + $self->{fh_out} = $fh_out if defined($fh_out); + return $self->{fh_out}; +} + +sub no_such_command { + my ($self, $cmd) = @_; + $self->{fh_out}->print("FAIL: no such command: $cmd\n"); + $self->log(5, "FAIL: no such command: $cmd"); +} + +sub log { + my ($self, $level, $msg) = @_; + if ($level <= $self->{log_level}) { + $self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n"); + } +} + 1; -# vim: sw=4 expandtab +# vim: sw=4 expandtab tw=0 diff --git a/lib/Simba/Util.pm b/lib/Simba/Util.pm new file mode 100644 index 0000000..a519fae --- /dev/null +++ b/lib/Simba/Util.pm @@ -0,0 +1,35 @@ +#!/usr/bin/perl +package Simba::Util; + +use warnings; +use strict; +use Exporter qw( import ); +use Encode qw(decode encode ); + +our @EXPORT_OK = qw(quote unquote typestr); + +sub quote { + my ($s) = @_; + $s =~ s{[\000-\040&=]}{sprintf("&#%d;", ord($&))}eg; + return encode('utf-8', $s); +} + +sub unquote { + my ($s) = @_; + $s = decode('utf-8', $s); + $s =~ s{&#(\d+);}{chr($1)}eg; + return $s; +} + +my @typestr = ( + #0 1 2 3 4 5 6 7 + '?', 'p', 'c', '?', 'd', '?', 'b', '?', + 'f', '?', 'l', '?', 's', '?', '?', '?', +); +sub typestr { + my ($mode) = @_; + $mode >>= 12; + return $typestr[$mode] || '?'; +} + +1; diff --git a/t/00_da.t b/t/00_da.t index 81f507a..23ad78e 100644 --- a/t/00_da.t +++ b/t/00_da.t @@ -10,8 +10,9 @@ my $da = Simba::DA->new(); ok($da, 'new DA'); my $list; open(my $fh, '>', \$list); -select $fh; +$da->fh_out($fh); $da->list('list', 't/root'); +close($fh); ok($list); cmp_ok($list, '=~', qr{t/root .* t=d }, 'root is a directory'); cmp_ok($list, '=~', qr{t/root/test.txt .* t=f s=14 }, 'text file found'); @@ -19,7 +20,7 @@ cmp_ok($list, '=~', qr{t/root/test.bin .* t=f s=1024 }, 'binary file found'); my $result; open($fh, '>', \$result); -select $fh; +$da->fh_out($fh); $da->get('get', 't/root/test.txt'); ok($result, 'get returned something'); @@ -38,7 +39,7 @@ ok($trailer, 'trailer found'); is($trailer, 'chk sha1 e3b9312f5f7afbe0bfff5c49ab5e9a160b2b04f4', 'trailer contains correct checksum'); open($fh, '>', \$result); -select $fh; +$da->fh_out($fh); $da->get('get', 't/root/test.bin'); ($header, $content, $trailer) @@ -48,7 +49,16 @@ ok($header, 'header found'); cmp_ok($header, '=~', qr{t/root/test.bin .* t=f s=1024 }, 'binary file found'); ok($content, 'content found'); +open($fh, '>:raw', "t/tmp/test.$$.bin"); +print $fh $content; +close($fh); cmp_ok(length($content), '==', 1024, 'binary file is 1024 bytes long'); ok($trailer, 'trailer found'); is($trailer, 'chk sha1 97253f25fc3945cd6293e3dfad2a322041b14164', 'trailer contains correct checksum'); + +open($fh, '>', \$result); +$da->fh_out($fh); +$da->no_such_command('no_such_command'); +cmp_ok(length($result), '>', 0, 'invoking nonexistant command produces message'); + diff --git a/t/01_ca.t b/t/01_ca.t new file mode 100644 index 0000000..fba7b41 --- /dev/null +++ b/t/01_ca.t @@ -0,0 +1,11 @@ +#!/usr/bin/perl +use warnings; +use strict; + +use Test::More 'no_plan'; + +BEGIN { use_ok( 'Simba::CA' ); } + +my $ca = Simba::CA->new(); +ok($ca, 'new CA'); +