diff --git a/Notes b/Notes
index 45ff579..47a90d7 100644
--- a/Notes
+++ b/Notes
@@ -63,11 +63,7 @@ Tape performance:
 
 Equality checking doesn't work for setuid files.
 
-Implement equality checking via saved checksum.
-
-checksum is null if file is linked. Is that ok? Can this be solved
-together with the previous point?
-
 exit if disk full
 
-Move prune list to config file.
+On my 800 MHz PIII, the CPU usage is rather high. Some profiling seems
+to be necessary (or I should get a faster backup server :-)).
diff --git a/lib/Simba/CA.pm b/lib/Simba/CA.pm
index 57f6778..123e218 100644
--- a/lib/Simba/CA.pm
+++ b/lib/Simba/CA.pm
@@ -156,8 +156,15 @@ sub backup2disk {
                 } else {
                     $self->log(5, "unexpected trailer $trailer\n");
                 }
-                $self->setmeta($f);
-                $self->log(10, "stored");
+                my $oldfile = $self->finddup($f);
+                if ($oldfile) {
+                    unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
+                    link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
+                    $self->log(10, "linked (dup)");
+                } else {
+                    $self->setmeta($f);
+                    $self->log(10, "stored");
+                }
             } else {
                 $self->log(5, "unexpected header $header\n");
             }
@@ -421,6 +428,7 @@ sub close_session {
 
 sub get_last_session_id {
     my ($self) = @_;
+    return unless $self->{last_backup};
     my $sessions = $self->{dbh}->selectall_arrayref("select * from sessions where prefix=?",
                                                     { Slice => {} },
                                                     $self->{last_backup});
@@ -428,5 +436,54 @@ sub get_last_session_id {
     return $sessions->[0]{id};
 }
 
+=head2 finddup
+
+Find a duplicate of the current file in the database. This is useful if you
+have multiple copies of a file stored in different locations.
+
+Returns the path of an online copy whose on-disk metadata still matches, or
+nothing if no such copy exists.
+
+=cut
+
+sub finddup {
+    my ($self, $f) = @_;
+    my $sth = $self->{dbh}->prepare("select * from versions, files, sessions
+        where file_type=? and file_size=? and file_mtime=?
+        and file_owner=? and file_group=? and file_acl=?
+        and file_unix_bits=?
+        and checksum=? and online=1
+        and versions.file=files.id and versions.session=sessions.id");
+    $sth->execute(
+        $f->{t}, $f->{s}, $f->{m},
+        $f->{o}, $f->{g}, $f->{acl},
+        join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
+        $f->{checksum}
+    );
+    while (my $r = $sth->fetchrow_hashref()) {
+        my $oldfile = "$r->{prefix}/$r->{path}";
+        # Re-check the candidate on disk: only use it if it still exists with the same metadata.
+        if (my $st = lstat($oldfile)) {
+            if ($st->mtime == $f->{m} &&
+                $st->size == $f->{s} &&
+                $st->uid == $self->name2uid($f->{o}) &&
+                $st->gid == $self->name2gid($f->{g}) &&
+                ($st->mode & 07777) == $self->acl2mode($f)
+            ) {
+                $sth->finish();
+                return $oldfile;
+            }
+        }
+    }
+    return;
+}
+
+sub DESTROY {
+    my ($self) = @_;
+    # The handle may already be gone (e.g. during global destruction).
+    $self->{dbh}->disconnect() if $self->{dbh};
+}
+
+
 # vim: tw=0 expandtab
 1;
diff --git a/t/01_ca.t b/t/01_ca.t
index c1fa9fe..c755478 100644
--- a/t/01_ca.t
+++ b/t/01_ca.t
@@ -6,7 +6,8 @@ use Test::More tests => 15;
 
 BEGIN { use_ok( 'Simba::CA' ); }
 
-my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba"});
+my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
+                                      "$ENV{HOME}/.dbi/simba_test"});
 ok($ca, 'new CA');
 
 my $uid;
diff --git a/t/02_ca.t b/t/02_ca.t
new file mode 100644
index 0000000..082faf2
--- /dev/null
+++ b/t/02_ca.t
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+use warnings;
+use strict;
+
+# Live tests.
+#
+# These tests need to be run as simba_ca and it needs to be able to
+# connect to simba_da@localhost.
+
+use Test::More tests => 8;
+
+use File::stat;
+
+BEGIN { use_ok( 'Simba::CA' ); }
+
+my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
+                                      "$ENV{HOME}/.dbi/simba_test"});
+
+ok($ca, 'new CA');
+
+# make sure filesets contains test data then connect again:
+$ca->{dbh}->do("delete from filesets");
+$ca->{dbh}->do("insert into filesets(host, dir) values('localhost', '/var/tmp/simba_test')");
+
+$ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
+                                   "$ENV{HOME}/.dbi/simba_test"});
+
+ok($ca, 'new CA 2');
+
+
+SKIP: {
+    skip "not running as root", 5 unless $> == 0;    # five tests live in this block
+    mkdir "/var/tmp/simba_test";
+    mkdir "/var/tmp/simba_test/d1";
+    mkdir "/var/tmp/simba_test/d2";
+
+    open my $fh, '>:raw', '/var/tmp/simba_test/d1/f1' or die "cannot open /var/tmp/simba_test/d1/f1: $!";
+    print $fh "test\n";
+    close($fh);
+
+    open $fh, '>:raw', '/var/tmp/simba_test/d2/f2' or die "cannot open /var/tmp/simba_test/d2/f2: $!";
+    print $fh "test\n";
+    close($fh);
+
+    $ca->run();
+    my $this_backup = $ca->{this_backup};
+    my $st1 = lstat("$this_backup/d1/f1");
+    ok($st1, "file 1 exists");
+    is($st1->nlink, 2, "file 1 has 2 links");
+    my $st2 = lstat("$this_backup/d2/f2");
+    ok($st2, "file 2 exists");
+    is($st2->nlink, 2, "file 2 has 2 links");
+    is($st1->ino, $st2->ino, "file 1 and 2 are the same");
+
+    # cleanup
+    system("rm", "-rf", $this_backup);
+    $ca->{dbh}->do("delete from versions");
+    $ca->{dbh}->do("delete from files");
+    $ca->{dbh}->do("delete from filesets");
+
+}