Find duplicate files even if file name is different.
This commit is contained in:
parent
474ef9e7ef
commit
579b0a33b3
8
Notes
8
Notes
|
@ -63,11 +63,7 @@ Tape performance:
|
|||
|
||||
Equality checking doesn't work for setuid files.
|
||||
|
||||
Implement equality checking via saved checksum.
|
||||
|
||||
Checksum is NULL if the file is linked. Is that OK? Can this be solved
|
||||
together with the previous point?
|
||||
|
||||
Exit if the disk is full.
|
||||
|
||||
Move prune list to config file.
|
||||
On my 800 MHz PIII, the CPU usage is rather high. Some profiling seems
|
||||
to be necessary (or I should get a faster backup server :-)).
|
||||
|
|
|
@ -156,8 +156,15 @@ sub backup2disk {
|
|||
} else {
|
||||
$self->log(5, "unexpected trailer $trailer\n");
|
||||
}
|
||||
$self->setmeta($f);
|
||||
$self->log(10, "stored");
|
||||
my $oldfile = $self->finddup($f);
|
||||
if ($oldfile) {
|
||||
unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
|
||||
link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
|
||||
$self->log(10, "linked (dup)");
|
||||
} else {
|
||||
$self->setmeta($f);
|
||||
$self->log(10, "stored");
|
||||
}
|
||||
} else {
|
||||
$self->log(5, "unexpected header $header\n");
|
||||
}
|
||||
|
@ -421,6 +428,7 @@ sub close_session {
|
|||
|
||||
sub get_last_session_id {
|
||||
my ($self) = @_;
|
||||
return unless $self->{last_backup};
|
||||
my $sessions = $self->{dbh}->selectall_arrayref("select * from sessions where prefix=?",
|
||||
{ Slice => {} },
|
||||
$self->{last_backup});
|
||||
|
@ -428,5 +436,49 @@ sub get_last_session_id {
|
|||
return $sessions->[0]{id};
|
||||
}
|
||||
|
||||
=head2 finddup

Find a duplicate of the current file in the database. This is useful if you
have multiple copies of a file stored in different locations.

C<$f> is the metadata hash of the file to look up. The keys read here are
C<t>, C<s>, C<m>, C<o>, C<g>, C<acl> and C<checksum> (matched against
C<file_type>, C<file_size>, C<file_mtime>, C<file_owner>, C<file_group>,
C<file_acl> and C<checksum>), plus the flags C<setuid>, C<setgid> and
C<sticky> (joined with commas and matched against C<file_unix_bits>).

Every candidate row is re-checked against the file system: the first one
whose file still exists as C<"prefix/path"> with the same mtime, size, owner,
group and permission bits is returned. Returns nothing if C<$f> has no
checksum or if no candidate qualifies.

=cut

sub finddup {
    my ($self, $f) = @_;

    # "checksum=?" can never be true for a NULL bind value, so a file without
    # a checksum cannot match any row. Skip the database round trip.
    return unless defined $f->{checksum};

    my $unix_bits = join(',', map { $f->{$_} ? ($_) : () } qw(setuid setgid sticky));

    my $sth = $self->{dbh}->prepare("select * from versions, files, sessions
                                     where file_type=? and file_size=? and file_mtime=?
                                     and file_owner=? and file_group=? and file_acl=?
                                     and file_unix_bits=?
                                     and checksum=? and online=1
                                     and versions.file=files.id and versions.session=sessions.id");
    $sth->execute(
        $f->{t}, $f->{s}, $f->{m},
        $f->{o}, $f->{g}, $f->{acl},
        $unix_bits,
        $f->{checksum}
    );

    while (my $r = $sth->fetchrow_hashref()) {
        my $oldfile = "$r->{prefix}/$r->{path}";

        # The database may list files that no longer exist on disk or whose
        # metadata has changed since, so verify the candidate before use.
        # NOTE(review): the result is used as an object, which assumes
        # File::stat's lstat is in effect in this package - confirm.
        my $st = lstat($oldfile) or next;

        next unless $st->mtime == $f->{m}
                 && $st->size == $f->{s}
                 && $st->uid == $self->name2uid($f->{o})
                 && $st->gid == $self->name2gid($f->{g})
                 && ($st->mode & 07777) == $self->acl2mode($f);

        # Leaving the loop early: release the still-active statement handle.
        $sth->finish();
        return $oldfile;
    }
    return;
}
|
||||
|
||||
# Destructor: disconnect the database handle stored in $self->{dbh}.
sub DESTROY {
    my ($self) = @_;

    # The handle can be absent: construction may have failed before it was
    # set, or it may already have been released during global destruction.
    # Calling a method on undef here would raise an error in cleanup.
    my $dbh = $self->{dbh} or return;

    $dbh->disconnect();
    return;
}
|
||||
|
||||
|
||||
# vim: tw=0 expandtab
|
||||
1;
|
||||
|
|
|
@ -6,7 +6,8 @@ use Test::More tests => 15;
|
|||
|
||||
BEGIN { use_ok( 'Simba::CA' ); }
|
||||
|
||||
my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba"});
|
||||
my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
|
||||
"$ENV{HOME}/.dbi/simba_test"});
|
||||
ok($ca, 'new CA');
|
||||
|
||||
my $uid;
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
#!/usr/bin/perl
use warnings;
use strict;

# Live tests.
#
# These tests need to be run as simba_ca and it needs to be able to
# connect to simba_da@localhost.
#
# Two files with identical content are backed up from different directories;
# the backup is expected to store them as hard links to a single inode.

use Test::More tests => 8;

use File::stat;

BEGIN { use_ok( 'Simba::CA' ); }

my $dbi_file  = $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba_test";
my $test_root = '/var/tmp/simba_test';

my $ca = Simba::CA->new({ dbi_file => $dbi_file });

ok($ca, 'new CA');

# make sure filesets contains test data then connect again:
$ca->{dbh}->do("delete from filesets");
$ca->{dbh}->do("insert into filesets(host, dir) values('localhost', '/var/tmp/simba_test')");

$ca = Simba::CA->new({ dbi_file => $dbi_file });

ok($ca, 'new CA 2');


SKIP: {
    # The count must equal the number of tests in this block (5), otherwise
    # the plan of 8 is not met when the block is skipped.
    skip "not running as root", 5 unless $> == 0;

    for my $dir ($test_root, "$test_root/d1", "$test_root/d2") {
        # A directory left over from an earlier run is fine.
        -d $dir or mkdir $dir or die "cannot mkdir $dir: $!";
    }

    my @files = ("$test_root/d1/f1", "$test_root/d2/f2");
    for my $file (@files) {
        open my $fh, '>:raw', $file or die "cannot open $file: $!";
        print {$fh} "test\n"        or die "cannot write $file: $!";
        close($fh)                  or die "cannot close $file: $!";
    }

    # Duplicate detection compares modification times, so give both files
    # exactly the same mtime even if the writes straddled a second boundary.
    my $now = time;
    utime($now, $now, @files) == @files or die "cannot set mtime on test files: $!";

    $ca->run();
    my $this_backup = $ca->{this_backup};

    my $st1 = lstat("$this_backup/d1/f1");
    ok($st1, "file 1 exists");
    # Guard the method calls: a missing file must fail the test, not kill
    # the script half-way through the plan.
    is($st1 ? $st1->nlink : undef, 2, "file 1 has 2 links");

    my $st2 = lstat("$this_backup/d2/f2");
    ok($st2, "file 2 exists");
    is($st2 ? $st2->nlink : undef, 2, "file 2 has 2 links");

    ok($st1 && $st2 && $st1->ino == $st2->ino, "file 1 and 2 are the same");

    # cleanup
    system("rm", "-rf", $this_backup) if defined $this_backup && length $this_backup;
    $ca->{dbh}->do("delete from versions");
    $ca->{dbh}->do("delete from files");
    $ca->{dbh}->do("delete from filesets");

}
|
Loading…
Reference in New Issue