Find duplicate files even if file name is different.
This commit is contained in:
parent
474ef9e7ef
commit
579b0a33b3
8
Notes
8
Notes
|
@ -63,11 +63,7 @@ Tape performance:
|
||||||
|
|
||||||
Equality checking doesn't work for setuid files.
|
Equality checking doesn't work for setuid files.
|
||||||
|
|
||||||
Implement equality checking via saved checksum.
|
|
||||||
|
|
||||||
checksum is null if file is linked. Is that ok? Can this be solved
|
|
||||||
together with the previous point?
|
|
||||||
|
|
||||||
exit if disk full
|
exit if disk full
|
||||||
|
|
||||||
Move prune list to config file.
|
On my 800 MHz PIII, the CPU usage is rather high. Some profiling seems
|
||||||
|
to be necessary (or I should get a faster backup server :-)).
|
||||||
|
|
|
@ -156,8 +156,15 @@ sub backup2disk {
|
||||||
} else {
|
} else {
|
||||||
$self->log(5, "unexpected trailer $trailer\n");
|
$self->log(5, "unexpected trailer $trailer\n");
|
||||||
}
|
}
|
||||||
$self->setmeta($f);
|
my $oldfile = $self->finddup($f);
|
||||||
$self->log(10, "stored");
|
if ($oldfile) {
|
||||||
|
unlink($backup_filename) or die "cannot unlink $backup_filename: $!";
|
||||||
|
link($oldfile, $backup_filename) or die "cannot link $oldfile to $backup_filename: $!";
|
||||||
|
$self->log(10, "linked (dup)");
|
||||||
|
} else {
|
||||||
|
$self->setmeta($f);
|
||||||
|
$self->log(10, "stored");
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
$self->log(5, "unexpected header $header\n");
|
$self->log(5, "unexpected header $header\n");
|
||||||
}
|
}
|
||||||
|
@ -421,6 +428,7 @@ sub close_session {
|
||||||
|
|
||||||
sub get_last_session_id {
|
sub get_last_session_id {
|
||||||
my ($self) = @_;
|
my ($self) = @_;
|
||||||
|
return unless $self->{last_backup};
|
||||||
my $sessions = $self->{dbh}->selectall_arrayref("select * from sessions where prefix=?",
|
my $sessions = $self->{dbh}->selectall_arrayref("select * from sessions where prefix=?",
|
||||||
{ Slice => {} },
|
{ Slice => {} },
|
||||||
$self->{last_backup});
|
$self->{last_backup});
|
||||||
|
@ -428,5 +436,49 @@ sub get_last_session_id {
|
||||||
return $sessions->[0]{id};
|
return $sessions->[0]{id};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
=head2 finddup

Find a duplicate of the current file in the database. This is useful if you
have multiple copies of a file stored in different locations.

C<$f> is the metadata hash of the file to look up. The keys C<t>, C<s>,
C<m>, C<o>, C<g>, C<acl> and C<checksum> are read, as are the flags
C<setuid>, C<setgid> and C<sticky>.

A stored version is a candidate if all of these values match a row with
C<online=1>. A candidate is only accepted if the file
C<"$prefix/$path"> still exists on disk and its mtime, size, owner, group
and permission bits agree with C<$f>.

Returns the path of the first acceptable candidate. Returns nothing (empty
list / undef) if there is none, or if C<$f> has no checksum.

=cut

sub finddup {
    my ($self, $f) = @_;

    # "checksum = NULL" is never true in SQL, so a file without a checksum
    # can never match a row. Skip the database round trip in that case.
    return unless defined $f->{checksum};

    # This is called once per file, so let DBI reuse the prepared statement.
    my $sth = $self->{dbh}->prepare_cached("select * from versions, files, sessions
                                     where file_type=? and file_size=? and file_mtime=?
                                       and file_owner=? and file_group=? and file_acl=?
                                       and file_unix_bits=?
                                       and checksum=? and online=1
                                       and versions.file=files.id and versions.session=sessions.id");

    # file_unix_bits is matched against a comma separated list of the flags
    # which are set, always in the order setuid, setgid, sticky.
    my $unix_bits = join(',', grep { $f->{$_} } qw(setuid setgid sticky));

    $sth->execute(
        $f->{t}, $f->{s}, $f->{m},
        $f->{o}, $f->{g}, $f->{acl},
        $unix_bits,
        $f->{checksum}
    );

    while (my $r = $sth->fetchrow_hashref()) {
        my $oldfile = "$r->{prefix}/$r->{path}";

        # The database row alone is not trusted: the file must still exist
        # and its on-disk metadata must be what we want for the new entry.
        my $st = lstat($oldfile) or next;
        next unless $st->mtime == $f->{m};
        next unless $st->size  == $f->{s};
        next unless $st->uid   == $self->name2uid($f->{o});
        next unless $st->gid   == $self->name2gid($f->{g});
        next unless ($st->mode & 07777) == $self->acl2mode($f);

        # We stop before all rows are fetched, so release the handle
        # explicitly (required before a cached statement can be reused).
        $sth->finish();
        return $oldfile;
    }
    return;
}
|
||||||
|
|
||||||
|
# Destructor: close the database connection held in $self->{dbh}.
sub DESTROY {
    my ($self) = @_;

    # Destructors can run at arbitrary points (e.g. while an exception is
    # propagating), so don't clobber the caller's status variables.
    local ($., $@, $!, $^E, $?);

    # The handle may be missing if the object was never fully constructed
    # or if it has already been torn down during global destruction.
    my $dbh = $self->{dbh} or return;
    $dbh->disconnect();
}
|
||||||
|
|
||||||
|
|
||||||
# vim: tw=0 expandtab
|
# vim: tw=0 expandtab
|
||||||
1;
|
1;
|
||||||
|
|
|
@ -6,7 +6,8 @@ use Test::More tests => 15;
|
||||||
|
|
||||||
BEGIN { use_ok( 'Simba::CA' ); }
|
BEGIN { use_ok( 'Simba::CA' ); }
|
||||||
|
|
||||||
my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba"});
|
my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
|
||||||
|
"$ENV{HOME}/.dbi/simba_test"});
|
||||||
ok($ca, 'new CA');
|
ok($ca, 'new CA');
|
||||||
|
|
||||||
my $uid;
|
my $uid;
|
||||||
|
|
|
@ -0,0 +1,61 @@
|
||||||
|
#!/usr/bin/perl
use warnings;
use strict;

# Live tests.
#
# These tests need to be run as simba_ca and it needs to be able to
# connect to simba_da@localhost.
#
# NOTE(review): the SKIP block below requires an effective uid of 0, which
# does not obviously agree with "run as simba_ca" - confirm which is intended.

use Test::More tests => 8;

use File::stat;

BEGIN { use_ok( 'Simba::CA' ); }

my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
                                      "$ENV{HOME}/.dbi/simba_test"});

ok($ca, 'new CA');

# make sure filesets contains test data then connect again:
$ca->{dbh}->do("delete from filesets");
$ca->{dbh}->do("insert into filesets(host, dir) values('localhost', '/var/tmp/simba_test')");

$ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} ||
                                   "$ENV{HOME}/.dbi/simba_test"});

ok($ca, 'new CA 2');


SKIP: {
    # All five remaining tests live in this block, so all five have to be
    # skipped to keep the plan of 8 intact.
    skip "not running as root", 5 unless $> == 0;

    # The directories may be left over from an earlier run; only a real
    # failure to create them is fatal.
    for my $dir ("/var/tmp/simba_test",
                 "/var/tmp/simba_test/d1",
                 "/var/tmp/simba_test/d2") {
        -d $dir or mkdir $dir or die "cannot mkdir $dir: $!";
    }

    # Two files with identical content but different names and locations.
    my @files = ('/var/tmp/simba_test/d1/f1', '/var/tmp/simba_test/d2/f2');
    for my $file (@files) {
        open my $fh, '>:raw', $file or die "cannot open $file: $!";
        print {$fh} "test\n"        or die "cannot write $file: $!";
        close($fh)                  or die "cannot close $file: $!";
    }

    # Duplicate detection compares mtimes, so make sure both files carry
    # the same timestamp even if they were written across a second boundary.
    my $mtime = time;
    utime($mtime, $mtime, @files) == @files
        or die "cannot set mtime on test files: $!";

    $ca->run();
    my $this_backup = $ca->{this_backup};

    # Guard the accessors so that a missing file yields failed tests
    # instead of aborting the whole script.
    my $st1 = lstat("$this_backup/d1/f1");
    ok($st1, "file 1 exists");
    is($st1 && $st1->nlink, 2, "file 1 has 2 links");
    my $st2 = lstat("$this_backup/d2/f2");
    ok($st2, "file 2 exists");
    is($st2 && $st2->nlink, 2, "file 2 has 2 links");
    ok($st1 && $st2 && $st1->ino == $st2->ino, "file 1 and 2 are the same");

    # cleanup
    system("rm", "-rf", $this_backup);
    $ca->{dbh}->do("delete from versions");
    $ca->{dbh}->do("delete from files");
    $ca->{dbh}->do("delete from filesets");

}
|
Loading…
Reference in New Issue