Rename finddup to finddup2 and add new finddup for searching one directory

This commit is contained in:
Peter J. Holzer 2015-01-16 11:31:56 +01:00
parent 0ad0e43211
commit bd73933105
2 changed files with 112 additions and 45 deletions

59
finddup Executable file → Normal file
View File

@ -13,66 +13,55 @@ use File::stat;
use File::Slurp; use File::Slurp;
use Digest::SHA qw(sha256_hex); use Digest::SHA qw(sha256_hex);
my ($needledir, $haystackdir) = @ARGV; my ($dir) = @ARGV;
my $haystack; my $files;
find(sub { find(sub {
if (-f $_) { if (-f $_) {
my $st = lstat($_); my $st = lstat($_);
my $size = $st->size; my $size = $st->size;
my $hash = ""; my $hash = "";
unless ($haystack->{$size}{$hash}) { unless ($files->{$size}{$hash}) {
$haystack->{$size}{$hash} = []; $files->{$size}{$hash} = [];
} }
push $haystack->{$size}{$hash}, { path => $File::Find::name, push $files->{$size}{$hash}, { path => $File::Find::name,
size => $size, size => $size,
dev => $st->dev, dev => $st->dev,
ino => $st->ino, ino => $st->ino,
}; };
} }
}, },
$haystackdir $filesdir
); );
find( for my $size (keys $files) {
{ if (@{ $files->{$size}{""} } == 1) {
wanted => sub { next;
if (-f $_) { }
my $st = lstat($_); if ($files->{$size}{""}) {
my $size = $st->size; for my $file (@{ $files->{$size}{""} }) {
my $found;
if ($haystack->{$size}) {
if ($haystack->{$size}{""}) {
for my $file (@{ $haystack->{$size}{""} }) {
my $content = read_file($file->{path}); my $content = read_file($file->{path});
my $hash = sha256_hex($content); my $hash = sha256_hex($content);
unless ($haystack->{$size}{$hash}) { unless ($files->{$size}{$hash}) {
$haystack->{$size}{$hash} = []; $files->{$size}{$hash} = [];
} }
push $haystack->{$size}{$hash}, $file; push $files->{$size}{$hash}, $file;
} }
delete $haystack->{$size}{""}; delete $files->{$size}{""};
} }
my $content = read_file($_); for my $hash (keys $files->{$size}) {
my $hash = sha256_hex($content); if (@{ $files->{$size}{$hash} } > 1) {
if ($haystack->{$size}{$hash}) { for my $file (@{ $files->{$size}{$hash} }) {
for my $file (@{ $haystack->{$size}{$hash} }) {
unless ($st->dev == $file->{dev} && $st->ino == $file->{ino}) { unless ($st->dev == $file->{dev} && $st->ino == $file->{ino}) {
say "$_ -> $file->{path}"; say "$file->{path}";
$found = 1;
} }
} }
say "";
} }
} }
unless ($found) { }
say "$_ (NONE)";
}
}
},
no_chdir => 1,
},
$needledir
);
# vim: tw=132 sw=4 expandtab # vim: tw=132 sw=4 expandtab

78
finddup2 Executable file
View File

@ -0,0 +1,78 @@
#!/usr/bin/perl
=head1 NAME
finddup2 - yet another tool for finding duplicates of one directory's files in another directory
=cut
use v5.10;
no autovivification 'fetch';
use File::Find;
use File::stat;
use File::Slurp;
use Digest::SHA qw(sha256_hex);
use strict;
use warnings;

# finddup2 NEEDLEDIR HAYSTACKDIR
#
# For every regular file below NEEDLEDIR, print each file below HAYSTACKDIR
# that has the same size and the same SHA-256 digest as "needle -> match".
# A haystack entry with the same device and inode number as the needle is
# not reported.  Needles without any match are printed as "needle (NONE)".
my ($needledir, $haystackdir) = @ARGV;
die "Usage: $0 needledir haystackdir\n"
    unless defined $needledir && defined $haystackdir;

# Index of haystack files: size => { digest => [ file records ] }.
# Files that have not been hashed yet are parked under the digest key ""
# (which can never be a SHA-256 hex string), so file contents are only read
# for sizes that some needle file actually has.
my %haystack;

# Return the SHA-256 hex digest of the file at $path.  The file is streamed
# through Digest::SHA in binary mode instead of being slurped into memory,
# so large files do not have to fit into RAM.  Croaks if the file cannot be
# read.
sub file_digest {
    my ($path) = @_;
    return Digest::SHA->new(256)->addfile($path, "b")->hexdigest;
}

# Pass 1: record size, device and inode of every haystack file.
find(
    sub {
        # NOTE: -f follows symbolic links while lstat() does not, so a
        # symlink pointing at a regular file is recorded with the link's own
        # size, device and inode.
        return unless -f $_;
        my $st = lstat($_);
        unless ($st) {
            # The file may have disappeared between the -f test and lstat().
            warn "cannot lstat $File::Find::name: $!\n";
            return;
        }
        # push needs an explicit array dereference: pushing onto a bare
        # scalar reference was only ever experimental and was removed in
        # Perl 5.24.
        push @{ $haystack{ $st->size }{""} //= [] }, {
            path => $File::Find::name,
            size => $st->size,
            dev  => $st->dev,
            ino  => $st->ino,
        };
    },
    $haystackdir
);

# Pass 2: look up every needle file in the index.
find(
    {
        # Without chdir, $_ is the full path of the current file and the
        # paths recorded in pass 1 stay valid relative to the working
        # directory.
        no_chdir => 1,
        wanted   => sub {
            return unless -f $_;
            my $needle = $_;
            my $st     = lstat($needle);
            unless ($st) {
                warn "cannot lstat $needle: $!\n";
                return;
            }

            my $found;
            if (my $by_digest = $haystack{ $st->size }) {
                # First needle of this size: hash all haystack files of the
                # same size and move them from the "" bucket to the bucket
                # of their digest.
                if (my $unhashed = delete $by_digest->{""}) {
                    for my $file (@$unhashed) {
                        my $digest = file_digest($file->{path});
                        push @{ $by_digest->{$digest} //= [] }, $file;
                    }
                }

                my $digest = file_digest($needle);
                for my $file (@{ $by_digest->{$digest} // [] }) {
                    # Same device and inode: this is the needle itself (or a
                    # hard link to it), not a copy.
                    next if $st->dev == $file->{dev} && $st->ino == $file->{ino};
                    say "$needle -> $file->{path}";
                    $found = 1;
                }
            }
            say "$needle (NONE)" unless $found;
        },
    },
    $needledir
);
# vim: tw=132 sw=4 expandtab