#!/usr/bin/perl

=head1 NAME

finddup - yet another tool for finding duplicates

=head1 SYNOPSIS

    finddup NEEDLEDIR HAYSTACKDIR

=head1 DESCRIPTION

For every regular file below NEEDLEDIR, print each file below HAYSTACKDIR
that has the same size and the same SHA-256 digest, one line per match:

    NEEDLE -> HAYSTACK_FILE

A haystack entry with the same device and inode number as the needle is the
needle itself and is not reported.  If no match is found the needle is
printed as

    NEEDLE (NONE)

Haystack files are only read and hashed once a needle of the same size turns
up, so files with a unique size are never opened.

=cut

use v5.10;
use strict;
use warnings;
no autovivification 'fetch';

use File::Find;
use File::stat;
use File::Slurp;
use Digest::SHA qw(sha256_hex);

my ($needledir, $haystackdir) = @ARGV;
die "Usage: finddup NEEDLEDIR HAYSTACKDIR\n"
    unless defined $needledir && defined $haystackdir;

# Catalogue of haystack files, indexed by size and then by digest:
#
#   $haystack{$size}{$digest} = [ { path, size, dev, ino }, ... ]
#
# Files are first filed under the empty-string digest, meaning "not hashed
# yet"; the needle pass replaces that bucket with real digests on demand.
my %haystack;

# Return the SHA-256 hex digest of the file at $path.
# The whole file is read into memory by read_file().
sub file_digest {
    my ($path) = @_;
    my $content = read_file($path);
    return sha256_hex($content);
}

# Pass 1: record size and identity of every regular file in the haystack.
# File::Find changes directory here (no no_chdir), so $_ is the basename and
# $File::Find::name is the path relative to where the script was started.
find(
    sub {
        return unless -f $_;

        # NOTE(review): -f follows symlinks but lstat() does not, so a symlink
        # to a regular file is indexed under the link's own size/dev/ino.
        # Confirm whether that is intended.
        my $st   = lstat($_);
        my $size = $st->size;

        # Explicit @{ ... }: pushing onto a bare reference ("push $ref, ...")
        # was removed from Perl in 5.24.
        push @{ $haystack{$size}{''} }, {
            path => $File::Find::name,
            size => $size,
            dev  => $st->dev,
            ino  => $st->ino,
        };
        return;
    },
    $haystackdir
);

# Pass 2: look every needle up in the catalogue.
find(
    {
        wanted => sub {
            return unless -f $_;

            # Keep the path in a named variable so that nothing called below
            # can clobber it through $_.
            my $needle = $_;
            my $st     = lstat($needle);
            my $bucket = $haystack{ $st->size };
            my $found;

            if ($bucket) {

                # First needle of this size: hash all haystack files of that
                # size and re-file them under their real digest.
                if (my $pending = delete $bucket->{''}) {
                    for my $file (@{$pending}) {
                        my $digest = file_digest($file->{path});
                        push @{ $bucket->{$digest} }, $file;
                    }
                }

                my $matches = $bucket->{ file_digest($needle) } || [];
                for my $file (@{$matches}) {

                    # Same device and inode: this is the needle itself.
                    next if $st->dev == $file->{dev} && $st->ino == $file->{ino};

                    say "$needle -> $file->{path}";
                    $found = 1;
                }
            }

            say "$needle (NONE)" unless $found;
            return;
        },
        no_chdir => 1,
    },
    $needledir
);

# vim: tw=132 sw=4 expandtab