From bd7393310542e1de4b59f21259b4bd8d68424058 Mon Sep 17 00:00:00 2001
From: "Peter J. Holzer"
Date: Fri, 16 Jan 2015 11:31:56 +0100
Subject: [PATCH] Rename finddup to finddup2 and add new finddup for searching one directory

---
Review notes (text between the "---" separator and the diffstat is ignored
by git-am):

* The line structure of this patch was reconstructed from a copy whose
  newlines had been collapsed. Indentation of context and removed lines
  follows the file's "sw=4 expandtab" modeline but could not be checked
  against the original blob; apply with "git apply --ignore-whitespace"
  if the context does not match exactly.
* Changes relative to the patch as originally sent, all in the new finddup:
  - find() is rooted at $dir; the original passed the undeclared $filesdir.
  - The report loop no longer dereferences an undeclared $st. Hard links
    are collapsed by (dev, ino) before a group is reported, which is what
    the copied dev/ino test was evidently meant to achieve.
  - push/keys are applied to explicitly dereferenced containers; calling
    them on references is a compile error since perl 5.24.
  - A usage message is printed when no directory is given.
  - finddup keeps mode 100755 (the original patch dropped the exec bit).
  - Two stray blank lines before/after the vim modeline are not added.
* The post-image blob id on the finddup "index" line is the one from the
  original patch and does not describe the corrected content.
* finddup2 is the old finddup, byte for byte, so that the commit remains a
  pure rename.

 finddup  | 82 ++++++++++++++++++++++++++--------------------------------
 finddup2 | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+), 45 deletions(-)
 create mode 100755 finddup2

diff --git a/finddup b/finddup
index 73a2b7a..5ed6a26 100755
--- a/finddup
+++ b/finddup
@@ -13,66 +13,58 @@ use File::stat;
 use File::Slurp;
 use Digest::SHA qw(sha256_hex);
 
-my ($needledir, $haystackdir) = @ARGV;
+my ($dir) = @ARGV;
+die "Usage: $0 <directory>\n" unless defined $dir;
 
-my $haystack;
+my $files;
 
 find(sub {
         if (-f $_) {
             my $st = lstat($_);
             my $size = $st->size;
             my $hash = "";
-            unless ($haystack->{$size}{$hash}) {
-                $haystack->{$size}{$hash} = [];
+            unless ($files->{$size}{$hash}) {
+                $files->{$size}{$hash} = [];
             }
-            push $haystack->{$size}{$hash}, { path => $File::Find::name,
+            push @{ $files->{$size}{$hash} }, { path => $File::Find::name,
                                               size => $size,
                                               dev => $st->dev,
                                               ino => $st->ino,
                                             };
         }
     },
-    $haystackdir
+    $dir
 );
 
-find(
-    {
-        wanted => sub {
-            if (-f $_) {
-                my $st = lstat($_);
-                my $size = $st->size;
-                my $found;
-                if ($haystack->{$size}) {
-                    if ($haystack->{$size}{""}) {
-                        for my $file (@{ $haystack->{$size}{""} }) {
-                            my $content = read_file($file->{path});
-                            my $hash = sha256_hex($content);
-                            unless ($haystack->{$size}{$hash}) {
-                                $haystack->{$size}{$hash} = [];
-                            }
-                            push $haystack->{$size}{$hash}, $file;
-                        }
-                        delete $haystack->{$size}{""};
-                    }
-                    my $content = read_file($_);
-                    my $hash = sha256_hex($content);
-                    if ($haystack->{$size}{$hash}) {
-                        for my $file (@{ $haystack->{$size}{$hash} }) {
-                            unless ($st->dev == $file->{dev} && $st->ino == $file->{ino}) {
-                                say "$_ -> $file->{path}";
-                                $found = 1;
-                            }
-                        }
-                    }
-                }
-                unless ($found) {
-                    say "$_ (NONE)";
-                }
-            }
-        },
-        no_chdir => 1,
-    },
-    $needledir
-);
+# Only files of equal size can be duplicates, so content is hashed only for
+# sizes shared by more than one file.
+for my $size (keys %$files) {
+    if (@{ $files->{$size}{""} } == 1) {
+        next;
+    }
+    if ($files->{$size}{""}) {
+        for my $file (@{ $files->{$size}{""} }) {
+            my $content = read_file($file->{path});
+            my $hash = sha256_hex($content);
+            unless ($files->{$size}{$hash}) {
+                $files->{$size}{$hash} = [];
+            }
+            push @{ $files->{$size}{$hash} }, $file;
+        }
+        delete $files->{$size}{""};
+    }
+    for my $hash (keys %{ $files->{$size} }) {
+        # Hard links to one inode are the same file, not duplicates: keep
+        # only the first path seen for each (dev, ino) pair.
+        my %seen;
+        my @distinct = grep { !$seen{"$_->{dev}:$_->{ino}"}++ } @{ $files->{$size}{$hash} };
+        if (@distinct > 1) {
+            for my $file (@distinct) {
+                say "$file->{path}";
+            }
+            say "";
+        }
+    }
+}
 
 # vim: tw=132 sw=4 expandtab
diff --git a/finddup2 b/finddup2
new file mode 100755
index 0000000..73a2b7a
--- /dev/null
+++ b/finddup2
@@ -0,0 +1,78 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+finddup - yet another tool for finding duplicates
+
+=cut
+
+use v5.10;
+no autovivification 'fetch';
+use File::Find;
+use File::stat;
+use File::Slurp;
+use Digest::SHA qw(sha256_hex);
+
+my ($needledir, $haystackdir) = @ARGV;
+
+my $haystack;
+
+find(sub {
+        if (-f $_) {
+            my $st = lstat($_);
+            my $size = $st->size;
+            my $hash = "";
+            unless ($haystack->{$size}{$hash}) {
+                $haystack->{$size}{$hash} = [];
+            }
+            push $haystack->{$size}{$hash}, { path => $File::Find::name,
+                                              size => $size,
+                                              dev => $st->dev,
+                                              ino => $st->ino,
+                                            };
+        }
+    },
+    $haystackdir
+);
+
+find(
+    {
+        wanted => sub {
+            if (-f $_) {
+                my $st = lstat($_);
+                my $size = $st->size;
+                my $found;
+                if ($haystack->{$size}) {
+                    if ($haystack->{$size}{""}) {
+                        for my $file (@{ $haystack->{$size}{""} }) {
+                            my $content = read_file($file->{path});
+                            my $hash = sha256_hex($content);
+                            unless ($haystack->{$size}{$hash}) {
+                                $haystack->{$size}{$hash} = [];
+                            }
+                            push $haystack->{$size}{$hash}, $file;
+                        }
+                        delete $haystack->{$size}{""};
+                    }
+                    my $content = read_file($_);
+                    my $hash = sha256_hex($content);
+                    if ($haystack->{$size}{$hash}) {
+                        for my $file (@{ $haystack->{$size}{$hash} }) {
+                            unless ($st->dev == $file->{dev} && $st->ino == $file->{ino}) {
+                                say "$_ -> $file->{path}";
+                                $found = 1;
+                            }
+                        }
+                    }
+                }
+                unless ($found) {
+                    say "$_ (NONE)";
+                }
+            }
+        },
+        no_chdir => 1,
+    },
+    $needledir
+);
+
+# vim: tw=132 sw=4 expandtab