#!/usr/bin/perl

=head1 NAME

finddup - yet another tool for finding duplicates

=cut

use v5.10;
no autovivification 'fetch';
use File::Find;
use File::stat;
use File::Slurp;
use Digest::SHA qw(sha256_hex);

my ($needledir, $haystackdir) = @ARGV;

# Both directories are required; fail early with a usage message instead of
# letting File::Find trip over an undefined top directory.
die "Usage: $0 NEEDLEDIR HAYSTACKDIR\n"
    unless defined $needledir && defined $haystackdir;

# Index of the files below $haystackdir:
#   $haystack->{SIZE}{DIGEST} = [ { path, size, dev, ino }, ... ]
# During this pass every record is filed under the empty-string digest "",
# meaning "contents not hashed yet"; no file is read here.
my $haystack;

# First pass: record size and identity (device + inode) of every plain file
# in the haystack.
find(sub {
        return unless -f $_;

        # -f follows symlinks, so use stat (not lstat) here as well: size, dev
        # and ino then describe the same file that -f tested, rather than a
        # symlink pointing at it.
        my $st = stat($_);
        unless ($st) {
            # The file may have vanished or become inaccessible since -f.
            warn "finddup: cannot stat $File::Find::name: $!\n";
            return;
        }

        my $size = $st->size;
        unless ($haystack->{$size}{""}) {
            $haystack->{$size}{""} = [];
        }

        # push needs a real array: pushing onto a bare reference ("autoderef")
        # was experimental in 5.14 and is a compile error since Perl 5.24.
        push @{ $haystack->{$size}{""} }, {
            # find() chdir()s by default, so $_ is only a basename here;
            # $File::Find::name is the path including $haystackdir.
            path => $File::Find::name,
            size => $size,
            dev  => $st->dev,
            ino  => $st->ino,
        };
    },
    $haystackdir
);

# Return the SHA-256 hex digest of the contents of the file at $path.
# The file is streamed through Digest::SHA in binary mode instead of being
# loaded into memory in one piece. Dies if the file cannot be read.
sub _file_sha256 {
    my ($path) = @_;
    return Digest::SHA->new(256)->addfile($path, 'b')->hexdigest;
}

# Second pass: for every plain file below $needledir print either
#   "<needle> -> <haystack path>"   once per duplicate found in the haystack, or
#   "<needle> (NONE)"               if there is none.
# Haystack files are hashed lazily: a size bucket is only hashed the first
# time a needle of that size shows up.
find(
    {
        wanted => sub {
            return unless -f $_;

            # -f follows symlinks, so use stat (not lstat) to get the size and
            # identity of the file whose contents are hashed below.
            my $st = stat($_);
            unless ($st) {
                # The file may have vanished or become inaccessible since -f.
                warn "finddup: cannot stat $_: $!\n";
                return;
            }

            my $size = $st->size;
            my $found;

            if ($haystack->{$size}) {
                # Entries under the "" key have not been hashed yet; hash them
                # now and re-file them under their real digest.
                if ($haystack->{$size}{""}) {
                    for my $file (@{ $haystack->{$size}{""} }) {
                        my $hash = _file_sha256($file->{path});
                        unless ($haystack->{$size}{$hash}) {
                            $haystack->{$size}{$hash} = [];
                        }
                        # push needs a real array: pushing onto a bare
                        # reference is a compile error since Perl 5.24.
                        push @{ $haystack->{$size}{$hash} }, $file;
                    }
                    delete $haystack->{$size}{""};
                }

                my $hash = _file_sha256($_);
                if ($haystack->{$size}{$hash}) {
                    for my $file (@{ $haystack->{$size}{$hash} }) {
                        # Same device and inode means this is the needle itself
                        # (or a hard link to it), not a separate copy.
                        next if $st->dev == $file->{dev} && $st->ino == $file->{ino};
                        say "$_ -> $file->{path}";
                        $found = 1;
                    }
                }
            }

            unless ($found) {
                say "$_ (NONE)";
            }
        },
        # Keep the working directory fixed so that $_ is the full needle path
        # and the haystack paths recorded earlier stay valid for reading.
        no_chdir => 1,
    },
    $needledir
);

# vim: tw=132 sw=4 expandtab