Rename finddup to finddup2 and add new finddup for searching one directory

2015-01-16 11:31:56 +01:00 · 2015-01-16 11:31:56 +01:00 · bd73933105
parent 0ad0e43211
commit bd73933105
2 changed files with 112 additions and 45 deletions
--- a/59
+++ b/59
@ -13,66 +13,55 @@ use File::stat;
 use File::Slurp;
 use Digest::SHA qw(sha256_hex);
-my ($needledir, $haystackdir) = @ARGV;
+my ($dir) = @ARGV;
-my $haystack;
+my $files;
 find(sub {
        if (-f $_) {
            my $st = lstat($_);
            my $size = $st->size;
            my $hash = "";
-            unless ($haystack->{$size}{$hash}) {
+            unless ($files->{$size}{$hash}) {
-                $haystack->{$size}{$hash} = [];
+                $files->{$size}{$hash} = [];
            }
-            push $haystack->{$size}{$hash}, { path => $File::Find::name, 
+            push $files->{$size}{$hash}, { path => $File::Find::name, 
                                              size => $size,
                                              dev  => $st->dev,
                                              ino  => $st->ino,
                                            };
        }
    }, 
-    $haystackdir
+    $filesdir
 );
-find(
+for my $size (keys $files) {
-    {
+    if (@{ $files->{$size}{""} } == 1) {
-        wanted => sub {
+        next;
-                    if (-f $_) {
+    }
-                        my $st = lstat($_);
+    if ($files->{$size}{""}) {
-                        my $size = $st->size;
+        for my $file (@{ $files->{$size}{""} }) {
                        my $found;
                        if ($haystack->{$size}) {
                            if ($haystack->{$size}{""}) {
                                for my $file (@{ $haystack->{$size}{""} }) {
            my $content = read_file($file->{path});
            my $hash = sha256_hex($content);
-                                    unless ($haystack->{$size}{$hash}) {
+            unless ($files->{$size}{$hash}) {
-                                        $haystack->{$size}{$hash} = [];
+                $files->{$size}{$hash} = [];
            }
-                                    push $haystack->{$size}{$hash}, $file;
+            push $files->{$size}{$hash}, $file;
        }
-                                delete $haystack->{$size}{""};
+        delete $files->{$size}{""};
    }
-                            my $content = read_file($_);
+    for my $hash (keys $files->{$size}) {
-                            my $hash = sha256_hex($content);
+        if (@{ $files->{$size}{$hash} } > 1) {
-                            if ($haystack->{$size}{$hash}) {
+            for my $file (@{ $files->{$size}{$hash} }) {
                                for my $file (@{ $haystack->{$size}{$hash} }) {
                unless ($st->dev == $file->{dev} && $st->ino == $file->{ino}) {
-                                        say "$_ -> $file->{path}";
+                    say "$file->{path}";
                                        $found = 1;
                }
            }
            say "";
        }
    }
-                        unless ($found) {
+}
-                            say "$_ (NONE)";
+
                        }
                    }
                },
        no_chdir => 1,
    },
    $needledir
 );
 # vim: tw=132 sw=4 expandtab
--- a/78
+++ b/78
@ -0,0 +1,78 @@
 #!/usr/bin/perl
 =head1 NAME
 finddup2 - yet another tool for finding duplicates
 =cut
 use v5.10;
 use strict;
 use warnings;
 no autovivification 'fetch';
 use File::Find;
 use File::stat;
 use File::Slurp;
 use Digest::SHA qw(sha256_hex);
 # Usage: finddup2 NEEDLEDIR HAYSTACKDIR
 # For every regular file below NEEDLEDIR, print one "needle -> match" line per
 # file below HAYSTACKDIR that has the same size and SHA-256 digest but is not
 # the same dev/inode; print "needle (NONE)" when there is no such file.
 my ($needledir, $haystackdir) = @ARGV;
 die "Usage: $0 needledir haystackdir\n"
     unless defined $needledir && defined $haystackdir;
 # Index of haystack files: $haystack->{SIZE}{DIGEST} = [ file records ].
 # Files are first filed under the empty digest "" and are only hashed once a
 # needle of the same size shows up, so unique sizes are never read.
 my $haystack = {};
 # Return the hex SHA-256 digest of the raw bytes of the file at $path.
 # The file is streamed through Digest::SHA instead of being slurped, so large
 # files do not have to fit into memory. Croaks if the file cannot be read.
 sub _sha256_of_file {
     my ($path) = @_;
     return Digest::SHA->new(256)->addfile($path, 'b')->hexdigest;
 }
 # Pass 1: record size and identity (dev/ino) of every haystack file.
 find(sub {
         return unless -f $_;
         # NOTE(review): -f follows symlinks while lstat() does not, so a symlink
         # to a regular file is filed under the link's own size -- confirm intended.
         my $st = lstat($_);
         my $size = $st->size;
         # "push" on a bare reference was removed in Perl 5.24; dereference
         # explicitly (this also autovivifies the bucket).
         push @{ $haystack->{$size}{""} }, { path => $File::Find::name,
                                             size => $size,
                                             dev  => $st->dev,
                                             ino  => $st->ino,
                                           };
     },
     $haystackdir
 );
 # Pass 2: look up every needle file in the index.
 find(
     {
         wanted => sub {
             return unless -f $_;
             my $st = lstat($_);
             my $size = $st->size;
             my $found;
             if (my $by_hash = $haystack->{$size}) {
                 # First needle of this size: hash the pending haystack files
                 # and move them from the "" bucket into their digest buckets.
                 if (my $pending = delete $by_hash->{""}) {
                     for my $file (@$pending) {
                         push @{ $by_hash->{ _sha256_of_file($file->{path}) } }, $file;
                     }
                 }
                 my $hash = _sha256_of_file($_);
                 # "|| []" keeps the loop from autovivifying an empty bucket.
                 for my $file (@{ $by_hash->{$hash} || [] }) {
                     # Same device and inode means the very same file (the needle
                     # itself or a hard link to it), not a duplicate copy.
                     unless ($st->dev == $file->{dev} && $st->ino == $file->{ino}) {
                         say "$_ -> $file->{path}";
                         $found = 1;
                     }
                 }
             }
             unless ($found) {
                 say "$_ (NONE)";
             }
         },
         # Keep the starting cwd so that $_ is the full path and the haystack
         # paths recorded in pass 1 stay valid for reading.
         no_chdir => 1,
     },
     $needledir
 );
 # vim: tw=132 sw=4 expandtab