Compare commits

...

2 Commits

Author SHA1 Message Date
Peter J. Holzer 45095845fd Limit instances to deduplicate by id
The instances table is partitioned by id. If we restrict the search to
recent ids we only have to search a few partitions which should be
faster.
2023-11-29 06:04:02 +01:00
Peter J. Holzer 118bb0a2e7 Fix pruning 2023-11-29 06:03:36 +01:00
2 changed files with 12 additions and 4 deletions

View File

@ -727,6 +727,9 @@ sub linkdup {
push @{ $self->{sessions_on_same_device} }, $r->{id}; # We only use the id
}
my $self->{min_instance_id} = $self->{dbh}->selectrow_array("select min(id) from instances where session in " . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")");
$self->log(3, "min_instance_id set to $self->{min_instance_id}");
}
my $tdb0 = gettimeofday();
my $tdb1;
@ -735,8 +738,9 @@ sub linkdup {
and file_owner=? and file_group=? and file_acl=?
and file_unix_bits=?
and checksum=? and online=1
and instances.file=files.id
and versions2.id=instances.version
and instances.id >= ?
and instances.file=files.id
and instances.session=sessions.id
and sessions.id in (" . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")" .
" order by instances.session desc
@ -746,6 +750,7 @@ sub linkdup {
$f->{o}, $f->{g}, $f->{acl},
join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
$f->{checksum},
$self->{min_instance_id},
@{ $self->{sessions_on_same_device} },
);
my $st = stat($backup_filename);

View File

@ -105,10 +105,13 @@ sub list {
$self->log(10, "list: in $File::Find::dir");
if ($self->{prune}{$File::Find::dir}) {
return ();
} else {
# not sure if sorting is useful
return sort @_;
}
my $last_component = $File::Find::dir =~ s{.*/}{}r;
if ($self->{prune}{$last_component}) {
return ();
}
# not sure if sorting is useful
return sort @_;
},
wanted
=> sub {