From 45095845fd4b88689f74c3eecbb3021151002d30 Mon Sep 17 00:00:00 2001 From: "Peter J. Holzer" Date: Wed, 29 Nov 2023 06:04:02 +0100 Subject: [PATCH] Limit instances to deduplicate by id The instances table is partitioned by id. If we restrict the search to recent ids, we only have to search a few partitions, which should be faster. --- lib/Simba/CA.pm | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/Simba/CA.pm b/lib/Simba/CA.pm index 956cefc..6f04022 100644 --- a/lib/Simba/CA.pm +++ b/lib/Simba/CA.pm @@ -727,6 +727,9 @@ sub linkdup { push @{ $self->{sessions_on_same_device} }, $r->{id}; # We only use the id } + my $self->{min_instance_id} = $self->{dbh}->selectrow_array("select min(id) from instances where session in " . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")"); + $self->log(3, "min_instance_id set to $self->{min_instance_id}"); + } my $tdb0 = gettimeofday(); my $tdb1; @@ -735,8 +738,9 @@ sub linkdup { and file_owner=? and file_group=? and file_acl=? and file_unix_bits=? and checksum=? and online=1 - and instances.file=files.id and versions2.id=instances.version + and instances.id >= ? + and instances.file=files.id and instances.session=sessions.id and sessions.id in (" . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")" . " order by instances.session desc @@ -746,6 +750,7 @@ sub linkdup { $f->{o}, $f->{g}, $f->{acl}, join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)), $f->{checksum}, + $self->{min_instance_id}, @{ $self->{sessions_on_same_device} }, ); my $st = stat($backup_filename);