Treat zero-length files specially for better performance

This commit is contained in:
Peter J. Holzer 2021-07-25 12:19:18 +02:00
parent 19b0faad02
commit 289670b313
1 changed files with 24 additions and 0 deletions

View File

@ -818,6 +818,25 @@ sub store_file {
$self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!"); $self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
} }
} }
# If the size is zero, check whether we have seen a matching file before. If we have, link to it.
# Ubuntu contains a lot of zero-sized files (about 8000 per installed kernel).
# Searching for them in the database is slow, so we special-case that here.
# We could generalize that, but I don't think that there will ever be enough identical
# non-empty files to make that worthwhile.
# XXX - was marked "not yet implemented", but the lookup below and the bookkeeping after a successful store now exist; verify and drop this marker.
if ($f->{s} == 0 && $f->{t} eq 'f') {
no warnings 'uninitialized'; # unix bits may not exist
my $k = "$f->{m} $f->{o} $f->{g} $f->{acl} $f->{setuid} $f->{setgid} $f->{sticky}";
if ($self->{null_files}{$k}) {
my $oldfile = $self->{null_files}{$k}{name};
my $backup_filename = "$self->{this_backup}/$f->{name}";
if (link($oldfile, $backup_filename)) {
$self->log(10, "linked (empty)");
$self->{counts}{dup10}++;
return $success;
}
}
}
# else request from da # else request from da
unless ($self->{file_pid}) { unless ($self->{file_pid}) {
@ -884,6 +903,11 @@ sub store_file {
$self->setmeta($f); $self->setmeta($f);
$self->log(10, "stored"); $self->log(10, "stored");
} }
if ($f->{s} == 0 && $f->{t} eq 'f') {
no warnings 'uninitialized'; # unix bits may not exist
my $k = "$f->{m} $f->{o} $f->{g} $f->{acl} $f->{setuid} $f->{setgid} $f->{sticky}";
$self->{null_files}{$k}{name} = $backup_filename;
}
} else { } else {
$self->log(5, "unexpected header $header\n"); $self->log(5, "unexpected header $header\n");
$self->close_file_connection; $self->close_file_connection;