From f26d6b664022701ed7a4a6b6c391493b1e54434f Mon Sep 17 00:00:00 2001 From: hjp Date: Sat, 9 Nov 2019 10:22:34 +0000 Subject: [PATCH] Run backups for different filesets in parallel --- MYMETA.json | 6 ++-- MYMETA.yml | 2 +- lib/Simba/CA.pm | 85 ++++++++++++++++++++++++++++++++++++------------- scripts/backup | 7 +++- 4 files changed, 72 insertions(+), 28 deletions(-) diff --git a/MYMETA.json b/MYMETA.json index b795006..d0e7d4e 100644 --- a/MYMETA.json +++ b/MYMETA.json @@ -4,13 +4,13 @@ "unknown" ], "dynamic_config" : 0, - "generated_by" : "Module::Build version 0.422", + "generated_by" : "Module::Build version 0.4224", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", - "version" : "2" + "version" : 2 }, "name" : "Simba", "prereqs" : { @@ -50,5 +50,5 @@ ] }, "version" : "0.002", - "x_serialization_backend" : "JSON::PP version 2.27300_01" + "x_serialization_backend" : "JSON::PP version 2.97001" } diff --git a/MYMETA.yml b/MYMETA.yml index 9aed85b..d507fb5 100644 --- a/MYMETA.yml +++ b/MYMETA.yml @@ -4,7 +4,7 @@ author: - unknown build_requires: {} dynamic_config: 0 -generated_by: 'Module::Build version 0.422, CPAN::Meta::Converter version 2.150005' +generated_by: 'Module::Build version 0.4224, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html diff --git a/lib/Simba/CA.pm b/lib/Simba/CA.pm index 07c8ea5..4a99828 100644 --- a/lib/Simba/CA.pm +++ b/lib/Simba/CA.pm @@ -87,33 +87,26 @@ sub new { $self->{log_level} = 99; $self->{record_file_id} = 0; $self->{record_time} = 0; - if ($opt->{dbi}) { - $self->{dbh} = DBI->connect(@{ $opt->{dbi} }, - { AutoCommit => 0, - PrintError => 1, - RaiseError => 1 - } - ); - } elsif ($opt->{dbi_file}) { + $self->{parallel} = $opt->{parallel}; + + if ($opt->{dbi_file}) { my $fn = $opt->{dbi_file}; open(FN, "<$fn") or die "cannot open $fn: $!"; my $line = ; close(FN); my @cred = split(/[\s\n]+/, $line); - $self->{dbh} = DBI->connect(@cred, - { AutoCommit => 0, - PrintError => 1, - RaiseError => 1 - } - ); - $self->{instances_part_size} = 10_000_000; - $self->adjust_partitions; - } elsif ($opt->{tokyocabinet}) { - my $tdb = $self->{tdb} = TokyoCabinet::TDB->new(); - $tdb->open($opt->{tokyocabinet}, $tdb->WRITER, $tdb->OCREAT) - or die "open $opt->{tokyocabinet} failed: " . $tdb->errmsg($tdb->ecode()); + $self->{dbi} = \@cred; } - # XXX - DBI + + $self->{dbh} = DBI->connect(@{ $self->{dbi} }, + { AutoCommit => 0, + PrintError => 1, + RaiseError => 1 + } + ); + $self->{instances_part_size} = 10_000_000; + $self->adjust_partitions; + $self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets", { Slice => {} }); if ($opt->{filesets}) { $self->{targets} = @@ -144,13 +137,59 @@ sub new { return $self; } + sub run { my ($self) = @_; # run sequentially for prototype. In production we probably # want some concurrency - for my $target (@{$self->{targets}}) { - $self->backup2disk($target); + if ($self->{parallel}) { + $self->{dbh}->disconnect(); + my %running = (); + for my $target (@{$self->{targets}}) { + $self->log(3, "found target host " . $target->{host} . " dir " . $target->{dir}); + while (scalar keys %running >= $self->{parallel}) { + $self->log(3, "reached parallel limit - waiting"); + my $pid = wait(); + delete $running{$pid}; + $self->log(3, "child with pid $pid terminated, " . (scalar keys %running) . " remaining"); + } + my $pid = fork(); + if (!defined($pid)) { + die "fork failed: $!"; + } + if ($pid == 0) { + $self->{dbh} = DBI->connect(@{ $self->{dbi} }, + { AutoCommit => 0, + PrintError => 1, + RaiseError => 1 + } + ); + $self->backup2disk($target); + $self->{dbh}->disconnect(); + exit(0); + } else { + $running{$pid} = 1; + $self->log(3, "child with pid $pid started, " . (scalar keys %running) . " running"); + } + sleep(10); + } + while (scalar keys %running) { + my $pid = wait(); + delete $running{$pid}; + $self->log(3, "child with pid $pid terminated, " . (scalar keys %running) . " remaining"); + } + $self->{dbh} = DBI->connect(@{ $self->{dbi} }, + { AutoCommit => 0, + PrintError => 1, + RaiseError => 1 + } + ); + + } else { + for my $target (@{$self->{targets}}) { + $self->backup2disk($target); + } } $self->log(3, "statistics:"); for (sort keys %{ $self->{counts} }) { diff --git a/scripts/backup b/scripts/backup index 8fcca32..76d788a 100755 --- a/scripts/backup +++ b/scripts/backup @@ -9,7 +9,11 @@ use File::stat; my @filesets; -GetOptions('filesets=i' => \@filesets); +my $parallel; +GetOptions( + 'filesets=i' => \@filesets, + 'parallel=i' => \$parallel +); @filesets = split(/,/,join(',',@filesets)); $ENV{PATH} = "/usr/bin"; @@ -22,6 +26,7 @@ my $ca = Simba::CA->new({ dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba", fh_log => $log, (@filesets ? ( filesets => \@filesets ) : ()), + parallel => $parallel, }); $ca->log_level(9);