#!/usr/bin/perl

=head1 NAME

Simba::CA

=head1 DESCRIPTION

Collecting Agent of the Simba backup system.

This class represents one instance of a running collecting agent.
The only user-callable methods are the constructor new and the instance
method run, which collects all the files from the various disk agents.
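
A minimal usage sketch (the constructor options shown are assumptions
derived from the code in this file, not a fixed API):

    use Simba::CA;

    my $ca = Simba::CA->new({
        dbi_file => '/etc/simba/dbi',  # first line: DBI->connect arguments
        parallel => 4,                 # optional: back up 4 targets at once
    });
    $ca->run();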

The Simba::CA object is a hashref with the following keys:

=over

=item basedir

Base directory under which all backups are stored (default /backup).

=item unknown_uid

=item unknown_gid

Fallback uid/gid used when an owner or group name cannot be resolved locally.

=item fh_log

Filehandle the log is written to (default STDERR).

=item log_level

Messages with a level above this are suppressed.

=item dbh

DBI handle to the backup database.

=item targets

A list of entries (hashes) from table filesets.

=item ssh_id_file

SSH identity file used to connect to the disk agents.

=item target

The fileset currently being backed up.

=item last_backup

=item last_backup_id

Prefix and session id of the most recent finished backup of the current target.

=item timestamp

Timestamp used in the name of the backup directory (defaults to the current
time).

=item this_backup

Directory the current backup is written to.

=item session_id

Id of the current session in table sessions.

=item file_pid

=item file_cfd

=item file_dfd

Pid and command/data filehandles of the connection to the disk agent used for
fetching file contents.

=item start_time

Timestamp when the backup of the current target started. Used to test when to
abort due to timeout.

=back

=cut

package Simba::CA;

use strict;
use warnings;

use Encode;
use IPC::Open2;
use POSIX qw(strftime);
use Simba::Util qw(quote unquote typestr);
use Readonly;
use Digest::SHA;
use List::Util qw(min);
use IO::Handle;
use File::stat;
use Scalar::Util qw(tainted);
use DBI;
use Time::HiRes qw(gettimeofday);

Readonly my $BUFSIZE => 128 * 1024;

sub new {
    my ($class, $opt) = @_;

    my $self = {};
    bless $self, $class;

    $self->{basedir} = '/backup';
    $self->{unknown_uid} = 65533;
    $self->{unknown_gid} = 65533;
    $self->{fh_log} = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR;
    $self->{log_level} = 99;
    $self->{record_file_id} = 0;
    $self->{record_time} = 0;
    $self->{parallel} = $opt->{parallel};
    if ($opt->{dbi_file}) {
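        # The first line of dbi_file holds the whitespace-separated
        # arguments for DBI->connect, e.g. (hypothetical values):
        #   dbi:mysql:database=simba simba s3cret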
        my $fn = $opt->{dbi_file};
        open(my $fh, '<', $fn) or die "cannot open $fn: $!";
        my $line = <$fh>;
        close($fh);
        my @cred = split(/[\s\n]+/, $line);
        $self->{dbi} = \@cred;
    }
    $self->{dbh} = DBI->connect(@{ $self->{dbi} },
                                { AutoCommit => 0,
                                  PrintError => 1,
                                  RaiseError => 1
                                });
    $self->{instances_part_size} = 10_000_000;
    $self->adjust_partitions;

    # Order by ascending sort order with nulls last:
    # MySQL considers NULL to be smaller than any number, so negate the numbers
    # (making the largest the smallest) and then sort descending.
    # Idea from https://www.designcise.com/web/tutorial/how-to-order-null-values-first-or-last-in-mysql
    $self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets order by -sortorder desc", { Slice => {} });

    if ($opt->{filesets}) {
        $self->{targets} = [
            grep {
                my $id = $_->{id};
                grep { $id == $_ } @{ $opt->{filesets} }
            } @{ $self->{targets} }
        ];
    } else {
        $self->{targets} = [
            grep { $_->{active} } @{ $self->{targets} }
        ];
    }
    if ($ENV{HOME} =~ m{([/\w]*)}) {
        if (-f "$1/.ssh/id_rsa") {
            if (my $st = stat("$1/.ssh/id_rsa")) {
                if ($st->uid == $>) {
                    $self->{ssh_id_file} = "$1/.ssh/id_rsa";
                }
            }
        }
    }
    return $self;
}

sub run {
    my ($self) = @_;
    # Back up all targets, either sequentially or - if the parallel option
    # is set - with up to that many concurrent child processes.
    if ($self->{parallel}) {
        # The children need their own database connections, so close ours
        # before forking and reconnect afterwards.
        $self->{dbh}->disconnect();
        my %running = ();
        for my $target (@{ $self->{targets} }) {
            $self->log(3, "found target host " . $target->{host} . " dir " . $target->{dir});
            while (scalar keys %running >= $self->{parallel}) {
                $self->log(3, "reached parallel limit - waiting");
                my $pid = wait();
                delete $running{$pid};
                $self->log(3, "child with pid $pid terminated, " . (scalar keys %running) . " remaining");
            }
            my $pid = fork();
            if (!defined($pid)) {
                die "fork failed: $!";
            }
            if ($pid == 0) {
                $self->{dbh} = DBI->connect(@{ $self->{dbi} },
                                            { AutoCommit => 0,
                                              PrintError => 1,
                                              RaiseError => 1
                                            });
                $self->backup2disk($target);
                $self->{dbh}->disconnect();
                exit(0);
            } else {
                $running{$pid} = 1;
                $self->log(3, "child with pid $pid started, " . (scalar keys %running) . " running");
            }
            sleep(10);
        }
        while (scalar keys %running) {
            my $pid = wait();
            delete $running{$pid};
            $self->log(3, "child with pid $pid terminated, " . (scalar keys %running) . " remaining");
        }
        $self->{dbh} = DBI->connect(@{ $self->{dbi} },
                                    { AutoCommit => 0,
                                      PrintError => 1,
                                      RaiseError => 1
                                    });
    } else {
        for my $target (@{ $self->{targets} }) {
            $self->backup2disk($target);
        }
    }
    $self->export();
}

sub backup2disk {
    my ($self, $target) = @_;

    unless ($self->reserve_fileset($target)) {
        return;
    }

    $self->log(3, "starting backup for target host " . $target->{host} . " dir " . $target->{dir});
    $self->{target} = $target;
    $self->{start_time} = time();

    # get previous generation
    $self->get_last_session();
    my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime);
    $self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' . $target->{dir};
    $self->new_session();

    my ($list_pid, $list_cfd, $list_dfd);  # connection to get list of files
    $list_pid = open2($list_dfd, $list_cfd,
                      "/usr/bin/ssh",
                      "-l", "simba_da",
                      $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
                      $target->{host}, "da");
    $list_cfd->printflush("list $target->{dir}\n");  # XXX - encode!
    close($list_cfd);

    my $count = 0;
    my $last_report = $self->{start_time};
    while (<$list_dfd>) {
        my $now = time();
        if ($target->{timeout}) {
            my $elapsed = $now - $self->{start_time};
            $self->log(10, "checking timeout " . $elapsed . " > " . $target->{timeout});
            if ($elapsed > $target->{timeout}) {
                $self->log(3, "Timeout exceeded. $elapsed > $target->{timeout}");
                last;
            }
        }
        $count++;
        chomp;
        $self->log(10, "file: $_");
        # split into fields
        my $f = $self->parse($_);
        if ($now - $last_report >= 10) {
            $self->log(9, "file $count: $f->{name}");
            $last_report = $now;
        }

        my $success = 1;
        if ($f->{t} eq 'f') {
            $success = $self->store_file($f);
        } elsif ($f->{t} eq 'd') {
            my $d = "$self->{this_backup}/$f->{name}";
            $d =~ s,//+,/,g;
            mkdir_p($d) or die "cannot mkdir $d: $!";  # XXX
            $self->setmeta($f);
        } elsif ($f->{t} eq 'l') {
            my $l = "$self->{this_backup}/$f->{name}";
            unless (symlink($f->{lt}, $l)) {
                die "cannot symlink $l -> $f->{lt}: $!";  # XXX
            }
            # $self->setmeta($f); ignore for symlinks. would need to use
            # lchown, lchmod, etc.
        } else {
            # create local copy (or insert into DB only?)
            $self->log(5, "ignored $_\n");
        }
        # insert into DB.
        $self->db_record_version($target, $f) if ($success);
    }
    $self->flush_insert_instances();
    $self->close_session();
    $self->release_fileset($target);
    $self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
    $self->{counts}{objects} += $count;
    $self->log(3, "statistics:");
    for (sort keys %{ $self->{counts} }) {
        $self->log(3, " $_: $self->{counts}{$_}");
    }
    for (sort keys %{ $self->{times} }) {
        $self->log(3, " $_: $self->{times}{$_} s");
    }
}

# Mark the fileset as in use by storing our pid in the filesets table.
# If another process holds the fileset, retry for up to an hour; if the
# recorded pid no longer exists, the reservation is assumed to be stale
# and is cleared.
sub reserve_fileset {
    my ($self, $target) = @_;
    my $start = time();
    my $pid;
    while (time() - $start < 3600) {
        my $rows = $self->{dbh}->do(q{update filesets set pid=? where id=? and pid is null}, {}, $$, $target->{id});
        return 1 if $rows == 1;
        $pid = $self->{dbh}->selectrow_array(q{select pid from filesets where id=?}, {}, $target->{id});
        $self->log(3, "fileset $target->{id} appears to be in use by pid $pid");
        if (!kill(0, $pid) && $!{ESRCH}) {
            $self->log(3, "pid $pid doesn't exist, trying to release fileset $target->{id}");
            $self->{dbh}->do(q{update filesets set pid=null where id=? and pid=?}, {}, $target->{id}, $pid);
        }
        sleep 60;
    }
    $self->log(2, "fileset $target->{id} appears to be still in use by pid $pid after grace period - giving up");
    return 0;
}

sub release_fileset {
    my ($self, $target) = @_;
    $self->log(3, "releasing fileset $target->{id}");
    $self->{dbh}->do(q{update filesets set pid=null where id=? and pid=?}, {}, $target->{id}, $$);
    $self->{dbh}->commit();
}

sub parse {
    my ($self, $s) = @_;
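    # Each line from the DA is a file name followed by space-separated
    # key=value attributes, e.g. (hypothetical):
    #   /etc/passwd t=f s=1623 m=1161234567 o=root g=root acl=u::rw-,g::r--,o:r--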
    my @s = split(/ +/, $s);
    my $f = {};
    $f->{name} = shift @s;
    $f->{name} = $1 if ($f->{name} =~ /(.*)/);  # detaint XXX
    for (@s) {
        my ($k, $v) = split(/=/, $_, 2);
        $f->{$k} = $v;
        # special processing for permissions etc, here?
    }
    $f->{o} = unquote($f->{o});
    $f->{g} = unquote($f->{g});
    $f->{acl} = unquote($f->{acl});
    $f->{m} = $1 if $f->{m} =~ /^(\d+)$/;
    $f->{lt} = unquote($1) if defined $f->{lt} && $f->{lt} =~ /(.*)/;
    return $f;
}

sub present {
    my ($self, $f) = @_;
    return unless $self->{last_backup};
    my $st = lstat("$self->{last_backup}/$f->{name}");
    return unless $st;
    if ($st->mtime == $f->{m} &&
        $st->size == $f->{s} &&
        $st->uid == $self->name2uid($f->{o}) &&
        $st->gid == $self->name2gid($f->{g}) &&
        ($st->mode & 07777) == $self->acl2mode($f)
       ) {
        return 1;
    } else {
        return 0;
    }
}

sub mkdir_p {
    my ($dir, $perm) = @_;
    $perm = 0777 unless (defined($perm));
    if (-d $dir) {
        return 1;
    } elsif (mkdir($dir, $perm)) {
        return 1;
    } elsif ($!{ENOENT}) {
        my $parentdir = $dir;
        $parentdir =~ s|(.*)/.+|$1|;
        mkdir_p($parentdir, $perm);
        if (-d $dir) {
            return 1;
        } else {
            return mkdir($dir, $perm);
        }
    } else {
        return undef;
    }
}

sub basedir {
    my ($self, $dir) = @_;
    $self->{basedir} = $dir if defined($dir);
    return $self->{basedir};
}

sub targets {
    my ($self, $targets) = @_;
    $self->{targets} = $targets if defined($targets);
    return $self->{targets};
}

sub add_target {
    my ($self, $target) = @_;
    push @{ $self->{targets} }, $target;
    return $self->{targets};
}

my %permstrbits = (
    '---' => 0,
    '--x' => 1,
    '-w-' => 2,
    '-wx' => 3,
    'r--' => 4,
    'r-x' => 5,
    'rw-' => 6,
    'rwx' => 7,
);

sub setmeta {
    my ($self, $f) = @_;
    my $fn = "$self->{this_backup}/$f->{name}";
    $self->log(3, "$fn is tainted!") if tainted($fn);
    my $mode = $self->acl2mode($f);
    $self->log(3, "$mode is tainted!") if tainted($mode);
    chown($self->name2uid($f->{o}), $self->name2gid($f->{g}), $fn);
    chmod($mode, $fn);
    utime(time, $f->{m}, $fn);
}

# computes the mode from the acl (and the set[ug]id and sticky bits)
# and returns it. Optional ACL entries are currently ignored but should
# eventually be returned as a second value.
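# Example: acl "u::rwx,g::r-x,o:r-x" with no setuid/setgid/sticky bits
# yields mode 0755.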
sub acl2mode {
    my ($self, $f) = @_;
    my $mode = 0;
    if ($f->{acl}) {
        for my $ace (split(',', $f->{acl})) {
            if ($ace =~ /^u::(...)$/) {
                $mode |= ($permstrbits{$1} << 6);
            } elsif ($ace =~ /^g::(...)$/) {
                $mode |= ($permstrbits{$1} << 3);
            } elsif ($ace =~ /^o:(...)$/) {
                $mode |= ($permstrbits{$1} << 0);
            } else {
                $self->log(5, "warning: unknown ACE $ace ignored");
            }
        }
    }
    if ($f->{setuid}) { $mode |= 04000 }
    if ($f->{setgid}) { $mode |= 02000 }
    if ($f->{sticky}) { $mode |= 01000 }
    return $mode;
}

my %ucache;
sub name2uid {
    my ($self, $uname) = @_;
    $uname = $1 if $uname =~ /(.*)/;  # detaint
    return $ucache{$uname} if (defined $ucache{$uname});
    if ($uname =~ /^\d+$/) {
        return $ucache{$uname} = $uname;
    } else {
        my $uid = getpwnam($uname);
        if (defined($uid)) {
            return $ucache{$uname} = $uid;
        } else {
            return $ucache{$uname} = $self->{unknown_uid};
        }
    }
}

my %gcache;
sub name2gid {
    my ($self, $gname) = @_;
    $gname = $1 if $gname =~ /(.*)/;  # detaint
    return $gcache{$gname} if (defined $gcache{$gname});
    if ($gname =~ /^\d+$/) {
        return $gcache{$gname} = $gname;
    } else {
        my $gid = getgrnam($gname);
        if (defined($gid)) {
            return $gcache{$gname} = $gid;
        } else {
            return $gcache{$gname} = $self->{unknown_gid};
        }
    }
}

# currently used log levels:
#  0 - fatal error, backup failed
#  3 - global progress messages, like start and end of a backup, statistics.
#  5 - errors which prevent a file from being backed up
# 10 - progress messages for single files.
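# A line written by log() looks like this (hypothetical example):
#   2008-04-05T13:21:28+0200 12345 [3]: starting backup for target host ...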
sub log {
    my ($self, $level, $msg) = @_;
    if ($level <= $self->{log_level}) {
        $self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n")
            or die "write to log failed: $!";
    }
}

sub log_level {
    my ($self, $log_level) = @_;
    $self->{log_level} = $log_level if defined($log_level);
    return $self->{log_level};
}
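
# Record one file version in the database: ensure a row for the path exists
# in table files, find or create a matching metadata row in versions2
# (including checksum, link target or device number, depending on the file
# type), and queue a row for table instances linking file, version and
# session.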
sub db_record_version {
    my ($self, $target, $f) = @_;
    my $t0 = gettimeofday();
    my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
                                                { Slice => {} },
                                                $target->{id}, $f->{name});
    my $t1 = gettimeofday();
    $self->{times}{db_record_version_select_files} += $t1 - $t0;
    unless (@$db_f) {
        $self->{dbh}->do("insert into files(fileset, path) values(?, ?)",
                         {},
                         $target->{id}, $f->{name});
        $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
                                                 { Slice => {} },
                                                 $target->{id}, $f->{name});
    }
    my $t2 = gettimeofday();
    $self->{times}{db_record_version_insert_files} += $t2 - $t1;
    if ($f->{t} eq 'f' && !defined($f->{checksum})) {
        # this must be a link to the previous version
        $f->{checksum} = $self->get_checksum($db_f->[0]{id});
    }
    my $t2a = gettimeofday();
    $self->{times}{db_record_version_versions2_get_checksum} += $t2a - $t2;
    my $query =
        "select id from versions2
         where
             file_type=? and file_size=? and file_mtime=? and
             file_owner=? and file_group=? and file_acl=? and
             file_unix_bits=?
        ";
    my @args = (
        $f->{t}, $f->{s}, $f->{m},
        $f->{o}, $f->{g}, $f->{acl},
        join(',', map { $f->{$_} ? ($_) : () } qw(setuid setgid sticky)),
    );
    if ($f->{t} eq 'f') {
        $query .= " and checksum=?";
        push @args, $f->{checksum};
    } elsif ($f->{t} eq 'l') {
        $query .= " and file_linktarget=?";
        push @args, $f->{lt};
    } elsif ($f->{t} eq 'b' || $f->{t} eq 'c') {
        $query .= " and file_rdev=?";
        push @args, $f->{rdev};
    }
    my $version_id = $self->{dbh}->selectrow_array($query, {}, @args);
    my $t2b = gettimeofday();
    $self->{times}{db_record_version_versions2_get_version_id} += $t2b - $t2a;
    unless ($version_id) {
        # We use a 32 bit int field in the database, so we have to clamp the mtime to that range.
        # Need to fix this sometime before 2038 :-)
        if ($f->{m} < -2147483648) {
            $f->{m} = -2147483648;
        } elsif ($f->{m} > 2147483647) {
            $f->{m} = 2147483647;
        }
        $self->log(10, "insert into versions2(..., file_mtime=$f->{m}, ...)");
        # XXX why is $f->{checksum} undef here for ./bin/dash?
        $self->{dbh}->do("insert into versions2 (
                              file_type, file_size, file_mtime,
                              file_owner, file_group, file_acl,
                              file_unix_bits,
                              file_rdev,
                              checksum, file_linktarget
                          )
                          values (
                              ?, ?, ?,
                              ?, ?, ?,
                              ?,
                              ?,
                              ?, ?
                          )",
                         {},
                         $f->{t}, $f->{s}, $f->{m},
                         $f->{o}, $f->{g}, $f->{acl},
                         join(',', map { $f->{$_} ? ($_) : () } qw(setuid setgid sticky)),
                         $f->{rdev},
                         $f->{checksum}, $f->{lt},
                        );
        $version_id = $self->{dbh}->{mysql_insertid};
        $self->log(10, "insert into versions2 -> $version_id");
    }
    my $t3 = gettimeofday();
    $self->{times}{db_record_version_versions2} += $t3 - $t2;
    $self->{times}{db_record_version_versions2_insert} += $t3 - $t2b;
    push @{ $self->{caches}{insert_instances} },
        [
            $db_f->[0]{id},
            $self->{record_file_id} ? $f->{id} : undef,
            $self->{record_time} ? time() : undef,
            1,
            $self->{session_id}, $version_id
        ];
    if (@{ $self->{caches}{insert_instances} } > 10) {
        $self->flush_insert_instances();
    }
    my $t4 = gettimeofday();
    $self->{times}{db_record_version_insert_instances} += $t4 - $t3;
    $self->{times}{db_record_version} += $t4 - $t0;
}
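
# Return the checksum the given file (a files.id) had in the last backup
# session. On the first call for a session the complete file-to-checksum
# map of that session is fetched and cached.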
sub get_checksum {
    my ($self, $file) = @_;
    if ($self->{caches}{file_checksums}{ $self->{last_backup_id} }) {
        return $self->{caches}{file_checksums}{ $self->{last_backup_id} }{$file};
    } else {
        $self->{caches}{file_checksums}{ $self->{last_backup_id} } = my $file_checksums = {};
        my $sth = $self->{dbh}->prepare(
            "select file, checksum from versions2 v, instances i
             where session=? and v.id=i.version");
        $sth->execute($self->{last_backup_id});
        while (my ($file, $checksum) = $sth->fetchrow_array) {
            $file_checksums->{$file} = $checksum;
        }
        return $file_checksums->{$file};
    }
}

sub flush_insert_instances {
    my ($self) = @_;
    my $dbh = $self->{dbh};
    if ($self->{caches}{insert_instances} && @{ $self->{caches}{insert_instances} }) {
        my $cmd = "insert into instances(file, file_id, date, online, session, version)"
                  . " values "
                  . join(", ",
                         map {
                             "("
                             . join(",", map { $dbh->quote($_) } @$_)
                             . ")"
                         } @{ $self->{caches}{insert_instances} }
                        );
        $dbh->do($cmd);
    }
    $dbh->commit();
    $self->{caches}{insert_instances} = [];
}

sub new_session {
    my ($self) = @_;
    $self->{dbh}->do("insert into sessions(start_date, prefix) values(?, ?)", {}, time(), $self->{this_backup});
    $self->{session_id} = $self->{dbh}->{'mysql_insertid'};
}

sub close_session {
    my ($self) = @_;
    $self->{dbh}->do("update sessions set end_date=? where id=?", {}, time(), $self->{session_id});
    $self->{dbh}->commit();
    $self->close_file_connection;
    delete $self->{target};
}

sub close_file_connection {
    my ($self) = @_;
    if ($self->{file_pid}) {
        close($self->{file_cfd});
        close($self->{file_dfd});
        $self->log(3, "waiting for $self->{file_pid}");
        waitpid $self->{file_pid}, 0;
        $self->log(3, "$self->{file_pid} terminated with status $?");
        delete $self->{file_cfd};
        delete $self->{file_dfd};
        delete $self->{file_pid};
    }
}

sub get_last_session {
    my ($self) = @_;
    my $sessions
        = $self->{dbh}->selectall_arrayref(
            "select * from sessions where end_date is not null and prefix like ? order by end_date desc",
            { Slice => {} },
            $self->{basedir} . '/%/' . $self->{target}->{host} . '/' . $self->{target}->{dir}
          );
    $self->{last_backup} = $sessions->[0]{prefix};
    $self->{last_backup_id} = $sessions->[0]{id};
    $self->log(3, "last backup: $self->{last_backup} ($self->{last_backup_id})");
}

=head2 linkdup

Try to find a duplicate of the current file in the database and replace the
current file with a hardlink to it. This is useful if you have multiple
copies of a file stored in different locations.

The search starts from the newest session and continues into the past until
either linking succeeds or we run out of duplicates. This is done because
creating a hard link may not always be possible (the duplicate may be on a
different file system or may already have reached the maximum link count),
and it is more likely that we can link to new copies than to old ones.
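
A sketch of the intended use (this mirrors the call in store_file below;
linkdup returns the path of the duplicate on success and nothing otherwise):

    unless ($self->linkdup($f, $backup_filename)) {
        # no duplicate found (or linking failed): keep the freshly
        # stored copy and set its metadata
        $self->setmeta($f);
    }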
=cut

sub linkdup {
    my ($self, $f, $backup_filename) = @_;
    my $t0 = gettimeofday();
    # XXX - this seems to be slow
    # XXX - creates huge temp files. Restrict to last few sessions or at least sessions on the same device?
    # XXX - that's not quite as simple: We only have the prefix, but there are many prefixes on the same
    #       device. We can create a list of them at first call, though, and then pass the list
    #       to the query. Maybe even shorten the list. ($n newest sessions only)
    # XXX - another possible optimization is to check the last few files we've written: .svn/prop-base
    #       normally contains a lot of identical files.
    unless ($self->{sessions_on_same_device}) {
        my $st = stat($backup_filename);
        my $my_dev = defined $st ? $st->dev : "";  # can this happen?
        my $sth = $self->{dbh}->prepare("select * from sessions order by id desc");
        $sth->execute();
        while (my $r = $sth->fetchrow_hashref()) {
            my $st = lstat $r->{prefix};
            my $dev = defined $st ? $st->dev : "";
            next unless $dev eq $my_dev;
            last if $self->{sessions_on_same_device} && @{ $self->{sessions_on_same_device} } > 30;
            push @{ $self->{sessions_on_same_device} }, $r->{id};  # We only use the id
        }
        $self->{min_instance_id}
            = $self->{dbh}->selectrow_array(
                "select min(id) from instances where session in ("
                . join(", ", map("?", @{ $self->{sessions_on_same_device} }))
                . ")",
                {},
                @{ $self->{sessions_on_same_device} }
              );
        $self->log(3, "min_instance_id set to $self->{min_instance_id}");
    }

    my $tdb0 = gettimeofday();
    my $tdb1;
    my $sth = $self->{dbh}->prepare(
        "select * from versions2, instances, files, sessions
         where file_type=? and file_size=? and file_mtime=?
           and file_owner=? and file_group=? and file_acl=?
           and file_unix_bits=?
           and checksum=? and online=1
           and versions2.id = instances.version
           and instances.id >= ?
           and instances.file = files.id
           and instances.session = sessions.id
           and sessions.id in (" . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")
         order by instances.session desc
        ");
    $sth->execute(
        $f->{t}, $f->{s}, $f->{m},
        $f->{o}, $f->{g}, $f->{acl},
        join(',', map { $f->{$_} ? ($_) : () } qw(setuid setgid sticky)),
        $f->{checksum},
        $self->{min_instance_id},
        @{ $self->{sessions_on_same_device} },
    );
    my $st = stat($backup_filename);
    my $my_dev = defined $st ? $st->dev : "";
    while (my $r = $sth->fetchrow_hashref()) {
        unless ($tdb1) {
            $tdb1 = gettimeofday();
            $self->{times}{linkdup_db} += $tdb1 - $tdb0;
        }
        # check if old file is on same device. If it isn't, skip it.
        # XXX - this should now be obsolete because we already selected only matching sessions above.
        unless ($self->{prefix_device}{ $r->{prefix} }) {
            my $st = lstat $r->{prefix};
            $self->{prefix_device}{ $r->{prefix} }
                = defined $st ? $st->dev : "";
        }
        next unless $self->{prefix_device}{ $r->{prefix} } eq $my_dev;
        my $oldfile = "$r->{prefix}/$r->{path}";
        if (my $st = lstat($oldfile)) {
            if ($st->mtime == $f->{m} &&
                $st->size == $f->{s} &&
                $st->uid == $self->name2uid($f->{o}) &&
                $st->gid == $self->name2gid($f->{g}) &&
                ($st->mode & 07777) == $self->acl2mode($f)
               ) {
                my ($dirname, $basename) = $backup_filename =~ m{(.*)/(.*)};
                my $tmpname = "$basename.$$.simba_backup";
                if (length($tmpname) > 255) {
                    $tmpname = substr($basename, 0, 235) . ".$$.simba_backup";
                }
                $tmpname = "$dirname/$tmpname";
                rename($backup_filename, $tmpname) or die "cannot save $backup_filename to $tmpname: $!";
                if (link($oldfile, $backup_filename)) {
                    $self->log(10, "linked (dup)");
                    unlink($tmpname) or die "cannot unlink $tmpname: $!";
                    $sth->finish();
                    my $t1 = gettimeofday();
                    $self->{counts}{dup2}++;
                    $self->{times}{linkdup} += $t1 - $t0;
                    return $oldfile;
                } else {
                    $self->log(5, "cannot link $oldfile to $backup_filename");
                    rename($tmpname, $backup_filename) or die "cannot restore $backup_filename from $tmpname: $!";
                }
            }
        }
    }
    my $t1 = gettimeofday();
    $self->{times}{linkdup} += $t1 - $t0;
    unless ($tdb1) {
        $tdb1 = gettimeofday();
        $self->{times}{linkdup_db} += $tdb1 - $tdb0;
    }
    $self->{counts}{linkdup_miss}++;
    return;
}

=head2 store_file

Store a file in the local filesystem. If the file appears to be unchanged
since the last backup, try to create a hard link. Otherwise, get the contents
of the file from the DA, and search for a file with the same contents (i.e.,
checksum) and metadata, but possibly different name, and try to link to that.
If no link can be created to an existing file, create a new one.
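
The exchange with the DA over the file connection looks roughly like this
(a sketch reconstructed from the code below, not a formal protocol
specification):

    CA: get <dir>/<name>
    DA: data <name> <key=value attributes, including s=<size>>
    DA: <size> bytes of file contents
    DA: (empty line)
    DA: chk sha1 <hex digest>      (or "fail <reason>" on error)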
=cut

sub store_file {
    my ($self, $f) = @_;
    my $success = 1;
    if ($self->present($f)) {
        if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
            $self->log(10, "linked");
            $self->{counts}{dup1}++;
            return $success;
        } else {
            $self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
        }
    }

    # If size is zero, check if we have seen a matching file before. If we have, link to it.
    # Ubuntu contains a lot of zero sized files (about 8000 per installed kernel).
    # Searching for them in the database is slow, so we special-case that here.
    # We could generalize that, but I don't think that there will ever be enough identical
    # non-empty files to make that worthwhile. (XXX - the generalization is not implemented.)
    if ($f->{s} == 0 && $f->{t} eq 'f') {
        no warnings 'uninitialized';  # unix bits may not exist
        my $k = "$f->{m} $f->{o} $f->{g} $f->{acl} $f->{setuid} $f->{setgid} $f->{sticky}";
        if ($self->{null_files}{$k}) {
            my $oldfile = $self->{null_files}{$k}{name};
            my $backup_filename = "$self->{this_backup}/$f->{name}";
            if (link($oldfile, $backup_filename)) {
                $self->log(10, "linked (empty)");
                $self->{counts}{dup10}++;
                return $success;
            }
        }
    }

    # else request from da
    unless ($self->{file_pid}) {
        $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
                                  "/usr/bin/ssh",
                                  "-l", "simba_da",
                                  $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
                                  $self->{target}->{host}, "da");
    }
    $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n");  # XXX - encode!
    my $header = $self->{file_dfd}->getline;  # this should be the same as $_ - check?
    if ($header =~ /^data (.*)/) {
        my $f2 = $self->parse($1);
        my $backup_filename = "$self->{this_backup}/$f->{name}";
        my $file_bfd;
        unless (open($file_bfd, '>:raw', $backup_filename)) {
            $self->log(5, "cannot open backup file $backup_filename: $!");
            # XXX - There may be some errors from which we can recover, e.g., for
            # "File name too long" we could just shorten the file name. But for
            # now we just skip the file:
            # XXX - some other errors are almost certainly fatal, e.g., ENOENT
            # probably means that our backup device has been unmounted (BTDT).
            $self->close_file_connection;
            return 0;
        }
        my $size = $f2->{s};
        my $sha1 = Digest::SHA->new(1);
        while ($size > 0) {
            my $buffer;
            my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
            if (!defined($rc)) {
                # I/O error
                $self->log(5, "error reading from data socket: $!");
                last;
            } elsif ($rc == 0) {
                # premature EOF.
                $self->log(5, "unexpected EOF reading from data socket");
                last;
            }
            $file_bfd->print($buffer) or die "write to backup failed: $!";
            $size -= length($buffer);
            $sha1->add($buffer);
        }
        close($file_bfd) or die "write to backup failed: $!";
        my $trailer = $self->{file_dfd}->getline;  # should be empty line
        $trailer = $self->{file_dfd}->getline;
        if ($trailer =~ /^fail /) {
            $self->log(5, $trailer);
            $success = 0;
        } elsif ($trailer =~ /^chk sha1 (\w+)/) {
            my $checksum = $sha1->hexdigest;
            if ($checksum ne $1) {
                $self->log(5, "checksum error\n");
            }
            $f->{checksum} = $checksum;
        } else {
            $self->log(5, "unexpected trailer $trailer\n");
            $self->close_file_connection;
            $success = 0;
        }
        unless ($self->linkdup($f, $backup_filename)) {
            $self->setmeta($f);
            $self->log(10, "stored");
        }
        if ($f->{s} == 0 && $f->{t} eq 'f') {
            no warnings 'uninitialized';  # unix bits may not exist
            my $k = "$f->{m} $f->{o} $f->{g} $f->{acl} $f->{setuid} $f->{setgid} $f->{sticky}";
            $self->{null_files}{$k}{name} = $backup_filename;
        }
    } else {
        $self->log(5, "unexpected header $header\n");
        $self->close_file_connection;
        $success = 0;
    }
    return $success;
}
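
# The instances table is range-partitioned by id (MySQL). Read the highest
# existing partition boundary from information_schema and add partitions of
# instances_part_size ids each until there is room above the current
# maximum id.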
sub adjust_partitions {
    my ($self) = @_;
    my $dbh = $self->{dbh};
    my $database = $dbh->selectrow_array("select database()");
    my $max_id = $dbh->selectrow_array("select max(id) from instances");
    my ($max_part, $limit)
        = $dbh->selectrow_array(
            "select partition_ordinal_position, partition_description
             from information_schema.partitions
             where table_schema='$database' and table_name='instances'
             order by partition_ordinal_position desc
             limit 1");
    while ($max_id + $self->{instances_part_size} > $limit) {
        $max_part++;
        $limit += $self->{instances_part_size};
        my $partition_name = sprintf("p%03d", $max_part);
        $dbh->do("alter table instances add partition (partition $partition_name values less than ($limit))");
        $self->log(3, "added partition $partition_name < $limit");
    }
}
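
# Export each session (joined with its instances, versions, files and
# fileset) as a tab-separated file under /var/lib/simba/export and compress
# it with gzip. Sessions that have already been exported are skipped.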
sub export {
    my ($self) = @_;
    my $dbh = $self->{dbh};
    my $exportdir = "/var/lib/simba/export";
    mkdir($exportdir);
    my $sessions = $dbh->selectcol_arrayref("select id from sessions order by id");
    for my $session_id (@$sessions) {
        next if (-f "$exportdir/$session_id.tsv"
                 || -f "$exportdir/$session_id.tsv.gz");
        $self->log(3, "exporting session $session_id");
        my $sth
            = $dbh->prepare(
                qq{select sessions.id as "sessions.id", start_date, end_date, prefix,
                          instances.id as "instances.id", file_id, date, online,
                          versions2.id as "versions2.id", file_type, file_size, file_mtime, file_owner, file_group, file_acl, file_unix_bits, file_rdev, checksum, file_linktarget,
                          files.id as "files.id", path,
                          filesets.id as "filesets.id", host, dir, options, pid
                   from sessions, instances, versions2, files, filesets
                   where sessions.id = $session_id
                     and sessions.id = instances.session
                     and instances.version = versions2.id
                     and instances.file = files.id
                     and files.fileset = filesets.id
                  });
        $sth->execute;
        open my $fh, '>', "$exportdir/$session_id.tsv.$$" or die "cannot create $exportdir/$session_id.tsv.$$: $!";
        print $fh join("\t", @{ $sth->{NAME} }), "\n";
        while (my $r = $sth->fetchrow_arrayref) {
            no warnings 'uninitialized';
            print $fh join("\t", @$r), "\n" or die "cannot write to $exportdir/$session_id.tsv.$$: $!";
        }
        close($fh);
        rename "$exportdir/$session_id.tsv.$$", "$exportdir/$session_id.tsv" or die "cannot rename $exportdir/$session_id.tsv.$$ to $exportdir/$session_id.tsv: $!";
        system "/bin/gzip", "$exportdir/$session_id.tsv";
    }
}

sub DESTROY {
    my ($self) = @_;
    $self->{dbh}->disconnect();
}

# vim: tw=0 expandtab
1;