#!/usr/bin/perl package Simba::CA; use strict; use warnings; use Encode; use IPC::Open2; use POSIX qw(strftime); use Simba::Util qw(quote unquote typestr); use Readonly; use Digest::SHA1; use List::Util qw(min); use IO::Handle; use File::stat; use Scalar::Util qw(tainted); use DBI; Readonly my $BUFSIZE => 128 * 1024; sub new { my ($class, $opt) = @_; my $self = {}; bless $self, $class; $self->{basedir} = '/backup'; $self->{unknown_uid} = 65533; $self->{unknown_gid} = 65533; $self->{fh_log} = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR; $self->{log_level} = 99; if ($opt->{dbi}) { $self->{dbh} = DBI->connect(@{ $opt->{dbi} }, { AutoCommit => 0, PrintError => 1, RaiseError => 1 } ); } elsif ($opt->{dbi_file}) { my $fn = $opt->{dbi_file}; open(FN, "<$fn") or die "cannot open $fn: $!"; my $line = ; close(FN); my @cred = split(/[\s\n]+/, $line); $self->{dbh} = DBI->connect(@cred, { AutoCommit => 0, PrintError => 1, RaiseError => 1 } ); } $self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets", { Slice => {} }); if ($ENV{HOME} =~ m{([/\w]*)}) { if (-f "$1/.ssh/id_rsa") { if (my $st = stat("$1/.ssh/id_rsa")) { if ($st->uid == $>) { $self->{ssh_id_file} = "$1/.ssh/id_rsa"; } } } } return $self; } sub run { my ($self) = @_; # run sequentially for prototype. In production we probably # want some concurrency for my $target (@{$self->{targets}}) { $self->backup2disk($target); } } sub backup2disk { my ($self, $target) = @_; # get previous generation my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' . $target->{host} . '/' . $target->{dir}); $self->{last_backup} = $dirs[-1]; $self->{last_backup} = $1 if $self->{last_backup} =~ /(.*)/; # detaint my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime); $self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' . $target->{dir}; $self->new_session(); my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files my ($file_pid, $file_cfd, $file_dfd); # connection to get content of files $list_pid = open2($list_dfd, $list_cfd, "/usr/bin/ssh", "-l", "simba_da", $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (), $target->{host}, "da"); $list_cfd->printflush("list $target->{dir}\n"); # XXX - encode! close($list_cfd); while (<$list_dfd>) { chomp; $self->log(10, "file: $_"); # split into fields chomp; my $f = $self->parse($_); # if file is already present if ($f->{t} eq 'f') { if($self->present($f)) { link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX $self->log(10, "linked"); } else { # else request from da unless ($file_pid) { $file_pid = open2($file_dfd, $file_cfd, "/usr/bin/ssh", "-l", "simba_da", $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (), $target->{host}, "da"); } $file_cfd->printflush("get $target->{dir}/$f->{name}\n"); # XXX - encode! my $header = <$file_dfd>; # this should be the same as $_ - check? if ($header =~ /^data (.*)/) { my $f2 = $self->parse($1); my $backup_filename = "$self->{this_backup}/$f->{name}"; open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!"; my $size = $f2->{s}; my $err; my $sha1 = Digest::SHA1->new; while ($size > 0) { my $buffer; my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE)); if (!defined($rc)) { # I/O error $self->log(5, "error reading from data socket: $!"); last; } elsif ($rc == 0) { # premature EOF. $self->log(5, "unexpected EOF reading from data socket"); last; } $file_bfd->print($buffer) or die "write to backup failed: $!"; $size -= length($buffer); $sha1->add($buffer); } close($file_bfd) or die "write to backup failed: $!"; my $trailer = <$file_dfd>; # should be empty line $trailer = <$file_dfd>; if ($trailer =~ /^fail /) { $self->log(5, $trailer); } elsif ($trailer =~ /^chk sha1 (\w+)/) { my $checksum = $sha1->hexdigest; if ($checksum ne $1) { $self->log(5, "checksum error\n"); } $f->{checksum} = $checksum; } else { $self->log(5, "unexpected trailer $trailer\n"); } $self->setmeta($f); $self->log(10, "stored"); } else { $self->log(5, "unexpected header $header\n"); } } } elsif ($f->{t} eq 'd') { my $d = "$self->{this_backup}/$f->{name}"; $d =~ s,//+,/,g; mkdir_p($d) or die "cannot mkdir $d: $!"; # XXX $self->setmeta($f); } elsif ($f->{t} eq 'l') { my $l = "$self->{this_backup}/$f->{name}"; unless (symlink($f->{lt}, $l)) { die "cannot symlink $l -> $f->{lt}: $!"; # XXX } # $self->setmeta($f); ignore for symlinks. would need to use # lchown, lchmod, etc. } else { # create local copy (or insert into DB only?) $self->log(5, "ignored $_\n"); } # insert into DB. $self->db_record_version($target, $f); } $self->close_session(); } sub parse { my ($self, $s) = @_; my @s = split(/ +/, $s); my $f = {}; $f->{name} = shift @s; $f->{name} = $1 if ($f->{name} =~ /(.*)/); # detaint XXX for (@s) { my ($k, $v) = split(/=/, $_, 2); $f->{$k} = $v; # special processing for permissions etc, here? } $f->{o} = unquote($f->{o}); $f->{g} = unquote($f->{g}); $f->{acl} = unquote($f->{acl}); $f->{m} = $1 if $f->{m} =~ /^(\d+)$/; $f->{lt} = unquote($1) if defined $f->{lt} && $f->{lt} =~ /(.*)/; return $f; } sub present { my ($self, $f) = @_; return unless $self->{last_backup}; my $st = lstat("$self->{last_backup}/$f->{name}"); return unless $st; if ($st->mtime == $f->{m} && $st->size == $f->{s} && $st->uid == $self->name2uid($f->{o}) && $st->gid == $self->name2gid($f->{g}) && ($st->mode & 07777) == $self->acl2mode($f) ) { return 1; } else { return 0; } } sub mkdir_p { my ($dir, $perm) = @_; $perm = 0777 unless(defined($perm)); if (-d $dir) { return 1; } elsif (mkdir($dir, $perm)) { return 1; } elsif ($!{ENOENT}) { my $parentdir = $dir; $parentdir =~ s|(.*)/.+|$1|; mkdir_p($parentdir, $perm); if (-d $dir) { return 1; } else { return mkdir($dir, $perm); } } else { return undef; } } sub basedir { my ($self, $dir) = @_; $self->{basedir} = $dir if defined($dir); return $self->{basedir}; } sub targets { my ($self, $targets) = @_; $self->{targets} = $targets if defined($targets); return $self->{targets}; } sub add_target { my ($self, $target) = @_; push @{ $self->{targets} }, $target; return $self->{targets}; } my %permstrbits = ( '---' => 0, '--x' => 1, '-w-' => 2, '-wx' => 3, 'r--' => 4, 'r-x' => 5, 'rw-' => 6, 'rwx' => 7, ); sub setmeta { my ($self, $f) = @_; my $fn = "$self->{this_backup}/$f->{name}"; $self->log(3, "$fn is tainted!") if tainted($fn); my $mode = $self->acl2mode($f); $self->log(3, "$mode is tainted!") if tainted($mode); chmod($mode, $fn); chown($self->name2uid($f->{o}), $self->name2gid($f->{g}), $fn); utime(time, $f->{m}, $fn); } # computes the mode from the acl (and the set[ug]id and sticky bits) # and returns it. Optional ACL entries are currently ignored but should # eventually be returned as a second value. sub acl2mode { my ($self, $f) = @_; my $mode = 0; if ($f->{acl}) { for my $ace (split(',', $f->{acl})) { if ($ace =~ /^u::(...)$/) { $mode |= ($permstrbits{$1} << 6); } elsif ($ace =~ /^g::(...)$/) { $mode |= ($permstrbits{$1} << 3); } elsif ($ace =~ /^o:(...)$/) { $mode |= ($permstrbits{$1} << 0); } else { $self->log(5, "warning: unknown ACE $ace ignored"); } } } if ($f->{setuid}) { $mode |= 04000 } if ($f->{setgid}) { $mode |= 02000 } if ($f->{sticky}) { $mode |= 01000 } return $mode; } my %ucache; sub name2uid { my ($self, $uname) = @_; $uname = $1 if $uname =~ /(.*)/; # detaint return $ucache{$uname} if (defined $ucache{$uname}); if ($uname =~ /^\d+$/) { return $ucache{$uname} = $uname; } else { my $uid = getpwnam($uname); if (defined($uid)) { return $ucache{$uname} = $uid; } else { return $ucache{$uname} = $self->{unknown_uid}; } } } my %gcache; sub name2gid { my ($self, $gname) = @_; $gname = $1 if $gname =~ /(.*)/; # detaint return $gcache{$gname} if (defined $gcache{$gname}); if ($gname =~ /^\d+$/) { return $gcache{$gname} = $gname; } else { my $gid = getgrnam($gname); if (defined($gid)) { return $gcache{$gname} = $gid; } else { return $gcache{$gname} = $self->{unknown_gid}; } } } sub log { my ($self, $level, $msg) = @_; if ($level <= $self->{log_level}) { $self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n") or die "write to log failed: $!"; } } sub log_level { my ($self, $log_level) = @_; $self->{log_level} = $log_level if defined($log_level); return $self->{log_level}; } sub db_record_version { my ($self, $target, $f) = @_; my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?", { Slice => {} }, $target->{id}, $f->{name}); unless (@$db_f) { $self->{dbh}->do("insert into files(fileset, path) values(?, ?)", {}, $target->{id}, $f->{name}); $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?", { Slice => {} }, $target->{id}, $f->{name}); } $self->{dbh}->do("insert into versions(file, file_id, file_type, file_size, file_mtime, file_owner, file_group, file_acl, file_unix_bits, file_rdev, date, checksum, online, file_linktarget, session) values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", {}, $db_f->[0]{id}, $f->{id}, $f->{t}, $f->{s}, $f->{m}, $f->{o}, $f->{g}, $f->{acl}, join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)), $f->{rdev}, time(), $f->{checksum}, 1, $f->{lt}, $self->{session_id} ); } sub new_session { my ($self) = @_; $self->{dbh}->do("insert into sessions(start_date, prefix) values(?, ?)", {}, time(), $self->{this_backup}); $self->{session_id} = $self->{dbh}->{'mysql_insertid'}; } sub close_session { my ($self) = @_; $self->{dbh}->do("update sessions set end_date=? where id=?", {}, time(), $self->{session_id}); } # vim: tw=0 expandtab 1;