simba/lib/Simba/CA.pm

400 lines
12 KiB
Perl
Raw Normal View History

#!/usr/bin/perl
2006-10-02 12:56:36 +02:00
package Simba::CA;
use strict;
use warnings;
2006-10-02 12:56:36 +02:00
use Encode;
use IPC::Open2;
use POSIX qw(strftime);
use Simba::Util qw(quote unquote typestr);
use Readonly;
use Digest::SHA1;
use List::Util qw(min);
use IO::Handle;
use File::stat;
use Scalar::Util qw(tainted);
use DBI;
Readonly my $BUFSIZE => 128 * 1024;
2006-10-02 12:56:36 +02:00
sub new {
2006-11-30 15:50:23 +01:00
my ($class, $opt) = @_;
2006-10-02 12:56:36 +02:00
my $self = {};
bless $self, $class;
2006-11-20 12:26:30 +01:00
$self->{basedir} = '/backup';
2006-11-30 15:50:23 +01:00
$self->{unknown_uid} = 65533;
$self->{unknown_gid} = 65533;
$self->{fh_log} = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR;
$self->{log_level} = 99;
2007-06-17 23:40:04 +02:00
if ($opt->{dbi}) {
$self->{dbh} = DBI->connect(@{ $opt->{dbi} },
{ AutoCommit => 0,
PrintError => 1,
RaiseError => 1
}
);
} elsif ($opt->{dbi_file}) {
my $fn = $opt->{dbi_file};
open(FN, "<$fn") or die "cannot open $fn: $!";
my $line = <FN>;
close(FN);
my @cred = split(/[\s\n]+/, $line);
$self->{dbh} = DBI->connect(@cred,
{ AutoCommit => 0,
PrintError => 1,
RaiseError => 1
}
);
}
$self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets", { Slice => {} });
if ($ENV{HOME} =~ m{([/\w]*)}) {
if (-f "$1/.ssh/id_rsa") {
if (my $st = stat("$1/.ssh/id_rsa")) {
if ($st->uid == $>) {
$self->{ssh_id_file} = "$1/.ssh/id_rsa";
}
}
}
}
2006-10-02 12:56:36 +02:00
return $self;
}
sub run {
my ($self) = @_;
# run sequentially for prototype. In production we probably
# want some concurrency
for my $target (@{$self->{targets}}) {
$self->backup2disk($target);
}
}
sub backup2disk {
my ($self, $target) = @_;
# get previous generation
my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' . $target->{host} . '/' . $target->{dir});
$self->{last_backup} = $dirs[-1];
$self->{last_backup} = $1 if $self->{last_backup} =~ /(.*)/; # detaint
my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime);
$self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' . $target->{dir};
2006-10-02 12:56:36 +02:00
my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files
my ($file_pid, $file_cfd, $file_dfd); # connection to get content of files
$list_pid = open2($list_dfd, $list_cfd,
"/usr/bin/ssh",
"-l", "simba_da",
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
$target->{host}, "da");
$list_cfd->printflush("list $target->{dir}\n"); # XXX - encode!
close($list_cfd);
2006-10-02 12:56:36 +02:00
while (<$list_dfd>) {
2007-06-18 03:00:29 +02:00
chomp;
$self->log(10, "file: $_");
2006-10-02 12:56:36 +02:00
# split into fields
chomp;
2006-10-02 12:56:36 +02:00
my $f = $self->parse($_);
# if file is already present
if ($f->{t} eq 'f') {
if($self->present($f)) {
link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
2007-06-18 03:00:29 +02:00
$self->log(10, "linked");
2006-10-02 12:56:36 +02:00
} else {
# else request from da
unless ($file_pid) {
$file_pid = open2($file_dfd, $file_cfd,
"/usr/bin/ssh",
"-l", "simba_da",
$self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
$target->{host}, "da");
}
$file_cfd->printflush("get $target->{dir}/$f->{name}\n"); # XXX - encode!
my $header = <$file_dfd>; # this should be the same as $_ - check?
if ($header =~ /^data (.*)/) {
my $f2 = $self->parse($1);
open(my $file_bfd, '>:raw', "$self->{this_backup}/$f->{name}") or die; # XXX
my $size = $f2->{s};
my $err;
my $sha1 = Digest::SHA1->new;
while ($size > 0) {
my $buffer;
my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE));
if (!defined($rc)) {
# I/O error
2007-06-18 03:00:29 +02:00
$self->log(5, "error reading from data socket: $!");
last;
} elsif ($rc == 0) {
# premature EOF.
2007-06-18 03:00:29 +02:00
$self->log(5, "unexpected EOF reading from data socket");
last;
}
$file_bfd->print($buffer);
$size -= length($buffer);
$sha1->add($buffer);
}
close($file_bfd);
my $trailer = <$file_dfd>; # should be empty line
$trailer = <$file_dfd>;
if ($trailer =~ /^fail /) {
2007-06-18 03:00:29 +02:00
$self->log(5, $trailer);
} elsif ($trailer =~ /^chk sha1 (\w+)/) {
my $checksum = $sha1->hexdigest;
if ($checksum ne $1) {
2007-06-18 03:00:29 +02:00
$self->log(5, "checksum error\n");
}
$f->{checksum} = $checksum;
} else {
2007-06-18 03:00:29 +02:00
$self->log(5, "unexpected trailer $trailer\n");
}
$self->setmeta($f);
2007-06-18 03:00:29 +02:00
$self->log(10, "stored");
} else {
2007-06-18 03:00:29 +02:00
$self->log(5, "unexpected header $header\n");
}
2006-10-02 12:56:36 +02:00
}
} elsif ($f->{t} eq 'd') {
my $d = "$self->{this_backup}/$f->{name}";
$d =~ s,//+,/,g;
mkdir_p($d) or die "cannot mkdir $d: $!"; # XXX
$self->setmeta($f);
2006-11-28 17:27:59 +01:00
} elsif ($f->{t} eq 'l') {
my $l = "$self->{this_backup}/$f->{name}";
2007-06-17 23:58:57 +02:00
unless (symlink($f->{lt}, $l)) {
die "cannot symlink $l -> $f->{lt}: $!"; # XXX
}
2006-11-28 17:27:59 +01:00
# $self->setmeta($f); ignore for symlinks. would need to use
# lchown, lchmod, etc.
2006-10-02 12:56:36 +02:00
} else {
# create local copy (or insert into DB only?)
2007-06-18 03:00:29 +02:00
$self->log(5, "ignored $_\n");
2006-10-02 12:56:36 +02:00
}
# insert into DB.
$self->db_record_version($target, $f);
2006-10-02 12:56:36 +02:00
}
}
sub parse {
my ($self, $s) = @_;
my @s = split(/ +/, $s);
my $f = {};
$f->{name} = shift @s;
$f->{name} = $1 if ($f->{name} =~ /(.*)/); # detaint XXX
for (@s) {
my ($k, $v) = split(/=/, $_, 2);
$f->{$k} = $v;
# special processing for permissions etc, here?
}
$f->{o} = unquote($f->{o});
$f->{g} = unquote($f->{g});
$f->{acl} = unquote($f->{acl});
$f->{m} = $1 if $f->{m} =~ /^(\d+)$/;
2006-11-30 14:55:53 +01:00
$f->{lt} = unquote($1) if defined $f->{lt} && $f->{lt} =~ /(.*)/;
return $f;
}
sub present {
my ($self, $f) = @_;
return unless $self->{last_backup};
my $st = lstat("$self->{last_backup}/$f->{name}");
return unless $st;
if ($st->mtime == $f->{m} &&
$st->size == $f->{s} &&
2006-11-30 15:50:23 +01:00
$st->uid == $self->name2uid($f->{o}) &&
$st->gid == $self->name2gid($f->{g}) &&
($st->mode & 07777) == $self->acl2mode($f)
) {
return 1;
} else {
return 0;
}
}
sub mkdir_p {
my ($dir, $perm) = @_;
$perm = 0777 unless(defined($perm));
if (-d $dir) {
return 1;
} elsif (mkdir($dir, $perm)) {
return 1;
} elsif ($!{ENOENT}) {
my $parentdir = $dir;
$parentdir =~ s|(.*)/.+|$1|;
mkdir_p($parentdir, $perm);
if (-d $dir) {
return 1;
} else {
return mkdir($dir, $perm);
}
} else {
return undef;
}
}
2006-11-20 12:26:30 +01:00
sub basedir {
my ($self, $dir) = @_;
$self->{basedir} = $dir if defined($dir);
return $self->{basedir};
}
sub targets {
my ($self, $targets) = @_;
$self->{targets} = $targets if defined($targets);
return $self->{targets};
}
sub add_target {
my ($self, $target) = @_;
push @{ $self->{targets} }, $target;
return $self->{targets};
}
my %permstrbits = (
'---' => 0,
'--x' => 1,
'-w-' => 2,
'-wx' => 3,
'r--' => 4,
'r-x' => 5,
'rw-' => 6,
'rwx' => 7,
);
sub setmeta {
my ($self, $f) = @_;
my $fn = "$self->{this_backup}/$f->{name}";
2007-06-18 03:00:29 +02:00
$self->log(3, "$fn is tainted!") if tainted($fn);
my $mode = $self->acl2mode($f);
2007-06-18 03:00:29 +02:00
$self->log(3, "$mode is tainted!") if tainted($mode);
chmod($mode, $fn);
2006-11-30 15:50:23 +01:00
chown($self->name2uid($f->{o}), $self->name2gid($f->{g}), $fn);
utime(time, $f->{m}, $fn);
}
# computes the mode from the acl (and the set[ug]id and sticky bits)
# and returns it. Optional ACL entries are currently ignored but should
# eventually be returned as a second value.
sub acl2mode {
my ($self, $f) = @_;
my $mode = 0;
if ($f->{acl}) {
for my $ace (split(',', $f->{acl})) {
if ($ace =~ /^u::(...)$/) {
$mode |= ($permstrbits{$1} << 6);
} elsif ($ace =~ /^g::(...)$/) {
$mode |= ($permstrbits{$1} << 3);
} elsif ($ace =~ /^o:(...)$/) {
$mode |= ($permstrbits{$1} << 0);
} else {
$self->log(5, "warning: unknown ACE $ace ignored");
}
}
}
if ($f->{setuid}) { $mode |= 04000 }
if ($f->{setgid}) { $mode |= 02000 }
if ($f->{sticky}) { $mode |= 01000 }
return $mode;
}
my %ucache;
sub name2uid {
2006-11-30 15:50:23 +01:00
my ($self, $uname) = @_;
$uname = $1 if $uname =~ /(.*)/; # detaint
return $ucache{$uname} if (defined $ucache{$uname});
if ($uname =~ /^\d+$/) {
return $ucache{$uname} = $uname;
} else {
my $uid = getpwnam($uname);
if (defined($uid)) {
return $ucache{$uname} = $uid;
} else {
2006-11-30 15:50:23 +01:00
return $ucache{$uname} = $self->{unknown_uid};
}
}
}
my %gcache;
sub name2gid {
2006-11-30 15:50:23 +01:00
my ($self, $gname) = @_;
$gname = $1 if $gname =~ /(.*)/; # detaint
return $gcache{$gname} if (defined $gcache{$gname});
if ($gname =~ /^\d+$/) {
return $gcache{$gname} = $gname;
} else {
my $gid = getgrnam($gname);
if (defined($gid)) {
return $gcache{$gname} = $gid;
} else {
2006-11-30 15:50:23 +01:00
return $gcache{$gname} = $self->{unknown_gid};
}
}
}
2006-11-30 15:50:23 +01:00
sub log {
my ($self, $level, $msg) = @_;
if ($level <= $self->{log_level}) {
$self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n");
}
}
sub log_level {
my ($self, $log_level) = @_;
$self->{log_level} = $log_level if defined($log_level);
return $self->{log_level};
}
sub db_record_version {
my ($self, $target, $f) = @_;
my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
{ Slice => {} },
$target->{id}, $f->{name});
unless (@$db_f) {
$self->{dbh}->do("insert into files(fileset, path) values(?, ?)",
{},
$target->{id}, $f->{name});
$db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
{ Slice => {} },
$target->{id}, $f->{name});
}
$self->{dbh}->do("insert into versions(file,
file_id, file_type, file_size, file_mtime,
file_owner, file_group, file_acl,
file_unix_bits,
file_rdev,
date, checksum, online, file_linktarget)
values(?,
?, ?, ?, ?,
?, ?, ?,
?,
?,
?, ?, ?, ?)",
{},
$db_f->[0]{id},
$f->{id}, $f->{t}, $f->{s}, $f->{m},
$f->{o}, $f->{g}, $f->{acl},
join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
$f->{rdev},
time(),
$f->{checksum},
1,
$f->{lt}
);
}
# vim: tw=0 expandtab
1;