#!/usr/bin/perl
package Simba::CA;
use strict;
use warnings;

use Encode;
use IPC::Open2;
use POSIX qw(strftime);
use Simba::Util qw(quote unquote typestr);
use Readonly;
use Digest::SHA1;
use List::Util qw(min);
use IO::Handle;
use File::stat;
use Scalar::Util qw(tainted);

Readonly my $BUFSIZE => 128 * 1024;

sub new {
    my ($class, $opt) = @_;

    my $self = {};
    bless $self, $class;

    $self->{basedir} = '/backup';
    $self->{targets} = [
	{ host => 'localhost', dir => '/var/tmp', },
    ];
    $self->{unknown_uid} = 65533;
    $self->{unknown_gid} = 65533;
    $self->{fh_log}  = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR;
    $self->{log_level} = 99;

    return $self;
}

sub run {
    my ($self) = @_;

    # run sequentially for prototype. In production we probably
    # want some concurrency
    for my $target (@{$self->{targets}}) {
	$self->backup2disk($target);
    }
}

sub backup2disk {
    my ($self, $target) = @_;

    # get previous generation
    my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' .  $target->{host} . '/' .  $target->{dir});

    $self->{last_backup} = $dirs[-1];
    $self->{last_backup} = $1 if $self->{last_backup} =~ /(.*)/; # detaint

    my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime);
    $self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' .  $target->{dir};

    my ($list_pid, $list_cfd, $list_dfd);	# connection to get list of files
    my ($file_pid, $file_cfd, $file_dfd);	# connection to get content of files
    $list_pid = open2($list_dfd, $list_cfd, "/usr/bin/ssh", "-l", "simba_da", $target->{host}, "da");
    $list_cfd->printflush("list $target->{dir}\n");	# XXX - encode!
    close($list_cfd);
    while (<$list_dfd>) {
	# split into fields
	my $f = $self->parse($_);
	# if file is already present
	if ($f->{t} eq 'f') {
	    if($self->present($f)) {
		link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
	    } else {

		# else request from da
		unless ($file_pid) {
		    $file_pid = open2($file_dfd, $file_cfd, "/usr/bin/ssh", "-l", "simba_da", $target->{host}, "da");
		}
		$file_cfd->printflush("get $target->{dir}/$f->{name}\n");	# XXX - encode!
                my $header = <$file_dfd>; # this should be the same as $_ - check?
                if ($header =~ /^data (.*)/) {
                    my $f2 = $self->parse($1);
                    open(my $file_bfd, '>:raw', "$self->{this_backup}/$f->{name}") or die; # XXX
                    my $size = $f2->{s};
                    my $err;
                    my $sha1 = Digest::SHA1->new;

                    while ($size > 0) {
                        my $buffer;
                        my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE));
                        if (!defined($rc)) {
                            # I/O error
                            print STDERR "error reading from data socket: $!";
                            last;
                        } elsif ($rc == 0) {
                            # premature EOF.
                            print STDERR "unexpected EOF reading from data socket";
                            last;
                        }
                        $file_bfd->print($buffer);
                        $size -= length($buffer);
                        $sha1->add($buffer);
                    }
                    close($file_bfd);
                    my $trailer = <$file_dfd>; # should be empty line
                    $trailer = <$file_dfd>; 
                    if ($trailer =~ /^fail /) {
                        print STDERR $trailer;
                    } elsif ($trailer =~ /^chk sha1 (\w+)/) {
                        if ($sha1->hexdigest ne $1) {
                            print STDERR "checksum error\n";
                        }
                    } else {
                        print STDERR "unexpected trailer $trailer\n";
                    }
                    $self->setmeta($f);
                } else {
                    print STDERR "unexpected header $header\n";
                }
	    }
	} elsif ($f->{t} eq 'd') {
                my $d = "$self->{this_backup}/$f->{name}";
                $d =~ s,//+,/,g;
                mkdir_p($d) or die "cannot mkdir $d: $!"; # XXX
                $self->setmeta($f);
	} elsif ($f->{t} eq 'l') {
                my $l = "$self->{this_backup}/$f->{name}";
                symlink($f->{lt}, $l) or die "cannot symlink $l -> $f->{lt}: $!"; # XXX
                # $self->setmeta($f); ignore for symlinks. would need to use 
                # lchown, lchmod, etc.
	} else {
	    # create local copy (or insert into DB only?)
            print STDERR "ignored $_\n";
	}
	# insert into DB.
    }
}

sub parse {
    my ($self, $s) = @_;

    my @s = split(/ +/, $s);
    my $f = {};
    $f->{name} = shift @s;
    $f->{name} = $1 if ($f->{name} =~ /(.*)/);  # detaint XXX
    for (@s) {
	my ($k, $v) = split(/=/, $_, 2);
	$f->{$k} = $v;
	# special processing for permissions etc, here?
    }
    $f->{o} = unquote($f->{o});
    $f->{g} = unquote($f->{g});
    $f->{acl} = unquote($f->{acl});
    $f->{m} = $1 if $f->{m} =~ /^(\d+)$/;
    $f->{lt} = unquote($1) if defined $f->{lt} && $f->{lt} =~ /(.*)/;
    return $f;

}

sub present {
    my ($self, $f) = @_;
    return unless $self->{last_backup};
    my $st = lstat("$self->{last_backup}/$f->{name}");
    return unless $st;
    if ($st->mtime          == $f->{m} &&
	$st->size           == $f->{s} &&
	$st->uid            == $self->name2uid($f->{o}) &&
	$st->gid            == $self->name2gid($f->{g}) &&
	($st->mode & 07777) == $self->acl2mode($f)
    ) {
       	return 1;
    } else {
	return 0;
    }
}

sub mkdir_p {
    my ($dir, $perm) = @_;
    $perm = 0777 unless(defined($perm));

    if (-d $dir) {
	return 1;
    } elsif (mkdir($dir, $perm)) {
	return 1;
    } elsif ($!{ENOENT}) {
	my $parentdir = $dir;
	$parentdir =~ s|(.*)/.+|$1|;
	mkdir_p($parentdir, $perm);
        if (-d $dir) {
            return 1;
        } else {
            return mkdir($dir, $perm);
        }
    } else {
	return undef;
    }
}

sub basedir {
    my ($self, $dir) = @_;
    $self->{basedir} = $dir if defined($dir);
    return $self->{basedir};
}

sub targets {
    my ($self, $targets) = @_;
    $self->{targets} = $targets if defined($targets);
    return $self->{targets};
}

sub add_target {
    my ($self, $target) = @_;
    push @{ $self->{targets} }, $target;
    return $self->{targets};
}

my %permstrbits = (
    '---' => 0,
    '--x' => 1,
    '-w-' => 2,
    '-wx' => 3,
    'r--' => 4,
    'r-x' => 5,
    'rw-' => 6,
    'rwx' => 7,
);

sub setmeta {
    my ($self, $f) = @_;
    my $fn = "$self->{this_backup}/$f->{name}";
    print STDERR "$fn is tainted!" if tainted($fn);
    my $mode = $self->acl2mode($f);
    print STDERR "$mode is tainted!" if tainted($mode);
    chmod($mode, $fn);
    chown($self->name2uid($f->{o}), $self->name2gid($f->{g}), $fn);
    utime(time, $f->{m}, $fn);
}

# computes the mode from the acl (and the set[ug]id and sticky bits)
# and returns it. Optional ACL entries are currently ignored but should
# eventually be returned as a second value.

sub acl2mode {
    my ($self, $f) = @_;

    my $mode = 0;
    if ($f->{acl}) {
        for my $ace (split(',', $f->{acl})) {
            if ($ace =~ /^u::(...)$/) {
                $mode |= ($permstrbits{$1} << 6);
            } elsif ($ace =~ /^g::(...)$/) {
                $mode |= ($permstrbits{$1} << 3);
            } elsif ($ace =~ /^o:(...)$/) {
                $mode |= ($permstrbits{$1} << 0);
            } else {
                $self->log(5, "warning: unknown ACE $ace ignored");
            }
        }
    }
    if ($f->{setuid}) { $mode |= 04000 }
    if ($f->{setgid}) { $mode |= 02000 }
    if ($f->{sticky}) { $mode |= 01000 }

    return $mode;
}

my %ucache;
sub name2uid {
    my ($self, $uname) = @_;
    $uname = $1 if $uname =~ /(.*)/; # detaint
    return $ucache{$uname} if (defined $ucache{$uname});
    if ($uname =~ /^\d+$/) {
        return $ucache{$uname} = $uname;
    } else {
        my $uid = getpwnam($uname);
        if (defined($uid)) {
            return $ucache{$uname} = $uid;
        } else {
            return $ucache{$uname} = $self->{unknown_uid};
        }
    }
}

my %gcache;
sub name2gid {
    my ($self, $gname) = @_;
    $gname = $1 if $gname =~ /(.*)/; # detaint
    return $gcache{$gname} if (defined $gcache{$gname});
    if ($gname =~ /^\d+$/) {
        return $gcache{$gname} = $gname;
    } else {
        my $gid = getgrnam($gname);
        if (defined($gid)) {
            return $gcache{$gname} = $gid;
        } else {
            return $gcache{$gname} = $self->{unknown_gid};
        }
    }
}

sub log {
    my ($self, $level, $msg) = @_;
    if ($level <= $self->{log_level}) {
        $self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n");
    }
}

sub log_level {
    my ($self, $log_level) = @_;
    $self->{log_level}  = $log_level if defined($log_level);
    return $self->{log_level};
}

# vim: tw=0 expandtab
1;