#!/usr/bin/perl
package Simba::CA;
use strict;
use warnings;

use Encode;
use IPC::Open2;
use POSIX qw(strftime);
use Simba::Util qw(quote unquote typestr);
use Readonly;
use Digest::SHA1;
use List::Util qw(min);
use IO::Handle;
use File::stat;
use Scalar::Util qw(tainted);
use DBI;

Readonly my $BUFSIZE => 128 * 1024;

sub new {
    my ($class, $opt) = @_;

    my $self = {};
    bless $self, $class;

    $self->{basedir} = '/backup';
    $self->{unknown_uid} = 65533;
    $self->{unknown_gid} = 65533;
    $self->{fh_log}  = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR;
    $self->{log_level} = 99;
    if ($opt->{dbi}) {
        $self->{dbh} = DBI->connect(@{ $opt->{dbi} },
                                       { AutoCommit => 0,
                                         PrintError => 1, 
                                         RaiseError => 1
                                       }
                           );
    } elsif ($opt->{dbi_file}) {
        my $fn = $opt->{dbi_file};
        open(FN, "<$fn") or die "cannot open $fn: $!";
        my $line = <FN>;
        close(FN);
        my @cred = split(/[\s\n]+/, $line);
        $self->{dbh} = DBI->connect(@cred,
                                       { AutoCommit => 0,
                                         PrintError => 1, 
                                         RaiseError => 1
                                       }
                           );
    }
    $self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets", { Slice => {} });
    if ($ENV{HOME} =~ m{([/\w]*)}) {
        if (-f "$1/.ssh/id_rsa") {
            if (my $st = stat("$1/.ssh/id_rsa")) {
                if ($st->uid == $>) {
                    $self->{ssh_id_file} = "$1/.ssh/id_rsa";
                }
            }
        }
    }

    return $self;
}

sub run {
    my ($self) = @_;

    # run sequentially for prototype. In production we probably
    # want some concurrency
    for my $target (@{$self->{targets}}) {
	$self->backup2disk($target);
    }
}

sub backup2disk {
    my ($self, $target) = @_;

    # get previous generation
    my @dirs = glob($self->{basedir} . '/????-??-??T??.??.??/' .  $target->{host} . '/' .  $target->{dir});

    $self->{last_backup} = $dirs[-1];
    $self->{last_backup} = $1 if $self->{last_backup} =~ /(.*)/; # detaint
    $self->{last_backup_id} = $self->get_last_session_id();

    my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime);
    $self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' .  $target->{dir};
    $self->new_session();

    my ($list_pid, $list_cfd, $list_dfd);	# connection to get list of files
    my ($file_pid, $file_cfd, $file_dfd);	# connection to get content of files
    $list_pid = open2($list_dfd, $list_cfd,
                      "/usr/bin/ssh",
                      "-l", "simba_da",
                      $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
                      $target->{host}, "da");
    $list_cfd->printflush("list $target->{dir}\n");	# XXX - encode!
    close($list_cfd);
    while (<$list_dfd>) {
        chomp;
        $self->log(10, "file: $_");
	# split into fields
        chomp;
	my $f = $self->parse($_);
	# if file is already present
	if ($f->{t} eq 'f') {
	    if($self->present($f)) {
		link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}") or die; # XXX
                $self->log(10, "linked");
	    } else {

		# else request from da
		unless ($file_pid) {
		    $file_pid = open2($file_dfd, $file_cfd,
                                      "/usr/bin/ssh",
                                      "-l", "simba_da",
                                      $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
                                      $target->{host}, "da");
		}
		$file_cfd->printflush("get $target->{dir}/$f->{name}\n");	# XXX - encode!
                my $header = <$file_dfd>; # this should be the same as $_ - check?
                if ($header =~ /^data (.*)/) {
                    my $f2 = $self->parse($1);
                    my $backup_filename = "$self->{this_backup}/$f->{name}";
                    open(my $file_bfd, '>:raw', $backup_filename) or die "cannot open backup file $backup_filename: $!";
                    my $size = $f2->{s};
                    my $err;
                    my $sha1 = Digest::SHA1->new;

                    while ($size > 0) {
                        my $buffer;
                        my $rc = read($file_dfd, $buffer, min($size, $BUFSIZE));
                        if (!defined($rc)) {
                            # I/O error
                            $self->log(5, "error reading from data socket: $!");
                            last;
                        } elsif ($rc == 0) {
                            # premature EOF.
                            $self->log(5, "unexpected EOF reading from data socket");
                            last;
                        }
                        $file_bfd->print($buffer) or die "write to backup failed: $!";
                        $size -= length($buffer);
                        $sha1->add($buffer);
                    }
                    close($file_bfd) or die "write to backup failed: $!";
                    my $trailer = <$file_dfd>; # should be empty line
                    $trailer = <$file_dfd>; 
                    if ($trailer =~ /^fail /) {
                        $self->log(5, $trailer);
                    } elsif ($trailer =~ /^chk sha1 (\w+)/) {
                        my $checksum = $sha1->hexdigest;
                        if ($checksum ne $1) {
                            $self->log(5, "checksum error\n");
                        }
                        $f->{checksum} = $checksum;
                    } else {
                        $self->log(5, "unexpected trailer $trailer\n");
                    }
                    $self->setmeta($f);
                    $self->log(10, "stored");
                } else {
                    $self->log(5, "unexpected header $header\n");
                }
	    }
	} elsif ($f->{t} eq 'd') {
                my $d = "$self->{this_backup}/$f->{name}";
                $d =~ s,//+,/,g;
                mkdir_p($d) or die "cannot mkdir $d: $!"; # XXX
                $self->setmeta($f);
	} elsif ($f->{t} eq 'l') {
                my $l = "$self->{this_backup}/$f->{name}";
                unless (symlink($f->{lt}, $l)) {
                    die "cannot symlink $l -> $f->{lt}: $!"; # XXX
                }
                # $self->setmeta($f); ignore for symlinks. would need to use 
                # lchown, lchmod, etc.
	} else {
	    # create local copy (or insert into DB only?)
            $self->log(5, "ignored $_\n");
	}
	# insert into DB.
        $self->db_record_version($target, $f);
    }
    $self->close_session();
}

sub parse {
    my ($self, $s) = @_;

    my @s = split(/ +/, $s);
    my $f = {};
    $f->{name} = shift @s;
    $f->{name} = $1 if ($f->{name} =~ /(.*)/);  # detaint XXX
    for (@s) {
	my ($k, $v) = split(/=/, $_, 2);
	$f->{$k} = $v;
	# special processing for permissions etc, here?
    }
    $f->{o} = unquote($f->{o});
    $f->{g} = unquote($f->{g});
    $f->{acl} = unquote($f->{acl});
    $f->{m} = $1 if $f->{m} =~ /^(\d+)$/;
    $f->{lt} = unquote($1) if defined $f->{lt} && $f->{lt} =~ /(.*)/;
    return $f;

}

sub present {
    my ($self, $f) = @_;
    return unless $self->{last_backup};
    my $st = lstat("$self->{last_backup}/$f->{name}");
    return unless $st;
    if ($st->mtime          == $f->{m} &&
	$st->size           == $f->{s} &&
	$st->uid            == $self->name2uid($f->{o}) &&
	$st->gid            == $self->name2gid($f->{g}) &&
	($st->mode & 07777) == $self->acl2mode($f)
    ) {
       	return 1;
    } else {
	return 0;
    }
}

sub mkdir_p {
    my ($dir, $perm) = @_;
    $perm = 0777 unless(defined($perm));

    if (-d $dir) {
	return 1;
    } elsif (mkdir($dir, $perm)) {
	return 1;
    } elsif ($!{ENOENT}) {
	my $parentdir = $dir;
	$parentdir =~ s|(.*)/.+|$1|;
	mkdir_p($parentdir, $perm);
        if (-d $dir) {
            return 1;
        } else {
            return mkdir($dir, $perm);
        }
    } else {
	return undef;
    }
}

sub basedir {
    my ($self, $dir) = @_;
    $self->{basedir} = $dir if defined($dir);
    return $self->{basedir};
}

sub targets {
    my ($self, $targets) = @_;
    $self->{targets} = $targets if defined($targets);
    return $self->{targets};
}

sub add_target {
    my ($self, $target) = @_;
    push @{ $self->{targets} }, $target;
    return $self->{targets};
}

my %permstrbits = (
    '---' => 0,
    '--x' => 1,
    '-w-' => 2,
    '-wx' => 3,
    'r--' => 4,
    'r-x' => 5,
    'rw-' => 6,
    'rwx' => 7,
);

sub setmeta {
    my ($self, $f) = @_;
    my $fn = "$self->{this_backup}/$f->{name}";
    $self->log(3, "$fn is tainted!") if tainted($fn);
    my $mode = $self->acl2mode($f);
    $self->log(3, "$mode is tainted!") if tainted($mode);
    chmod($mode, $fn);
    chown($self->name2uid($f->{o}), $self->name2gid($f->{g}), $fn);
    utime(time, $f->{m}, $fn);
}

# computes the mode from the acl (and the set[ug]id and sticky bits)
# and returns it. Optional ACL entries are currently ignored but should
# eventually be returned as a second value.

sub acl2mode {
    my ($self, $f) = @_;

    my $mode = 0;
    if ($f->{acl}) {
        for my $ace (split(',', $f->{acl})) {
            if ($ace =~ /^u::(...)$/) {
                $mode |= ($permstrbits{$1} << 6);
            } elsif ($ace =~ /^g::(...)$/) {
                $mode |= ($permstrbits{$1} << 3);
            } elsif ($ace =~ /^o:(...)$/) {
                $mode |= ($permstrbits{$1} << 0);
            } else {
                $self->log(5, "warning: unknown ACE $ace ignored");
            }
        }
    }
    if ($f->{setuid}) { $mode |= 04000 }
    if ($f->{setgid}) { $mode |= 02000 }
    if ($f->{sticky}) { $mode |= 01000 }

    return $mode;
}

my %ucache;
sub name2uid {
    my ($self, $uname) = @_;
    $uname = $1 if $uname =~ /(.*)/; # detaint
    return $ucache{$uname} if (defined $ucache{$uname});
    if ($uname =~ /^\d+$/) {
        return $ucache{$uname} = $uname;
    } else {
        my $uid = getpwnam($uname);
        if (defined($uid)) {
            return $ucache{$uname} = $uid;
        } else {
            return $ucache{$uname} = $self->{unknown_uid};
        }
    }
}

my %gcache;
sub name2gid {
    my ($self, $gname) = @_;
    $gname = $1 if $gname =~ /(.*)/; # detaint
    return $gcache{$gname} if (defined $gcache{$gname});
    if ($gname =~ /^\d+$/) {
        return $gcache{$gname} = $gname;
    } else {
        my $gid = getgrnam($gname);
        if (defined($gid)) {
            return $gcache{$gname} = $gid;
        } else {
            return $gcache{$gname} = $self->{unknown_gid};
        }
    }
}

sub log {
    my ($self, $level, $msg) = @_;
    if ($level <= $self->{log_level}) {
        $self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n") 
            or die "write to log failed: $!";
    }
}

sub log_level {
    my ($self, $log_level) = @_;
    $self->{log_level}  = $log_level if defined($log_level);
    return $self->{log_level};
}

sub db_record_version {
    my ($self, $target, $f) = @_;

    my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
                                                { Slice => {} },
                                                $target->{id}, $f->{name});
    unless (@$db_f) {
        $self->{dbh}->do("insert into files(fileset, path) values(?, ?)",
                         {},
                         $target->{id}, $f->{name});
        $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
                                                    { Slice => {} },
                                                    $target->{id}, $f->{name});

    }
    if ($f->{t} eq 'f' && !defined($f->{checksum})) {
        # this must be a link to the previous version
        my $db_pv = $self->{dbh}->selectall_arrayref("select * from versions where file=? and session=?",
                                                     { Slice => {} },
                                                     $db_f->[0]{id},
                                                     $self->{last_backup_id});
        $f->{checksum} = $db_pv->[0]{checksum};
    }
    $self->{dbh}->do("insert into versions(file,
                                           file_id, file_type, file_size, file_mtime,
                                           file_owner, file_group, file_acl,
                                           file_unix_bits,
                                           file_rdev,
                                           date, checksum, online, file_linktarget,
                                           session)
                      values(?,
                             ?, ?, ?, ?,
                             ?, ?, ?,
                             ?,
                             ?,
                             ?, ?, ?, ?,
                             ?)",
                     {},
                     $db_f->[0]{id},
                     $f->{id}, $f->{t}, $f->{s}, $f->{m},
                     $f->{o}, $f->{g}, $f->{acl},
                     join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
                     $f->{rdev},
                     time(), $f->{checksum}, 1, $f->{lt},
                     $self->{session_id}
                    );
}

sub new_session {
    my ($self) = @_;
    $self->{dbh}->do("insert into sessions(start_date, prefix) values(?, ?)", {}, time(), $self->{this_backup});
    $self->{session_id} = $self->{dbh}->{'mysql_insertid'};
}

sub close_session {
    my ($self) = @_;
    $self->{dbh}->do("update sessions set end_date=? where id=?", {}, time(), $self->{session_id});
}

sub get_last_session_id {
    my ($self) = @_;
    my $sessions = $self->{dbh}->selectall_arrayref("select * from sessions where prefix=?",
                                                    { Slice => {} },
                                                    $self->{last_backup});
    die "$self->{last_backup} not a unique prefix" unless @$sessions == 1;
    return $sessions->[0]{id};
}

# vim: tw=0 expandtab
1;