#!/usr/bin/perl

=head1 NAME

convert_db_to_v2 - convert simba metadata database to v2 layout

=head1 DESCRIPTION

This script converts the simba metadata database from v1 (up to r62)
to v2 (somewhere after r63) layout, by splitting the versions table into
two new tables versions2 and instances.

=cut


use warnings;
use strict;

use Simba::CA;

my $ca = Simba::CA->new({
			    dbi_file => $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba",
			});

my $dbh = $ca->{dbh};

#  $dbh->{mysql_use_result} = 1;	# fetch row by row, not all at once

$dbh->do(q{
	    create table versions2 (
	      `id` int(11) NOT NULL auto_increment,
	      `file_type` char(1) default NULL,
	      `file_size` bigint(20) default NULL,
	      `file_mtime` int(11) default NULL,
	      `file_owner` varchar(255) default NULL,
	      `file_group` varchar(255) default NULL,
	      `file_acl` varchar(255) default NULL,
	      `file_unix_bits` set('setuid','setgid','sticky') default NULL,
	      `file_rdev` int(11) default NULL,
	      `checksum` varchar(255) default NULL,
	      `file_linktarget` text,
	      PRIMARY KEY  (`id`),
	      KEY `versions2_content_idx` (`checksum`, file_mtime)
	    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
          });

$dbh->do(q{
	    create table instances (
	      `id` int(11) NOT NULL auto_increment,
	      `file` int(11) default NULL,
	      `file_id` varchar(255) default NULL,
	      `date` int(11) default NULL,
	      `online` tinyint(1) default NULL,
	      `session` int(11) default NULL,
	      version int not null,
	      PRIMARY KEY  (`id`),
	      KEY `instances_session_file_idx` (`session`,`file`),
	      KEY `instances_file_session_idx` (file, `session`),
	      KEY instances_version_idx (version)
	    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
          });

# mysql doesn't support nested commands,
# so we need to read a chunk of data from versions,
# insert it into versions2 and instances,
# then read the next chunk ...

my $chunk_size = 1_000_000;
my $max_version_id = $dbh->selectrow_array("select max(id) from versions");

my $sth_select = $dbh->prepare("select * from versions where id >= ? and id < ?");

my @version2_fields = qw(
			  file_type
			  file_size
			  file_mtime
			  file_owner
			  file_group
			  file_acl
			  file_unix_bits
			  file_rdev
			  checksum
			  file_linktarget
                      );

my $sth_ins_versions2
    = $dbh->prepare(
	'insert into versions2( ' .
	join (",", @version2_fields) .
	") values (" .
	join(",", ("?") x @version2_fields) .
	")"
      );

my $sth_ins_instances
    = $dbh->prepare(
	q{
	    insert into instances(
	      id,
	      file,
	      file_id,
	      date,
	      online,
	      session,
	      version
	    )
	    values(?, ?, ?, ?, ?, ?, ?)
	 }
      );

my %versions2;
for (my $version_id = 0; $version_id <= $max_version_id; $version_id += $chunk_size) {
    print time - $^T, " ", $version_id, " ", scalar keys %versions2, "\n";
    $sth_select->execute($version_id, $version_id + $chunk_size);
    while(my $r = $sth_select->fetchrow_hashref) {
        my $key = join($;, map((defined $_ ? $_ : ''),
                               @{$r}{@version2_fields}
                              )
                      );
        my $version2_id;
        if ($versions2{$key}) {
            $version2_id = $versions2{$key};
        } else {
            $sth_ins_versions2->execute(@{$r}{@version2_fields});
            $version2_id = $sth_ins_versions2->{mysql_insertid};
            $versions2{$key} = $version2_id;
        }
        $sth_ins_instances->execute(@{$r}{qw(id file file_id date online session)}, $version2_id);
    }
}
print time - $^T, " ", $max_version_id, " ", scalar keys %versions2, "\n";


# vim: tw=132 expandtab sw=4 ts=8