135 lines
3.7 KiB
Perl
135 lines
3.7 KiB
Perl
#!/usr/bin/perl
|
|
|
|
=head1 NAME

convert_db_to_v2 - convert simba metadata database to v2 layout

=head1 DESCRIPTION

This script converts the simba metadata database from v1 (up to r62)
to v2 (somewhere after r63) layout, by splitting the versions table into
two new tables, versions2 and instances.

=cut
|
|
|
|
|
|
use warnings;
|
|
use strict;
|
|
|
|
use Simba::CA;
|
|
|
|
# Build the Simba::CA object.  The value handed over as dbi_file is
# $SIMBA_DB_CONN when that is set to a true value, otherwise the file
# .dbi/simba below $HOME.
my $dbi_file = $ENV{SIMBA_DB_CONN} || "$ENV{HOME}/.dbi/simba";
my $ca       = Simba::CA->new({ dbi_file => $dbi_file });

# Every statement in this script runs on the handle kept in the object's
# {dbh} slot.
my $dbh = $ca->{dbh};
|
|
|
|
# $dbh->{mysql_use_result} = 1; # fetch row by row, not all at once
|
|
|
|
# Create versions2.  The conversion below stores one row per distinct
# combination of file attributes (type, size, mtime, owner, group, acl,
# unix bits, rdev, checksum, link target) in this table.
#
# do() returns undef on failure.  Abort explicitly in that case so the
# conversion never runs without a freshly created table (e.g. when versions2
# already exists from an earlier run), even if the handle was opened without
# RaiseError.
$dbh->do(q{
    create table versions2 (
        `id` int(11) NOT NULL auto_increment,
        `file_type` char(1) default NULL,
        `file_size` bigint(20) default NULL,
        `file_mtime` int(11) default NULL,
        `file_owner` varchar(255) default NULL,
        `file_group` varchar(255) default NULL,
        `file_acl` varchar(255) default NULL,
        `file_unix_bits` set('setuid','setgid','sticky') default NULL,
        `file_rdev` int(11) default NULL,
        `checksum` varchar(255) default NULL,
        `file_linktarget` text,
        PRIMARY KEY (`id`),
        KEY `versions2_content_idx` (`checksum`, file_mtime)
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
}) or die "cannot create table versions2: " . $dbh->errstr . "\n";
|
|
|
|
# Create instances.  The conversion below writes one row per row of the old
# versions table into it (keeping the original id) and links it to its
# versions2 row through the version column.
#
# do() returns undef on failure.  Abort explicitly in that case so the
# conversion never runs without a freshly created table (e.g. when instances
# already exists from an earlier run), even if the handle was opened without
# RaiseError.
$dbh->do(q{
    create table instances (
        `id` int(11) NOT NULL auto_increment,
        `file` int(11) default NULL,
        `file_id` varchar(255) default NULL,
        `date` int(11) default NULL,
        `online` tinyint(1) default NULL,
        `session` int(11) default NULL,
        version int not null,
        PRIMARY KEY (`id`),
        KEY `instances_session_file_idx` (`session`,`file`),
        KEY `instances_file_session_idx` (file, `session`),
        KEY instances_version_idx (version)
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;
}) or die "cannot create table instances: " . $dbh->errstr . "\n";
|
|
|
|
# mysql doesn't support nested commands (running the inserts while the
# select is still delivering rows), so we need to read a chunk of data
# from versions, insert it into versions2 and instances, then read the
# next chunk ...

# Width of the id range that is read from versions per chunk.
my $chunk_size = 1_000_000;

# Upper bound for the chunk loop.  The call is made in list context so that
# an error (empty list) is not silently mistaken for a NULL result.
my ($max_version_id) = $dbh->selectrow_array("select max(id) from versions");
die "cannot determine max(id) of versions: " . $dbh->errstr . "\n" if $dbh->err;

# max(id) is NULL for an empty versions table.  Use 0 so that the loop
# condition and the progress output work on a defined number.
$max_version_id = 0 unless defined $max_version_id;

# Reads one chunk: all rows whose id lies in the half-open range [from, to).
my $sth_select = $dbh->prepare("select * from versions where id >= ? and id < ?");
|
|
|
|
# Columns of versions which are copied to versions2.  Together they also
# form the key by which identical versions are recognised.
my @version2_fields = qw(
    file_type  file_size  file_mtime
    file_owner file_group file_acl
    file_unix_bits file_rdev
    checksum file_linktarget
);

# Insert for versions2: one placeholder per column in @version2_fields, in
# the same order.  The id column is left to auto_increment.
my $sth_ins_versions2 = do {
    my $column_list  = join(",", @version2_fields);
    my $placeholders = join(",", ("?") x @version2_fields);
    $dbh->prepare("insert into versions2( $column_list) values ($placeholders)");
};

# Insert for instances: the id is given explicitly (not auto-generated),
# and version takes the id of the matching versions2 row.
my $sth_ins_instances = $dbh->prepare(q{
    insert into instances(
        id,
        file,
        file_id,
        date,
        online,
        session,
        version
    )
    values(?, ?, ?, ?, ?, ?, ?)
});
|
|
|
|
# Maps the attribute key of every versions2 row inserted so far to the id of
# that row, so that identical versions are stored only once.
my %versions2;

# Walk over versions in id ranges of $chunk_size rows (see $sth_select).
for (my $version_id = 0; $version_id <= $max_version_id; $version_id += $chunk_size) {
    # Progress: seconds since script start, first id of this chunk, number
    # of distinct versions2 rows so far.
    print time - $^T, " ", $version_id, " ", scalar keys %versions2, "\n";

    $sth_select->execute($version_id, $version_id + $chunk_size)
        or die "cannot read versions from id $version_id: " . $sth_select->errstr . "\n";

    while (my $r = $sth_select->fetchrow_hashref) {
        my @attributes = @{$r}{@version2_fields};

        # Build the lookup key from all versions2 columns.  Defined values
        # get a "=" prefix while NULL becomes the empty string, so a NULL
        # column and an empty-string column yield different keys and are
        # not merged into the same versions2 row.
        my $key = join($;, map { defined($_) ? "=$_" : '' } @attributes);

        my $version2_id = $versions2{$key};
        unless (defined $version2_id) {
            # First occurrence of this attribute set: store it and remember
            # the generated id.
            $sth_ins_versions2->execute(@attributes)
                or die "cannot insert into versions2: " . $sth_ins_versions2->errstr . "\n";
            $version2_id = $sth_ins_versions2->{mysql_insertid};
            defined $version2_id
                or die "no insert id for versions2 row (versions.id $r->{id})\n";
            $versions2{$key} = $version2_id;
        }

        # One instances row per versions row, with the original id.
        $sth_ins_instances->execute(@{$r}{qw(id file file_id date online session)}, $version2_id)
            or die "cannot insert into instances (id $r->{id}): " . $sth_ins_instances->errstr . "\n";
    }
}

# Final progress line, same format as above.
print time - $^T, " ", $max_version_id, " ", scalar keys %versions2, "\n";
|
|
|
|
|
|
# vim: tw=132 expandtab sw=4 ts=8
|