[rt-users] Migration from MySQL to PostgreSQL - Pg datatypes for blobs?
Václav Ovsík
vaclav.ovsik at i.cz
Fri May 14 17:43:35 EDT 2010
Dear Users and Developers,
I'm facing a migration four instances RT 3.2.2 to RT 3.8.8 with a number
of customizations.
Back-end database is MySQL and I have prepared a script for loading
database dumps from old MySQL 3.23.58 to 5.0.51 (Debian Lenny)
and then upgrading RT schema (rt-setup-database).
A next script will migrate from MySQL to PostgreSQL.
The PostgreSQL database cluster is initialized with cs_CZ.UTF-8 locale.
A problem arises with a column Attachments.Content with a pg type text.
DBD::Pg::st execute failed: ERROR: invalid byte sequence for encoding "UTF8": 0xed2066
HINT: This error can also happen if the byte sequence does not match the encoding expected by the server, which is controlled by "client_encoding"...
I think the correct data-type should by bytea, but this data type has
a bit strange behavior. Nevertheless a data copy ends with success after
data-type change:
alter table attachments drop column content;
alter table attachments add column content bytea;
The problem with bytea is (at least in my script), that values needs
extra handling. Binding values to parameters of type bytea must by done
according manual page of DBD::Pg... (RT already uses bytea in the tables
session).
Other fields: ObjectCustomFieldValues.LargeContent, Attributes.Content?
Any suggestions?
Regards
--
Zito
-------------- next part --------------
#!/usr/bin/env perl
use strict;
use DBI;
use Data::Dumper;
use Getopt::Long;
use DBD::Pg qw(PG_BYTEA);
my ($verbose, $dryrun);
sub usage
{
my ($exitcode) = @_;
print STDERR <<EOF;
rt-mysql2pg [options]
Converts Request Tracker database from MySQL to PostgreSQL
Do a copy of every record in user tables from a source database to
a destination database. A destination schema must exists (same as source
schema), so inserts can be done without error. Tables in destination database
are deleted before doing a copy!
options:
--src-dsn dsn perl DBI data source name (e.g. dbi:Pg:dbname=ca)
--src-user user perl DBI user name
--src-password pass perl DBI password
--dst-dsn dsn perl DBI data source name (e.g. dbi:Pg:dbname=ca)
--dst-user user perl DBI user name
--dst-password pass perl DBI password
-n --dry-run dry run (no db modifications)
-v --verbose run verbosly (incremental)
-h --help help usage
EOF
exit($exitcode) if defined $exitcode;
}
sub user_tables
{
my ($dbh) = @_;
my $sth = $dbh->table_info(undef, undef, undef, 'TABLE');
$sth->execute();
my @user_tables;
while ( my $r = $sth->fetchrow_arrayref() )
{
my ($table_cat, $table_schem, $table_name, $table_type, $remarks) = @$r;
next unless $table_type eq 'TABLE';
next if $dbh->{Driver}->{Name} eq 'Pg' && $table_schem ne 'public';
push @user_tables, $table_name;
}
return \@user_tables;
}
sub user_seqs
{
my ($dbh) = @_;
return $dbh->selectcol_arrayref('SELECT sequence_name FROM information_schema.sequences');
}
sub column_info
{
my ($dbh, $table) = @_;
my $schema = $dbh->{Driver}->{Name} eq 'Pg' ? 'public' : undef;
my $sth = $dbh->column_info(undef, $schema, $table, undef);
$sth->execute();
my (@coln, %colt);
while ( my $r = $sth->fetchrow_arrayref() )
{
my ($table_cat, $table_schem, $table_name, $column_name, $data_type,
$type_name, $column_size, $buffer_length, $decimal_digits,
$num_prec_radix, $nullable, $remarks, $column_def,
$sql_data_type, $sql_datetime_sub, $char_octet_length,
$ordinal_position, $is_nullable, $type_name_and_size) = @$r;
push @coln, lc($column_name);
$colt{lc($column_name)} = {
'data_type' => $data_type,
'type_name' => lc($type_name),
'type_name_and_size' => lc($type_name_and_size),
};
}
return (\@coln, \%colt);
}
sub copy_table
{
my ($table, $dbh_src, $dbh_dst) = @_;
my ($scoln, $scolt) = column_info($dbh_src, $table);
my ($dcoln, $dcolt) = column_info($dbh_dst, lc($table));
my ($s_nrows) = $dbh_src->selectrow_array("SELECT count(*) FROM $table");
my $ncols = @$dcoln;
my $slist = join(',', sort keys %$scolt);
my $dlist = join(',', sort keys %$dcolt);
if ( $slist ne $dlist )
{
die qq|\nerror: columns of "$table" on source and destination differs!\n|
.qq|src: $slist\ndst: $dlist\n|;
}
my $chunked = $scolt->{'id'}{'type_name'} eq 'int' ? 1 : 0;
my ($id_min, $id_max) = (0, 1);
my ($id_step) = 100;
if ( $chunked )
{
($id_min, $id_max) = $dbh_src->selectrow_array("SELECT min(id), max(id) FROM $table");
}
my $c_sel = "SELECT " . join(',', @$dcoln) . " FROM $table";
$c_sel .= " WHERE id >= ? AND id <= ?" if $chunked;
$verbose < 2 || print "\n\texecuting on source:\n\t$c_sel\n";
my @colattr = map {
$dcolt->{$_}{'type_name'} eq 'bytea' ? { pg_type => PG_BYTEA } : undef;
} @$dcoln;
my $c_ins = "INSERT INTO $table ("
. join(',', @$dcoln)
. ") VALUES ("
. ( join(',', ('?') x @$dcoln) )
. ")";
my $sth_dst = $dbh_dst->prepare($c_ins);
$verbose < 2 || print "\n\texecuting on dest:\n\t$c_ins\n\n";
my $sth_src = $dbh_src->prepare($c_sel);
my $n = 0;
for(my $id_low = $id_min; $id_low <= $id_max; $id_low+=$id_step)
{
$sth_src->execute($chunked ? ($id_low, $id_low + $id_step -1) : ());
while ( my $ar = $sth_src->fetchrow_arrayref() )
{
$verbose < 3 || print Data::Dumper->Dump([$ar], [qw(values)]);
unless ( $dryrun ) {
for(my $i = 0; $i < $ncols; $i++)
{
$sth_dst->bind_param($i +1, $ar->[$i], $colattr[$i]);
}
$sth_dst->execute();
};
$n++;
}
if ( $verbose )
{
my $msg = "; $n/$s_nrows rec";
print($msg, ("\b" x length($msg)));
}
}
$n == $s_nrows
|| die qq|error: $n rows copied, but source contains $s_nrows!\n|;
return $n;
}
sub copy_everything
{
my ($dbh_src, $dbh_dst) = @_;
my @stabs = @{user_tables($dbh_src)};
my @dtabs = @{user_tables($dbh_dst)};
my @tabs = @stabs;
foreach my $t (@dtabs)
{
push @tabs, $t unless grep(lc($_) eq lc($t), @stabs);
}
foreach my $table (@tabs)
{
if ( !grep(lc($_) eq lc($table), @dtabs) )
{
warn qq|warn: skipping table "$table" not existing on destination\n|;
next;
}
if ( !grep(lc($_) eq lc($table), @stabs) )
{
warn qq|warn: skipping table "$table" not existing on source\n|;
next;
}
$verbose && print "tab: $table: ";
$dryrun || $dbh_dst->do("DELETE FROM $table");
$verbose && print "del";
my $n = copy_table($table, $dbh_src, $dbh_dst);
$verbose && print "; $n records copied";
$verbose && print "\n";
}
my $user_seqs = user_seqs($dbh_dst);
foreach my $seq (@$user_seqs)
{
$verbose && print "seq: $seq: ";
my $table = $seq;
$table =~ s/_id_s(?:eq)?$//;
$dbh_dst->do("SELECT setval('$seq', (SELECT max(id) FROM $table)+1)");
$verbose && print "updated\n";
}
}
MAIN: {
$| = 1;
my ($usage);
my ($dbi_src_dsn, $dbi_src_user, $dbi_src_password);
my ($dbi_dst_dsn, $dbi_dst_user, $dbi_dst_password);
usage(1) unless Getopt::Long::GetOptions( 'h|help' => \$usage,
'v|verbose+' => \$verbose,
'n|dry-run' => \$dryrun,
'src-dsn=s' => \$dbi_src_dsn,
'src-user=s' => \$dbi_src_user,
'src-password=s' => \$dbi_src_password,
'dst-dsn=s' => \$dbi_dst_dsn,
'dst-user=s' => \$dbi_dst_user,
'dst-password=s' => \$dbi_dst_password);
usage(0) if $usage;
if ( @ARGV == 2 )
{
($dbi_src_dsn, $dbi_dst_dsn) = @ARGV;
}
elsif ( @ARGV )
{
usage(1);
}
die "destination DSN not set!\n" unless $dbi_dst_dsn ne '';
die "source DSN not set!\n" unless $dbi_src_dsn ne '';
my $dbh_dst = DBI->connect($dbi_dst_dsn, $dbi_dst_user, $dbi_dst_password,
{
'RaiseError' => 1,
'AutoCommit' => 1,
}
);
my $dbh_src = DBI->connect($dbi_src_dsn, $dbi_src_user, $dbi_src_password,
{
'RaiseError' => 1,
'AutoCommit' => 1,
}
);
$dbh_src->{'mysql_enable_utf8'} = 1;
$dbh_src->do("SET NAMES utf8");
copy_everything($dbh_src, $dbh_dst);
$dbh_dst->disconnect();
$dbh_src->disconnect();
}
More information about the rt-users
mailing list