[rt-users] Migration from MySQL to PostgreSQL - Pg datatypes for blobs?

Fri May 14 17:43:35 EDT 2010

Dear Users and Developers,
I'm facing a migration four instances RT 3.2.2 to RT 3.8.8 with a number
of customizations.
Back-end database is MySQL and I have prepared a script for loading
database dumps from old MySQL 3.23.58 to 5.0.51 (Debian Lenny)
and then upgrading RT schema (rt-setup-database).
A next script will migrate from MySQL to PostgreSQL.
The PostgreSQL database cluster is initialized with cs_CZ.UTF-8 locale.
A problem arises with a column Attachments.Content with a pg type text.

DBD::Pg::st execute failed: ERROR:  invalid byte sequence for encoding "UTF8": 0xed2066
HINT:  This error can also happen if the byte sequence does not match the encoding expected by the server, which is controlled by "client_encoding"...

I think the correct data-type should by bytea, but this data type has
a bit strange behavior. Nevertheless a data copy ends with success after
data-type change:

alter table attachments drop column content;
alter table attachments add column content bytea;

The problem with bytea is (at least in my script), that values needs
extra handling. Binding values to parameters of type bytea must by done
according manual page of DBD::Pg... (RT already uses bytea in the tables
session).

Other fields: ObjectCustomFieldValues.LargeContent, Attributes.Content?
Any suggestions?

Regards
-- 
Zito
-------------- next part --------------
#!/usr/bin/env perl
use strict;
use DBI;
use Data::Dumper;
use Getopt::Long;
use DBD::Pg qw(PG_BYTEA);

my ($verbose, $dryrun);

sub usage
{
    my ($exitcode) = @_;
    print STDERR <<EOF;
rt-mysql2pg [options]

Converts Request Tracker database from MySQL to PostgreSQL

Do a copy of every record in user tables from a source database to
a destination database. A destination schema must exists (same as source
schema), so inserts can be done without error. Tables in destination database
are deleted before doing a copy!

options:

    --src-dsn dsn	    perl DBI data source name (e.g. dbi:Pg:dbname=ca)
    --src-user user	    perl DBI user name
    --src-password pass	    perl DBI password

    --dst-dsn dsn	    perl DBI data source name (e.g. dbi:Pg:dbname=ca)
    --dst-user user	    perl DBI user name
    --dst-password pass	    perl DBI password

 -n --dry-run		    dry run (no db modifications)

 -v --verbose		    run verbosly (incremental)
 -h --help		    help usage

EOF
    exit($exitcode) if defined $exitcode;
}

sub user_tables
{
    my ($dbh) = @_;
    my $sth = $dbh->table_info(undef, undef, undef, 'TABLE');
    $sth->execute();
    my @user_tables;
    while ( my $r = $sth->fetchrow_arrayref() )
    {
	my ($table_cat, $table_schem, $table_name, $table_type, $remarks) = @$r;
	next unless $table_type eq 'TABLE';
	next if $dbh->{Driver}->{Name} eq 'Pg' && $table_schem ne 'public';
	push @user_tables, $table_name;
    }
    return \@user_tables;
}

sub user_seqs
{
    my ($dbh) = @_;
    return $dbh->selectcol_arrayref('SELECT sequence_name FROM information_schema.sequences');
}

sub column_info
{
    my ($dbh, $table) = @_;
    my $schema = $dbh->{Driver}->{Name} eq 'Pg' ? 'public' : undef;
    my $sth = $dbh->column_info(undef, $schema, $table, undef);
    $sth->execute();
    my (@coln, %colt);
    while ( my $r = $sth->fetchrow_arrayref() )
    {
	my ($table_cat, $table_schem, $table_name, $column_name, $data_type,
		$type_name, $column_size, $buffer_length, $decimal_digits,
		$num_prec_radix, $nullable, $remarks, $column_def,
		$sql_data_type, $sql_datetime_sub, $char_octet_length,
		$ordinal_position, $is_nullable, $type_name_and_size) = @$r;
	push @coln, lc($column_name);
	$colt{lc($column_name)} = {
		'data_type' => $data_type,
		'type_name' => lc($type_name),
		'type_name_and_size' => lc($type_name_and_size),
	};
    }
    return (\@coln, \%colt);
}

sub copy_table
{
    my ($table, $dbh_src, $dbh_dst) = @_;
    my ($scoln, $scolt) = column_info($dbh_src, $table);
    my ($dcoln, $dcolt) = column_info($dbh_dst, lc($table));
    my ($s_nrows) = $dbh_src->selectrow_array("SELECT count(*) FROM $table");
    my $ncols = @$dcoln;
    my $slist = join(',', sort keys %$scolt);
    my $dlist = join(',', sort keys %$dcolt);
    if ( $slist ne $dlist )
    {
	die qq|\nerror: columns of "$table" on source and destination differs!\n|
		.qq|src: $slist\ndst: $dlist\n|;
    }
    my $chunked = $scolt->{'id'}{'type_name'} eq 'int' ? 1 : 0;
    my ($id_min, $id_max) = (0, 1);
    my ($id_step) = 100;
    if ( $chunked )
    {
	($id_min, $id_max) = $dbh_src->selectrow_array("SELECT min(id), max(id) FROM $table");
    }
    my $c_sel = "SELECT " . join(',', @$dcoln) . " FROM $table";
    $c_sel .= " WHERE id >= ? AND id <= ?" if $chunked;
    $verbose < 2 || print "\n\texecuting on source:\n\t$c_sel\n";
    my @colattr = map {
	    $dcolt->{$_}{'type_name'} eq 'bytea' ? { pg_type => PG_BYTEA } : undef;
	} @$dcoln;
    my $c_ins = "INSERT INTO $table ("
	    . join(',', @$dcoln)
	    . ") VALUES ("
	    . ( join(',', ('?') x @$dcoln) )
	    . ")";
    my $sth_dst = $dbh_dst->prepare($c_ins);
    $verbose < 2 || print "\n\texecuting on dest:\n\t$c_ins\n\n";
    my $sth_src = $dbh_src->prepare($c_sel);
    my $n = 0;
    for(my $id_low = $id_min; $id_low <= $id_max; $id_low+=$id_step)
    {
	$sth_src->execute($chunked ? ($id_low, $id_low + $id_step -1) : ());
	while ( my $ar = $sth_src->fetchrow_arrayref() )
	{
	    $verbose < 3 || print Data::Dumper->Dump([$ar], [qw(values)]);
	    unless ( $dryrun ) {
		for(my $i = 0; $i < $ncols; $i++)
		{
		    $sth_dst->bind_param($i +1, $ar->[$i], $colattr[$i]);
		}
		$sth_dst->execute();
	    };
	    $n++;
	}
	if ( $verbose )
	{
	    my $msg = "; $n/$s_nrows rec";
	    print($msg, ("\b" x length($msg)));
	}
    }
    $n == $s_nrows
	    || die qq|error: $n rows copied, but source contains $s_nrows!\n|;
    return $n;
}

sub copy_everything
{
    my ($dbh_src, $dbh_dst) = @_;
    my @stabs = @{user_tables($dbh_src)};
    my @dtabs = @{user_tables($dbh_dst)};
    my @tabs = @stabs;
    foreach my $t (@dtabs)
    {
	push @tabs, $t unless grep(lc($_) eq lc($t), @stabs);
    }
    foreach my $table (@tabs)
    {
	if ( !grep(lc($_) eq lc($table), @dtabs) )
	{
	    warn qq|warn: skipping table "$table" not existing on destination\n|;
	    next;
	}
	if ( !grep(lc($_) eq lc($table), @stabs) )
	{
	    warn qq|warn: skipping table "$table" not existing on source\n|;
	    next;
	}
	$verbose && print "tab: $table: ";
	$dryrun || $dbh_dst->do("DELETE FROM $table");
	$verbose && print "del";
	my $n = copy_table($table, $dbh_src, $dbh_dst);
	$verbose && print "; $n records copied";
	$verbose && print "\n";
    }
    my $user_seqs = user_seqs($dbh_dst);
    foreach my $seq (@$user_seqs)
    {
	$verbose && print "seq: $seq: ";
	my $table = $seq;
	$table =~ s/_id_s(?:eq)?$//;
	$dbh_dst->do("SELECT setval('$seq', (SELECT max(id) FROM $table)+1)");
	$verbose && print "updated\n";
    }
}

MAIN: {
    $| = 1;
    my ($usage);
    my ($dbi_src_dsn, $dbi_src_user, $dbi_src_password);
    my ($dbi_dst_dsn, $dbi_dst_user, $dbi_dst_password);

    usage(1) unless Getopt::Long::GetOptions( 'h|help' => \$usage,
	    'v|verbose+' => \$verbose,
	    'n|dry-run' => \$dryrun,
	    'src-dsn=s' => \$dbi_src_dsn,
	    'src-user=s' => \$dbi_src_user,
	    'src-password=s' => \$dbi_src_password,
	    'dst-dsn=s' => \$dbi_dst_dsn,
	    'dst-user=s' => \$dbi_dst_user,
	    'dst-password=s' => \$dbi_dst_password);
    usage(0) if $usage;
    if ( @ARGV == 2 )
    {
	($dbi_src_dsn, $dbi_dst_dsn) = @ARGV;
    }
    elsif ( @ARGV )
    {
	usage(1);
    }
    die "destination DSN not set!\n" unless $dbi_dst_dsn ne '';
    die "source DSN not set!\n" unless $dbi_src_dsn ne '';
    my $dbh_dst = DBI->connect($dbi_dst_dsn, $dbi_dst_user, $dbi_dst_password,
			{
			    'RaiseError' => 1,
			    'AutoCommit' => 1,
			}
		    );
    my $dbh_src = DBI->connect($dbi_src_dsn, $dbi_src_user, $dbi_src_password,
			{
			    'RaiseError' => 1,
			    'AutoCommit' => 1,
			}
		    );
    $dbh_src->{'mysql_enable_utf8'} = 1;
    $dbh_src->do("SET NAMES utf8");
    copy_everything($dbh_src, $dbh_dst);
    $dbh_dst->disconnect();
    $dbh_src->disconnect();
}