[Rt-commit] rt branch 5.0/speed-up-importer created. rt-5.0.3-137-g32f8de14d5

BPS Git Server git at git.bestpractical.com
Thu Oct 27 21:21:19 UTC 2022


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "rt".

The branch, 5.0/speed-up-importer has been created
        at  32f8de14d50cb998860b1ba5ebb0ce01f0a62fbe (commit)

- Log -----------------------------------------------------------------
commit 32f8de14d50cb998860b1ba5ebb0ce01f0a62fbe
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 13 21:34:01 2022 +0800

    Add batch mode to importer for data serialized with --clone or --all
    
    By reducing SQL calls a lot, importer can run much faster now. Cloned
    data doesn't have dependencies to resolve, so we can also insert rows in
    parallel, which makes the speedup more noticable.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 91036377b1..5c355e444d 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -74,6 +74,8 @@ sub Init {
         AutoCommit          => 1,
         BatchUserPrincipals  => 0,
         BatchGroupPrincipals => 0,
+        BatchSize            => 0,
+        MaxProcesses         => 10,
         @_,
     );
 
@@ -87,6 +89,8 @@ sub Init {
     $self->{AutoCommit} = $args{AutoCommit};
 
     $self->{$_} = $args{$_} for qw/BatchUserPrincipals BatchGroupPrincipals/;
+    $self->{BatchSize}    = $args{BatchSize};
+    $self->{MaxProcesses} = $args{MaxProcesses} || 10;
 
     $self->{HandleError} = sub { 0 };
     $self->{HandleError} = $args{HandleError}
@@ -166,6 +170,7 @@ sub InitStream {
                 Type  => 'FreeformSingle',
             );
         }
+        $self->{OriginalId} = $cf->Id;
     }
 
     if ( !$self->{Clone} ) {
@@ -221,6 +226,29 @@ sub NextPrincipalId {
     return $id;
 }
 
+sub NextId {
+    my $self  = shift;
+    my $class = shift;
+    my $id    = shift;
+
+    if ( $id ) {
+        $self->{_next_id}{$class} = $id;
+        return $id;
+    }
+
+    if ( defined $self->{_next_id}{$class} ) {
+        return $self->{_next_id}{$class}++;
+    }
+    return;
+}
+
+sub HasNextId {
+    my $self  = shift;
+    my $class = shift;
+
+    return defined $self->{_next_id}{$class} ? 1 : 0;
+}
+
 sub Resolve {
     my $self = shift;
     my ($uid, $class, $id) = @_;
@@ -235,27 +263,39 @@ sub Resolve {
 
     return unless $self->{Pending}{$uid};
 
+    my @left;
     for my $ref (@{$self->{Pending}{$uid}}) {
-        my ($pclass, $pid) = @{ $self->Lookup( $ref->{uid} ) };
-        my $obj = $pclass->new( RT->SystemUser );
-        $obj->LoadByCols( Id => $pid );
-        $obj->__Set(
-            Field => $ref->{column},
-            Value => $id,
-        ) if defined $ref->{column};
-        $obj->__Set(
-            Field => $ref->{classcolumn},
-            Value => $class,
-        ) if defined $ref->{classcolumn};
-        $obj->__Set(
-            Field => $ref->{uri},
-            Value => $self->LookupObj($uid)->URI,
-        ) if defined $ref->{uri};
-        if (my $method = $ref->{method}) {
-            $obj->$method($self, $ref, $class, $id);
+        if ( my $lookup = $self->Lookup( $ref->{uid} ) ) {
+            my ( $pclass, $pid ) = @{$lookup};
+            my $obj = $pclass->new( RT->SystemUser );
+            $obj->LoadByCols( Id => $pid );
+            $obj->__Set(
+                Field => $ref->{column},
+                Value => $id,
+            ) if defined $ref->{column};
+            $obj->__Set(
+                Field => $ref->{classcolumn},
+                Value => $class,
+            ) if defined $ref->{classcolumn};
+            $obj->__Set(
+                Field => $ref->{uri},
+                Value => $self->LookupObj($uid)->URI,
+            ) if defined $ref->{uri};
+            if ( my $method = $ref->{method} ) {
+                $obj->$method( $self, $ref, $class, $id );
+            }
+        }
+        else {
+            push @left, $ref;
         }
     }
-    delete $self->{Pending}{$uid};
+
+    if ( @left ) {
+        $self->{Pending}{$uid} = \@left;
+    }
+    else {
+        delete $self->{Pending}{$uid};
+    }
 }
 
 sub Lookup {
@@ -419,6 +459,23 @@ sub Create {
         }
     }
 
+    if ( $self->{BatchSize} && ( $self->{Clone} || $self->{All} ) ) {
+
+        # Finish up the previous class if there are any records left
+        my ($previous_class) = grep { $_ ne $class && @{ $self->{_batch}{$_} } } keys %{ $self->{_batch} || {} };
+        if ($previous_class) {
+            $self->BatchCreate( $previous_class, $self->{_batch}{$previous_class} );
+        }
+
+        if ( $data->{id} || $self->HasNextId($class) ) {
+            push @{ $self->{_batch}{$class} }, [ $uid, $data ];
+            if ( @{ $self->{_batch}{$class} } == $self->{BatchSize} ) {
+                $self->BatchCreate( $class, $self->{_batch}{$class} );
+            }
+            return;
+        }
+    }
+
     my ($id, $msg) = eval {
         # catch and rethrow on the outside so we can provide more info
         local $SIG{__DIE__};
@@ -436,6 +493,16 @@ sub Create {
         }
     }
 
+    return $obj if $self->{Clone};
+
+    # Users/Groups have id set in PreInflate, no need to set here
+    if (   $self->{BatchSize}
+        && $self->{All}
+        && $class =~ /^RT::(?:Ticket|Transaction|Attachment|GroupMember|ObjectCustomFieldValue|Attribute|Link)$/ )
+    {
+        $self->NextId( $class, $id + 1 );
+    }
+
     $self->{ObjectCount}{$class}++;
     $self->Resolve( $uid => $class, $id );
 
@@ -513,18 +580,7 @@ sub ReadStream {
     # If it's a ticket, we might need to create a
     # TicketCustomField for the previous ID
     if ($class eq "RT::Ticket" and $self->{OriginalId}) {
-        my $value = $self->{ExcludeOrganization}
-                  ? $origid
-                  : $self->Organization . ":$origid";
-
-        $obj->Load( $self->Lookup($uid)->[1] );
-        my ($id, $msg) = $obj->AddCustomFieldValue(
-            Field             => $self->{OriginalId},
-            Value             => $value,
-            RecordTransaction => 0,
-        );
-        warn "Failed to add custom field to $uid: $msg"
-            unless $id;
+        $self->CreateOriginalIdOCFVs( { $self->Lookup($uid)->[1] => $origid } );
     }
 
     # If it's a CF, we don't know yet if it's global (the OCF
@@ -542,6 +598,33 @@ sub CloseStream {
 
     $self->{Progress}->(undef, 'force') if $self->{Progress};
 
+    my %order = (
+        'RT::Ticket'                 => 1,
+        'RT::Group'                  => 2,
+        'RT::GroupMember'            => 3,
+        'RT::ObjectCustomFieldValue' => 4,
+        'RT::Transaction'            => 5,
+        'RT::Attachment'             => 6,
+    );
+
+    for my $class (
+        sort { ( $order{$a} // 0 ) <=> ( $order{$b} // 0 ) }
+        grep { @{ $self->{_batch}{$_} } } keys %{ $self->{_batch} || {} }
+        )
+    {
+        $self->BatchCreate( $class, $self->{_batch}{$class} );
+    }
+
+    $self->{_pm}->wait_all_children if $self->{_pm};
+
+    # Now have all data imported, try to resolve again.
+    my @uids = grep { $self->{UIDs}{$_} } keys %{ $self->{Pending} };
+
+    for my $uid (@uids) {
+        my ( $class, $id ) = split /-/, $self->{UIDs}{$uid}, 2;
+        $self->Resolve( $uid, $class, $id );
+    }
+
     # Fill CGM
 
     # Groups
@@ -644,6 +727,144 @@ sub Progress {
     return $self->{Progress} = $_[0];
 }
 
+sub BatchCreate {
+    my $self  = shift;
+    my $class = shift;
+    my $items = shift or return;
+
+    return unless @$items;
+
+    if ( $self->{Clone} ) {
+
+        if ( !$self->{_pm} ) {
+            require Parallel::ForkManager;
+            $self->{_pm} = Parallel::ForkManager->new( $self->{MaxProcesses} );
+        }
+
+        $self->{ObjectCount}{$class} += @$items;
+        my @copy = @$items;
+        @$items = ();
+
+        $RT::Handle->Commit unless $self->{AutoCommit};
+        $RT::Handle->Disconnect;
+
+        if ( $self->{_pm}->start ) {
+            $RT::Handle->Connect;
+            $RT::Handle->BeginTransaction unless $self->{AutoCommit};
+            return 1;
+        }
+
+        $RT::Handle->Connect;
+        $self->_BatchCreate( $class, \@copy );
+        $self->{_pm}->finish;
+    }
+    else {
+        # In case there are duplicates, which could happen for merged members.
+        if ( $class eq 'RT::GroupMember' ) {
+            my %added;
+            @$items = grep { !$added{ $_->[1]{GroupId} }{ $_->[1]{MemberId} }++ } @$items;
+        }
+
+        my $map = $self->_BatchCreate( $class, $items );
+        $self->{ObjectCount}{$class} += @$items;
+        @$items = ();
+
+        if ($map) {
+            $self->{UIDs}{$_} = $map->{$_} for keys %$map;
+
+            my %ticket_map;
+            for my $uid ( keys %$map ) {
+                my ( $class, $id ) = split /-/, $map->{$uid}, 2;
+
+                $self->Resolve( $uid => $class, $id );
+                if ( $class eq 'RT::User' && $uid =~ /-RT_System$/ ) {
+                    RT->InitSystemObjects;
+                }
+                elsif ( $class eq 'RT::Attribute' ) {
+                    my $attr = RT::Attribute->new( RT->SystemUser );
+                    $attr->Load($id);
+                    $attr->PostInflate( $self, $uid );
+                }
+                elsif ( $self->{OriginalId} && $class eq 'RT::Ticket' ) {
+                    my ($orig_id) = ( $uid =~ /-(\d+)$/ );
+                    $ticket_map{$id} = $orig_id;
+                }
+            }
+
+            $self->CreateOriginalIdOCFVs( \%ticket_map ) if %ticket_map;
+        }
+        return 1;
+    }
+}
+
+sub _BatchCreate {
+    my $self  = shift;
+    my $class = shift;
+    my $items = shift or return;
+    return unless @$items;
+
+    my %map;
+
+    # Do not actually insert, just get the SQL, with sorted field/value pairs
+    local *RT::Handle::Insert = sub {
+        my $self  = shift;
+        my $table = shift;
+        my %attr  = @_;
+        return $self->InsertQueryString( $table, map { $_ => $attr{$_} } sort keys %attr );
+    };
+
+    my %query;
+    for (@$items) {
+        my ( $uid, $data ) = @$_;
+        my $obj = $class->new( RT->SystemUser );
+
+        my $id = $data->{id} || $self->NextId($class);
+        my ( $sql, @bind ) = $obj->DBIx::SearchBuilder::Record::Create( %$data, id => $id );
+        $map{$uid} = join '-', $class, $id unless $self->{Clone};
+        push @{ $query{$sql} }, \@bind;
+    }
+
+    my $dbh = $RT::Handle->dbh;
+
+    for my $sql ( keys %query ) {
+
+        my $count = @{ $query{$sql} };
+        my $values_paren;
+        if ( $sql =~ /(\(\?.+?\))/i ) {
+            $values_paren = $1;
+        }
+
+        # DBs have placeholder limitations(64k for Pg), here we replace
+        # placeholders to support bigger batch sizes. The performance is similar.
+        my $batch_sql
+            = $RT::Handle->FillIn( $sql . ( ", $values_paren" x ( $count - 1 ) ), [ map @$_, @{ $query{$sql} } ] );
+        $self->RunSQL($batch_sql);
+    }
+
+    # Clone doesn't need to return anything
+    return $self->{Clone} ? () : \%map;
+}
+
+sub CreateOriginalIdOCFVs {
+    my $self = shift;
+    my $ticket_map = shift;
+    if ( %$ticket_map ) {
+        my $sql
+            = 'INSERT INTO ObjectCustomFieldValues (CustomField, ObjectType, ObjectId, Content, Creator) VALUES ';
+        my @values;
+        if ( !$self->{ExcludeOrganization} ) {
+            $ticket_map->{$_} = "$self->{Organization}:$ticket_map->{$_}" for keys %$ticket_map;
+        }
+
+        my $creator = RT->SystemUser->Id;
+        for my $id ( sort { $a <=> $b } keys %$ticket_map ) {
+            push @values, qq{($self->{OriginalId}, 'RT::Ticket', $id, '$ticket_map->{$id}', $creator)};
+        }
+        $sql .= join ',', @values;
+        $self->RunSQL($sql);
+    }
+}
+
 sub RunSQL {
     my $self = shift;
     my $rv;
diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index bd9b9ae587..2c8e56c039 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -104,6 +104,8 @@ GetOptions(
 
     "batch-user-principals=i",
     "batch-group-principals=i",
+    "batch-size=i",
+    "max-processes=i",
 
     "dump=s@",
 ) or Pod::Usage::pod2usage();
@@ -168,6 +170,8 @@ my $import = RT::Migrate::Importer::File->new(
     AutoCommit          => $OPT{'auto-commit'},
     BatchUserPrincipals => $OPT{'batch-user-principals'},
     BatchGroupPrincipals => $OPT{'batch-group-principals'},
+    BatchSize           => $OPT{'batch-size'},
+    MaxProcesses        => $OPT{'max-processes'} || 10,
     HandleError         => $error_handler,
 );
 
@@ -317,6 +321,18 @@ The number of user/group principals to create in batch beforehand. Default is 0.
 This is to improve performance for not-cloned serialized data of big instances,
 usually you don't need to specify this.
 
+=item B<--batch-size> I<BATCH_SIZE>
+
+Create objects in batch. Default is 0, meaning batch processing is not
+enabled. This is for data serialized with C<--clone> or C<--all>. For cloned
+serialized data, each batch processing will also take place in a separate
+child process.
+
+=item B<--max-processes> I<MAX_PROCESSES>
+
+The number of max allowed child processes for batch processing. Default is
+10. This is for cloned serialized data only.
+
 =back
 
 

commit 88076841a7a95089b09b44bf19404a58576664c6
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Sat Oct 22 20:56:41 2022 +0800

    Avoid duplicates of postponed id resolution
    
    This could happen when a dashboard contains duplicated saved searches.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index c486b5014d..91036377b1 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -298,6 +298,8 @@ sub Postpone {
         uri         => undef,
         @_,
     );
+    return if $self->{_postponed}{ join ';', map { $_ . ':' . ( $args{$_} // '' ) } sort keys %args }++;
+
     my $uid = delete $args{for};
 
     if (defined $uid) {

commit 72f92fbd2df2ac5a515ed44b6f367bdc025621c3
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 18:56:27 2022 +0800

    Skip rights check on Attachment access for system user
    
    This is initially for serialization performance, so we don't need to
    query Transactions table to get Attachment field values.

diff --git a/lib/RT/Attachment.pm b/lib/RT/Attachment.pm
index 8b6dc5ea0f..c3b9681d8e 100644
--- a/lib/RT/Attachment.pm
+++ b/lib/RT/Attachment.pm
@@ -1070,6 +1070,8 @@ Returns its value as a string, if the user passes an ACL check
 
 sub _Value {
     my $self  = shift;
+    return $self->__Value(@_) if $self->CurrentUser->Id == RT->SystemUser->Id;
+
     my $field = shift;
 
     #if the field is public, return it.
diff --git a/lib/RT/Attachments.pm b/lib/RT/Attachments.pm
index 0c005c2d43..e839bc172f 100644
--- a/lib/RT/Attachments.pm
+++ b/lib/RT/Attachments.pm
@@ -248,7 +248,7 @@ sub AddRecord {
     my $self = shift;
     my ($record) = @_;
 
-    return unless $record->TransactionObj->CurrentUserCanSee;
+    return unless $self->CurrentUser->Id == RT->SystemUser->Id || $record->TransactionObj->CurrentUserCanSee;
     return $self->SUPER::AddRecord( $record );
 }
 

commit 92fd3e52ec8f05d200bfa0377e8bde51fbae4ac2
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 18:52:01 2022 +0800

    Skip rights check on ACE access for system user
    
    This is initially for serialization performance, so we don't need to
    query ACL and CGM tables to get ACE field values.

diff --git a/lib/RT/ACE.pm b/lib/RT/ACE.pm
index 07571c8552..cbdb3296fb 100644
--- a/lib/RT/ACE.pm
+++ b/lib/RT/ACE.pm
@@ -545,6 +545,7 @@ sub _Set {
 
 sub _Value {
     my $self = shift;
+    return $self->__Value(@_) if $self->CurrentUser->Id == RT->SystemUser->Id;
 
     if ( $self->PrincipalObj->IsGroup
             && $self->PrincipalObj->Object->HasMemberRecursively(

commit 55d94e8a9011f9cd7c01f288b74679a04b82723f
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 16:19:05 2022 +0800

    Cache various objects for records
    
    This covers the following methods:
    
        RT::ACE::Object
        RT::ACE::PrincipalObj
        RT::Attribute::Object
        RT::Group::PrincipalObj
        RT::Transaction::Object
        RT::ObjectCustomFieldValue::Object
        RT::ObjectCustomFieldValue::CustomFieldObj
    
    The returned objects usually do not change.

diff --git a/lib/RT/ACE.pm b/lib/RT/ACE.pm
index 86bd562655..07571c8552 100644
--- a/lib/RT/ACE.pm
+++ b/lib/RT/ACE.pm
@@ -492,16 +492,17 @@ If the user has no rights, returns undef.
 sub Object {
     my $self = shift;
 
-    my $appliesto_obj;
+    return $self->{_cached}{Object} if $self->{_cached}{Object};
 
-    if ($self->__Value('ObjectType') && $self->__Value('ObjectType')->DOES('RT::Record::Role::Rights') ) {
-        $appliesto_obj =  $self->__Value('ObjectType')->new($self->CurrentUser);
-        unless (ref( $appliesto_obj) eq $self->__Value('ObjectType')) {
+    if ( $self->__Value('ObjectType') && $self->__Value('ObjectType')->DOES('RT::Record::Role::Rights') ) {
+        my $appliesto_obj = $self->__Value('ObjectType')->new( $self->CurrentUser );
+        unless ( ref($appliesto_obj) eq $self->__Value('ObjectType') ) {
             return undef;
         }
         $appliesto_obj->Load( $self->__Value('ObjectId') );
-        return ($appliesto_obj);
-     }
+        $self->{_cached}{Object} = $appliesto_obj;
+        return $self->{_cached}{Object};
+    }
     else {
         $RT::Logger->warning( "$self -> Object called for an object "
                               . "of an unknown type:"
@@ -521,15 +522,15 @@ Returns the RT::Principal object for this ACE.
 sub PrincipalObj {
     my $self = shift;
 
-    my $princ_obj = RT::Principal->new( $self->CurrentUser );
-    $princ_obj->Load( $self->__Value('PrincipalId') );
+    unless ( $self->{_cached}{PrincipalObj} ) {
+        $self->{_cached}{PrincipalObj} = RT::Principal->new( $self->CurrentUser );
+        $self->{_cached}{PrincipalObj}->Load( $self->__Value('PrincipalId') );
 
-    unless ( $princ_obj->Id ) {
-        $RT::Logger->err(
-                   "ACE " . $self->Id . " couldn't load its principal object" );
+        unless ( $self->{_cached}{PrincipalObj}->Id ) {
+            $RT::Logger->err( "ACE " . $self->Id . " couldn't load its principal object" );
+        }
     }
-    return ($princ_obj);
-
+    return $self->{_cached}{PrincipalObj};
 }
 
 
diff --git a/lib/RT/Attribute.pm b/lib/RT/Attribute.pm
index 4e0201b542..20bbea2e3d 100644
--- a/lib/RT/Attribute.pm
+++ b/lib/RT/Attribute.pm
@@ -390,17 +390,18 @@ sub SetSubValues {
 
 sub Object {
     my $self = shift;
-    my $object_type = $self->__Value('ObjectType');
-    my $object;
-    eval { $object = $object_type->new($self->CurrentUser) };
-    unless(UNIVERSAL::isa($object, $object_type)) {
-        $RT::Logger->error("Attribute ".$self->Id." has a bogus object type - $object_type (".$@.")");
-        return(undef);
-     }
-    $object->Load($self->__Value('ObjectId'));
-
-    return($object);
-
+    unless ( $self->{_cached}{Object} ) {
+        my $object_type = $self->__Value('ObjectType');
+        my $object;
+        eval { $object = $object_type->new( $self->CurrentUser ) };
+        unless ( UNIVERSAL::isa( $object, $object_type ) ) {
+            $RT::Logger->error( "Attribute " . $self->Id . " has a bogus object type - $object_type (" . $@ . ")" );
+            return (undef);
+        }
+        $object->Load( $self->__Value('ObjectId') );
+        $self->{_cached}{Object} = $object;
+    }
+    return $self->{_cached}{Object};
 }
 
 
diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index f466235830..9f104607c3 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1335,9 +1335,16 @@ The response is cached. PrincipalObj should never ever change.
 
 sub PrincipalObj {
     my $self = shift;
-    my $res = RT::Principal->new( $self->CurrentUser );
-    $res->Load( $self->id );
-    return $res;
+    unless ( $self->{_cached}{PrincipalObj} && $self->{_cached}{PrincipalObj}->Id ) {
+        $self->{_cached}{PrincipalObj} = RT::Principal->new( $self->CurrentUser );
+        if ( $self->Id ) {
+            my ( $ret, $msg ) = $self->{_cached}{PrincipalObj}->Load( $self->id );
+            if ( !$ret ) {
+                RT->Logger->warning( "Couldn't load principal #" . $self->id . ": $msg" );
+            }
+        }
+    }
+    return $self->{_cached}{PrincipalObj};
 }
 
 
diff --git a/lib/RT/ObjectCustomFieldValue.pm b/lib/RT/ObjectCustomFieldValue.pm
index e28c33765c..116f74bcf4 100644
--- a/lib/RT/ObjectCustomFieldValue.pm
+++ b/lib/RT/ObjectCustomFieldValue.pm
@@ -219,10 +219,12 @@ Returns the CustomField Object which has the id returned by CustomField
 
 sub CustomFieldObj {
     my $self = shift;
-    my $CustomField = RT::CustomField->new( $self->CurrentUser );
-    $CustomField->SetContextObject( $self->Object );
-    $CustomField->Load( $self->__Value('CustomField') );
-    return $CustomField;
+    unless ( $self->{_cached}{CustomFieldObj} ) {
+        $self->{_cached}{CustomFieldObj} = RT::CustomField->new( $self->CurrentUser );
+        $self->{_cached}{CustomFieldObj}->SetContextObject( $self->Object );
+        $self->{_cached}{CustomFieldObj}->Load( $self->__Value('CustomField') );
+    }
+    return $self->{_cached}{CustomFieldObj};
 }
 
 
@@ -298,9 +300,11 @@ Returns the object this value applies to
 
 sub Object {
     my $self  = shift;
-    my $Object = $self->__Value('ObjectType')->new( $self->CurrentUser );
-    $Object->LoadById( $self->__Value('ObjectId') );
-    return $Object;
+    unless ( $self->{_cached}{Object} ) {
+        $self->{_cached}{Object} = $self->__Value('ObjectType')->new( $self->CurrentUser );
+        $self->{_cached}{Object}->LoadById( $self->__Value('ObjectId') );
+    }
+    return $self->{_cached}{Object};
 }
 
 
diff --git a/lib/RT/Record.pm b/lib/RT/Record.pm
index fc2bfa48eb..03d84b5922 100644
--- a/lib/RT/Record.pm
+++ b/lib/RT/Record.pm
@@ -373,6 +373,7 @@ sub LoadByCols {
     # We don't want to hang onto this
     $self->ClearAttributes;
     delete $self->{_Roles};
+    delete $self->{_cached};
 
     unless ( $self->_Handle->CaseSensitive ) {
         my ( $ret, $msg ) = $self->SUPER::LoadByCols( @_ );
diff --git a/lib/RT/Transaction.pm b/lib/RT/Transaction.pm
index b7c5e89405..438a9f36c5 100644
--- a/lib/RT/Transaction.pm
+++ b/lib/RT/Transaction.pm
@@ -1569,9 +1569,11 @@ sub NewValue {
 
 sub Object {
     my $self  = shift;
-    my $Object = $self->__Value('ObjectType')->new($self->CurrentUser);
-    $Object->Load($self->__Value('ObjectId'));
-    return $Object;
+    unless ( $self->{_cached}{Object} ) {
+        $self->{_cached}{Object} = $self->__Value('ObjectType')->new( $self->CurrentUser );
+        $self->{_cached}{Object}->LoadById( $self->__Value('ObjectId') );
+    }
+    return $self->{_cached}{Object};
 }
 
 =head2 NewReferenceObject
diff --git a/share/html/Search/Elements/EditSearches b/share/html/Search/Elements/EditSearches
index 6871e702bb..a959bd9d29 100644
--- a/share/html/Search/Elements/EditSearches
+++ b/share/html/Search/Elements/EditSearches
@@ -381,6 +381,7 @@ if ( $obj && $obj->id ) {
                 push @results, loc( 'Unable to set privacy id: [_1]', $msg )
                   unless ($val);
             }
+            $obj->Load( $obj->Id );
         }
         else {
             # two loc are just for convenience so we don't need to
diff --git a/t/rest2/group-members.t b/t/rest2/group-members.t
index 842d595bf6..f9c59ee000 100644
--- a/t/rest2/group-members.t
+++ b/t/rest2/group-members.t
@@ -106,6 +106,7 @@ $group2->Load($group2_id);
     is($res->code, 200, 'Enable group with AdminGroup & SeeGroup rights');
     is_deeply($mech->json_response, ['Group enabled']);
 
+    $group2->Load( $group2->Id );
     is($group2->Disabled, 0, "Group enabled");
 }
 

commit 8b6d57aa195e9ca7b55fc4001a3f7ae34a062014
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 16:21:38 2022 +0800

    Skip rights checks for serializer/importer
    
    This commit short-circuits CurrentUserCanSee and CurrentUserHasRight
    accordingly.

diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index 882b68b097..bd9b9ae587 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -117,6 +117,18 @@ die "No such directory $dir\n" unless -d $dir;
 die "$dir doesn't appear to contain serialized data\n"
     unless -f "$dir/001.dat";
 
+for my $class ( grep m{^RT/\w+\.pm$}, keys %INC ) {
+    no warnings 'redefine';
+    no strict 'refs';
+    $class =~ s!\/!::!;
+    $class =~ s!\.pm$!!;
+    for my $method (qw/CurrentUserCanSee CurrentUserHasRight/) {
+        next unless $class->can($method);
+        my $full_name = "${class}::$method";
+        *$full_name = sub {1};
+    }
+}
+
 if ($OPT{dump}) {
     die "Dumping objects only works in conjunction with --list\n"
         unless $OPT{list};
diff --git a/sbin/rt-serializer.in b/sbin/rt-serializer.in
index 482b2c542d..0322449800 100644
--- a/sbin/rt-serializer.in
+++ b/sbin/rt-serializer.in
@@ -78,6 +78,18 @@ use RT;
 RT::LoadConfig();
 RT::Init();
 
+for my $class ( grep m{^RT/\w+\.pm$}, keys %INC ) {
+    no warnings 'redefine';
+    no strict 'refs';
+    $class =~ s!\/!::!;
+    $class =~ s!\.pm$!!;
+    for my $method (qw/CurrentUserCanSee CurrentUserHasRight/) {
+        next unless $class->can($method);
+        my $full_name = "${class}::$method";
+        *$full_name = sub {1};
+    }
+}
+
 @RT::Record::ISA = qw( DBIx::SearchBuilder::Record RT::Base );
 
 use RT::Migrate;

commit f82adc3bf13fbd509937353f9c712c7aee77eeb0
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 04:51:56 2022 +0800

    Tweak UID generation code and also cache user UID for performance

diff --git a/lib/RT/Attribute.pm b/lib/RT/Attribute.pm
index d8d2db1434..4e0201b542 100644
--- a/lib/RT/Attribute.pm
+++ b/lib/RT/Attribute.pm
@@ -995,18 +995,13 @@ sub Serialize {
         my $content = $self->_DeserializeContent($store{Content});
         for my $pane (values %{ $content || {} }) {
             for (@$pane) {
-                my $attr = RT::Attribute->new($self->CurrentUser);
-                $attr->LoadById($_);
-                $_ = \($attr->UID);
+                $_ = \( join '-', 'RT::Attribute', $RT::Organization, $_ );
             }
         }
         $store{Content} = $self->_SerializeContent($content);
     }
     elsif ( $store{Name} =~ /DefaultDashboard$/ ) {
-        my $content = $store{Content};
-        my $attr    = RT::Attribute->new( $self->CurrentUser );
-        $attr->LoadById($content);
-        $store{Content} = \$attr->UID;
+        $store{Content} = \( join '-', 'RT::Attribute', $RT::Organization, $store{Content} );
     }
     # encode saved searches and dashboards to be UIDs
     elsif ($store{Name} eq 'Dashboard') {
@@ -1014,9 +1009,7 @@ sub Serialize {
         for my $pane (values %{ $content->{Panes} || {} }) {
             for (@$pane) {
                 if ($_->{portlet_type} eq 'search' || $_->{portlet_type} eq 'dashboard') {
-                    my $attr = RT::Attribute->new($self->CurrentUser);
-                    $attr->LoadById($_->{id});
-                    $_->{uid} = \($attr->UID);
+                    $_->{uid} = \( join '-', 'RT::Attribute', $RT::Organization, $_->{id} );
                 }
                 # pass through everything else (e.g. component)
             }
@@ -1026,9 +1019,7 @@ sub Serialize {
     # encode subscriptions to have dashboard UID
     elsif ($store{Name} eq 'Subscription') {
         my $content = $self->_DeserializeContent($store{Content});
-        my $attr = RT::Attribute->new($self->CurrentUser);
-        $attr->LoadById($content->{DashboardId});
-        $content->{DashboardId} = \($attr->UID);
+        $content->{DashboardId} = \( join '-', 'RT::Attribute', $RT::Organization, $content->{DashboardId} );
         $store{Content} = $self->_SerializeContent($content);
     }
 
diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 308f05955e..f466235830 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1355,6 +1355,15 @@ sub PrincipalId {
 sub InstanceObj {
     my $self = shift;
 
+    my $class = $self->InstanceClass or return;
+
+    my $obj = $class->new( $self->CurrentUser );
+    $obj->Load( $self->Instance );
+    return $obj;
+}
+
+sub InstanceClass {
+    my $self = shift;
     my $class;
     if ( $self->Domain eq 'ACLEquivalence' ) {
         $class = "RT::User";
@@ -1365,12 +1374,7 @@ sub InstanceObj {
     } elsif ($self->Domain eq 'RT::Asset-Role') {
         $class = "RT::Asset";
     }
-
-    return unless $class;
-
-    my $obj = $class->new( $self->CurrentUser );
-    $obj->Load( $self->Instance );
-    return $obj;
+    return $class;
 }
 
 sub BasicColumns {
@@ -1670,8 +1674,18 @@ sub Serialize {
     my %args = (@_);
     my %store = $self->SUPER::Serialize(@_);
 
-    my $instance = $self->InstanceObj;
-    $store{Instance} = \($instance->UID) if $instance;
+    if ( my $class = $self->InstanceClass ) {
+        if ( $class->can('UID') eq RT::Record->can('UID') ) {
+            $store{Instance} = \( join '-', $class, $RT::Organization, $store{Instance} );
+        }
+        elsif ( $class eq 'RT::User' ) {
+            $store{Instance} = \$args{serializer}{_uid}{user}{ $store{Instance} };
+        }
+        else {
+            my $instance = $self->InstanceObj;
+            $store{Instance} = \($instance->UID);
+        }
+    }
 
     $store{Disabled} = $self->PrincipalObj->Disabled;
     $store{Principal} = $self->PrincipalObj->UID;
diff --git a/lib/RT/Record.pm b/lib/RT/Record.pm
index 13691bf1ad..fc2bfa48eb 100644
--- a/lib/RT/Record.pm
+++ b/lib/RT/Record.pm
@@ -69,6 +69,7 @@ use warnings;
 use RT;
 use base RT->Config->Get('RecordBaseClass');
 use base 'RT::Base';
+use v5.10;
 
 require RT::Date;
 require RT::User;
@@ -2837,8 +2838,19 @@ sub Serialize {
     }
     return %store unless $args{UIDs};
 
+    state %simple_uid_class;
+
     # Use FooObj to turn Foo into a reference to the UID
     for my $col ( grep {$store{$_}} @cols ) {
+        next if $col =~ /^(?:Created|LastUpdated)$/;;
+
+        if ( $self->isa('RT::Ticket') ) {
+            next if $col =~ /^(?:Due|Resolved|Starts|Started|Told)$/;
+        }
+        elsif ( $self->isa('RT::Group') ) {
+            next if $col eq 'Instance';
+        }
+
         my $method = $methods{$col};
         if (not $method) {
             $method = $col;
@@ -2846,15 +2858,44 @@ sub Serialize {
         }
         next unless $self->can($method);
 
-        my $obj = $self->$method;
-        next unless $obj and $obj->isa("RT::Record");
-        $store{$col} = \($obj->UID);
+        my $uid;
+
+        my $value = $self->$col;
+        if ( $simple_uid_class{ ref $self }{$col} && $value =~ /^\d+$/ && $value > 0 ) {
+
+            # UID is based on id, so we can generate UID directly
+            $uid = join '-', $simple_uid_class{ ref $self }{$col}, $RT::Organization, $value;
+        }
+        elsif ( $method =~ /^(?:Creator|LastUpdatedBy)Obj$/ || ( $self->isa('RT::Ticket') && $method eq 'OwnerObj' ) ) {
+            $uid = $args{serializer}{_uid}{user}{$value} if $value;
+        }
+
+        if (!$uid) {
+            my $obj = $self->$method;
+            next unless $obj and $obj->isa("RT::Record");
+            $uid = $obj->UID;
+
+            if ( $obj->can('UID') eq RT::Record->can('UID') ) {
+                # Group Instance column points to various classes.
+                $simple_uid_class{ ref $self }{$col} = ref $obj;
+            }
+        }
+
+        $store{$col} = \$uid if $uid;
     }
 
     # Anything on an object should get the UID stored instead
     if ($store{ObjectType} and $store{ObjectId} and $self->can("Object")) {
+        if ( $store{ObjectType}->can('UID') eq RT::Record->can('UID') ) {
+            $store{Object} = \( join '-', $store{ObjectType}, $RT::Organization, $store{ObjectId} );
+        }
+        elsif ( $store{ObjectType} eq 'RT::User' ) {
+            $store{Object} = \( $args{serializer}{uid}{user}{ $store{ObjectId} } ||= $self->Object->UID );
+        }
+        else {
+            $store{Object} = \( $self->Object->UID );
+        }
         delete $store{$_} for qw/ObjectType ObjectId/;
-        $store{Object} = \($self->Object->UID);
     }
 
     return %store;
diff --git a/lib/RT/Ticket.pm b/lib/RT/Ticket.pm
index d63881f79e..4d858a8b8f 100644
--- a/lib/RT/Ticket.pm
+++ b/lib/RT/Ticket.pm
@@ -3759,9 +3759,7 @@ sub Serialize {
     my %args = (@_);
     my %store = $self->SUPER::Serialize(@_);
 
-    my $obj = RT::Ticket->new( RT->SystemUser );
-    $obj->Load( $store{EffectiveId} );
-    $store{EffectiveId} = \($obj->UID);
+    $store{EffectiveId} = \( join '-', 'RT::Ticket', $RT::Organization, $store{EffectiveId} );
 
     return %store;
 }
diff --git a/lib/RT/Transaction.pm b/lib/RT/Transaction.pm
index 4f1286bf1b..b7c5e89405 100644
--- a/lib/RT/Transaction.pm
+++ b/lib/RT/Transaction.pm
@@ -2158,26 +2158,21 @@ sub Serialize {
 
     my $type = $store{Type};
     if ($type eq "CustomField") {
-        my $cf = RT::CustomField->new( RT->SystemUser );
-        $cf->Load( $store{Field} );
-        $store{Field} = \($cf->UID);
-
-        $store{OldReference} = \($self->OldReferenceObject->UID) if $self->OldReference;
-        $store{NewReference} = \($self->NewReferenceObject->UID) if $self->NewReference;
-    } elsif ($type =~ /^(Take|Untake|Force|Steal|Give)$/) {
+        $store{Field} = \( join '-', 'RT::CustomField', $RT::Organization, $store{Field} );
+
+        $store{OldReference} = \( join '-', $store{ReferenceType}, $RT::Organization, $store{OldReference} )
+            if $store{OldReference};
+        $store{NewReference} = \( join '-', $store{ReferenceType}, $RT::Organization, $store{NewReference} )
+            if $store{NewReference};
+   }
+   elsif ($type =~ /^(Take|Untake|Force|Steal|Give)$/) {
         for my $field (qw/OldValue NewValue/) {
-            my $user = RT::User->new( RT->SystemUser );
-            $user->Load( $store{$field} );
-            $store{$field} = \($user->UID);
+            $store{$field} = \$args{serializer}{_uid}{user}{ $store{$field} };
         }
     } elsif ($type eq "DelWatcher") {
-        my $principal = RT::Principal->new( RT->SystemUser );
-        $principal->Load( $store{OldValue} );
-        $store{OldValue} = \($principal->UID);
+        $store{OldValue} = \( join '-', 'RT::Principal', $RT::Organization, $store{OldValue} );
     } elsif ($type eq "AddWatcher") {
-        my $principal = RT::Principal->new( RT->SystemUser );
-        $principal->Load( $store{NewValue} );
-        $store{NewValue} = \($principal->UID);
+        $store{NewValue} = \( join '-', 'RT::Principal', $RT::Organization, $store{NewValue} );
     } elsif ($type eq "DeleteLink") {
         if ($store{OldValue}) {
             my $base = RT::URI->new( $self->CurrentUser );
@@ -2223,9 +2218,7 @@ sub Serialize {
             }
         }
     } elsif ($type =~ /^(Add|Open|Resolve)Reminder$/) {
-        my $ticket = RT::Ticket->new( RT->SystemUser );
-        $ticket->Load( $store{NewValue} );
-        $store{NewValue} = \($ticket->UID);
+        $store{NewValue} = \( join '-', 'RT::Ticket', $RT::Organization, $store{NewValue} );
     }
 
     return %store;
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index 26a4692738..ea65b15b3a 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3062,6 +3062,9 @@ sub BeforeWipeout {
 
 sub Serialize {
     my $self = shift;
+    my %args = @_;
+    $args{serializer}{_uid}{user}{ $self->Id } = $self->UID;
+
     return (
         Disabled => $self->PrincipalObj->Disabled,
         Principal => $self->PrincipalObj->UID,

commit b440822a33f6ba6448837c55821c3fac3b590227
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 20 23:03:39 2022 +0800

    Add --all to serializer to export all data with UIDs and not check dependencies
    
    This is faster than walking through dependencies.
    
    The collections are reordered so most dependencies can be serialized
    first, which reduces the importer work to resolve references.

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index ee84a5fffb..308f05955e 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1737,13 +1737,20 @@ sub PreInflate {
         return $duplicated->() if $obj->Id;
     }
 
-    my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
+    # serialized data with --all has principals, in which case we don't need to create a new one.
+    my $principal_obj = $importer->LookupObj( $principal_uid );
+    if ( $principal_obj ) {
+        $data->{id} = $principal_obj->Id;
+    }
+    else {
+        my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
 
-    # Now we have a principal id, set the id for the group record
-    $data->{id} = $id;
+        # Now we have a principal id, set the id for the group record
+        $data->{id} = $id;
+
+        $importer->Resolve( $principal_uid => 'RT::Principal', $id );
+    }
 
-    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
-    $data->{id} = $id;
 
     return 1;
 }
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index e46c098de9..c486b5014d 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -132,6 +132,8 @@ sub LoadMetadata {
     $self->{Organization} = $data->{Organization};
     $self->{Clone}        = $data->{Clone};
     $self->{Incremental}  = $data->{Incremental};
+    $self->{All}          = $data->{All};
+
     $self->{Files}        = $data->{Files} if $data->{Final};
 }
 
diff --git a/lib/RT/Migrate/Serializer.pm b/lib/RT/Migrate/Serializer.pm
index bce8530f22..253f65a2c6 100644
--- a/lib/RT/Migrate/Serializer.pm
+++ b/lib/RT/Migrate/Serializer.pm
@@ -76,6 +76,7 @@ sub Init {
 
         Clone       => 0,
         Incremental => 0,
+        All         => 0,
 
         Verbose => 1,
         @_,
@@ -99,6 +100,7 @@ sub Init {
                   HyperlinkUnmigrated
                   Clone
                   Incremental
+                  All
               /;
 
     $self->{Clone} = 1 if $self->{Incremental};
@@ -108,7 +110,7 @@ sub Init {
     # Keep track of the number of each type of object written out
     $self->{ObjectCount} = {};
 
-    if ($self->{Clone}) {
+    if ($self->{Clone} || $self->{All}) {
         $self->PushAll;
     } else {
         $self->PushBasics;
@@ -132,6 +134,7 @@ sub Metadata {
         Organization => $RT::Organization,
         Clone        => $self->{Clone},
         Incremental  => $self->{Incremental},
+        All          => $self->{All},
         ObjectCount  => { $self->ObjectCount },
         @_,
     },
@@ -159,13 +162,32 @@ sub PushAll {
     # Principals first; while we don't serialize these separately during
     # normal dependency walking (we fold them into users and groups),
     # having them separate during cloning makes logic simpler.
-    $self->PushCollections(qw(Principals));
+    $self->PushCollections(qw(Principals)) if $self->{Clone};
 
-    # Users and groups
-    $self->PushCollections(qw(Users Groups GroupMembers));
+    # Users
+    $self->PushCollections(qw(Users));
+
+    # groups
+    if ( $self->{Clone} ) {
+        $self->PushCollections(qw(Groups));
+    }
+    else {
+        my $groups = RT::Groups->new(RT->SystemUser);
+        $groups->FindAllRows if $self->{FollowDisabled};
+        $groups->CleanSlate;
+        $groups->UnLimit;
+        $groups->Limit(
+            FIELD         => 'Domain',
+            VALUE         => [ 'RT::Queue-Role', 'RT::Ticket-Role', 'RT::Catalog-Role', 'RT::Asset-Role' ],
+            OPERATOR      => 'NOT IN',
+            CASESENSITIVE => 0,
+        );
+        $groups->OrderBy( FIELD => 'id' );
+        $self->PushObj($groups);
+    }
 
     # Tickets
-    $self->PushCollections(qw(Queues Tickets Transactions Attachments Links));
+    $self->PushCollections(qw(Queues Tickets));
 
     # Articles
     $self->PushCollections(qw(Articles), map { ($_, "Object$_") } qw(Classes Topics));
@@ -176,6 +198,23 @@ sub PushAll {
     # Assets
     $self->PushCollections(qw(Catalogs Assets));
 
+    if ( !$self->{Clone} ) {
+        my $groups = RT::Groups->new( RT->SystemUser );
+        $groups->FindAllRows if $self->{FollowDisabled};
+        $groups->CleanSlate;
+        $groups->UnLimit;
+        $groups->Limit(
+            FIELD         => 'Domain',
+            VALUE         => [ 'RT::Queue-Role', 'RT::Ticket-Role', 'RT::Catalog-Role', 'RT::Asset-Role' ],
+            OPERATOR      => 'IN',
+            CASESENSITIVE => 0,
+        );
+        $groups->OrderBy( FIELD => 'id' );
+        $self->PushObj($groups);
+    }
+
+    $self->PushCollections(qw(GroupMembers));
+
     # Custom Fields
     if (RT::ObjectCustomFields->require) {
         $self->PushCollections(map { ($_, "Object$_") } qw(CustomFields CustomFieldValues));
@@ -187,10 +226,14 @@ sub PushAll {
     $self->PushCollections(qw(ACL));
 
     # Scrips
-    $self->PushCollections(qw(Scrips ObjectScrips ScripActions ScripConditions Templates));
+    $self->PushCollections(qw(ScripActions ScripConditions Templates Scrips ObjectScrips));
 
     # Attributes
     $self->PushCollections(qw(Attributes));
+
+    $self->PushCollections(qw(Links));
+
+    $self->PushCollections(qw(Transactions Attachments));
 }
 
 sub PushCollections {
@@ -366,7 +409,7 @@ sub NextPage {
 
     $last ||= 0;
 
-    if ($self->{Clone}) {
+    if ($self->{Clone} || $self->{All}) {
         # Clone provides guaranteed ordering by id and with no other id limits
         # worry about trampling
 
@@ -396,7 +439,7 @@ sub Process {
 
     # Skip all dependency walking if we're cloning; go straight to
     # visiting them.
-    if ($self->{Clone} and $uid) {
+    if ( ($self->{Clone} || $self->{All}) and $uid) {
         return if $obj->isa("RT::System");
         $self->{progress}->($obj) if $self->{progress};
         return $self->Visit(%args);
@@ -530,11 +573,17 @@ sub Visit {
             undef,
             \%data,
         );
-    } elsif ($self->{Clone}) {
+    } elsif ($self->{Clone} || $self->{All}) {
         # Short-circuit and get Just The Basics, Sir if we're cloning
         my $class = ref($obj);
         my $uid   = $obj->UID;
-        my %data  = $obj->RT::Record::Serialize( UIDs => 0 );
+        my %data;
+        if ( $self->{Clone} ) {
+            %data = $obj->RT::Record::Serialize( serializer => $self, UIDs => 0 );
+        }
+        else {
+            %data = $obj->Serialize( serializer => $self, UIDs => 1 );
+        }
 
         # +class is used when seeing a record of one class might insert
         # a separate record into the stream
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index 76458baabc..26a4692738 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3102,14 +3102,21 @@ sub PreInflate {
         return;
     }
 
-    # Create a principal first, so we know what ID to use
-    my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
+    # serialized data with --all has principals, in which case we don't need to create a new one.
+    my $principal_obj = $importer->LookupObj( $principal_uid );
+    if ( $principal_obj ) {
+        $data->{id} = $principal_obj->Id;
+    }
+    else {
 
-    # Now we have a principal id, set the id for the user record
-    $data->{id} = $id;
+        # Create a principal first, so we know what ID to use
+        my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
 
-    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
-    $data->{id} = $id;
+        # Now we have a principal id, set the id for the user record
+        $data->{id} = $id;
+
+        $importer->Resolve( $principal_uid => 'RT::Principal', $id );
+    }
 
     return $class->SUPER::PreInflate( $importer, $uid, $data );
 }
diff --git a/sbin/rt-serializer.in b/sbin/rt-serializer.in
index c6fe187898..482b2c542d 100644
--- a/sbin/rt-serializer.in
+++ b/sbin/rt-serializer.in
@@ -113,6 +113,7 @@ GetOptions(
 
     "clone",
     "incremental",
+    "all",
 
     "gc=i",
     "page=i",
@@ -141,6 +142,7 @@ $args{FollowAssets} = $OPT{assets} if defined $OPT{assets};
 
 $args{Clone}         = $OPT{clone}       if $OPT{clone};
 $args{Incremental}   = $OPT{incremental} if $OPT{incremental};
+$args{All}           = $OPT{all}         if $OPT{all};
 
 $args{GC}   = defined $OPT{gc}   ? $OPT{gc}   : 5000;
 $args{Page} = defined $OPT{page} ? $OPT{page} : 100;
@@ -184,9 +186,9 @@ if ($OPT{'limit-cfs'}) {
     $args{CustomFields} = \@cf_ids;
 }
 
-if (($OPT{clone} or $OPT{incremental})
+if (($OPT{clone} or $OPT{incremental} or $OPT{all})
         and grep { /^(users|groups|deleted|disabled|scrips|tickets|transactions|acls|assets)$/ } keys %OPT) {
-    die "You cannot specify object types when cloning.\n\nPlease see $0 --help.\n";
+    die "You cannot specify object types when cloning or with --all.\n\nPlease see $0 --help.\n";
 }
 
 my $walker;
@@ -417,6 +419,14 @@ C<--clone> with any option that limits object types serialized.  No
 dependency walking is performed when cloning. C<rt-importer> will detect
 that your serialized data set was generated by a clone.
 
+=item B<--all>
+
+Serializes your entire database, creating a clone-like data. Both C<--all>
+and C<--clone> do not check dependencies, the difference is C<--all>
+generates UIDs: it means the ids in source instance do not necessirily
+be synced to target instance, which makes it quite useful to fully merge
+multiple RT instances.
+
 =item B<--incremental>
 
 Will generate an incremenal serialized dataset using the data stored in

commit 8d5b9727d98ff45591b54ef450750fb704f7dcdd
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Oct 18 06:16:40 2022 +0800

    Support to create principals in batch beforehand
    
    This reduces SQL calls a lot and thus improves importer performance.

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 39f6de0a43..ee84a5fffb 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1737,16 +1737,12 @@ sub PreInflate {
         return $duplicated->() if $obj->Id;
     }
 
-    my $principal = RT::Principal->new( RT->SystemUser );
-    my ($id) = $principal->Create(
-        PrincipalType => 'Group',
-        Disabled => $disabled,
-    );
+    my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
 
     # Now we have a principal id, set the id for the group record
     $data->{id} = $id;
 
-    $importer->Resolve( $principal_uid => ref($principal), $id );
+    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
     $data->{id} = $id;
 
     return 1;
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 3ffb9c2bcc..e46c098de9 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -72,6 +72,8 @@ sub Init {
         HandleError         => undef,
         ExcludeOrganization => undef,
         AutoCommit          => 1,
+        BatchUserPrincipals  => 0,
+        BatchGroupPrincipals => 0,
         @_,
     );
 
@@ -84,6 +86,8 @@ sub Init {
 
     $self->{AutoCommit} = $args{AutoCommit};
 
+    $self->{$_} = $args{$_} for qw/BatchUserPrincipals BatchGroupPrincipals/;
+
     $self->{HandleError} = sub { 0 };
     $self->{HandleError} = $args{HandleError}
         if $args{HandleError} and ref $args{HandleError} eq 'CODE';
@@ -161,6 +165,58 @@ sub InitStream {
             );
         }
     }
+
+    if ( !$self->{Clone} ) {
+        for my $type ( qw/User Group/ ) {
+            if ( my $count = $self->{"Batch${type}Principals"} ) {
+                my $principal = RT::Principal->new( RT->SystemUser );
+                my ($id)      = $principal->Create( PrincipalType => $type, Disabled => 0 );
+
+                my $left = $count - 1; # already created one
+
+                # Insert 100k each time, to avoid too much memory assumption.
+                while ( $left > 0 ) {
+                    if ( $left > 100_000 ) {
+                        my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
+                            ("('$type', 0)") x ( 100_000 );
+                        $self->RunSQL($sql);
+                        $left -= 100_000;
+                    }
+                    else {
+                        my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
+                            ("('$type', 0)") x $left;
+                        $self->RunSQL($sql);
+                        last;
+                    }
+                }
+
+                push @{ $self->{_principals}{$type} }, $id .. ( $count - 1 + $id );
+            }
+        }
+    }
+}
+
+sub NextPrincipalId {
+    my $self = shift;
+    my %args = @_;
+    my $id;
+    if ( $args{PrincipalType} eq 'User' ) {
+        $id = shift @{$self->{_principals}{User} || []};
+    }
+    else {
+        $id = shift @{$self->{_principals}{Group} || []};
+    }
+
+    if ( !$id ) {
+        my $principal = RT::Principal->new( RT->SystemUser );
+        ($id) = $principal->Create(%args);
+    }
+
+    if ( $args{Disabled} ) {
+        $self->RunSQL("UPDATE Principals SET Disabled=1 WHERE id=$id");
+    }
+
+    return $id;
 }
 
 sub Resolve {
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index f7d28b09eb..76458baabc 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3103,16 +3103,12 @@ sub PreInflate {
     }
 
     # Create a principal first, so we know what ID to use
-    my $principal = RT::Principal->new( RT->SystemUser );
-    my ($id) = $principal->Create(
-        PrincipalType => 'User',
-        Disabled => $disabled,
-    );
+    my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
 
     # Now we have a principal id, set the id for the user record
     $data->{id} = $id;
 
-    $importer->Resolve( $principal_uid => ref($principal), $id );
+    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
     $data->{id} = $id;
 
     return $class->SUPER::PreInflate( $importer, $uid, $data );
diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index 219e34534e..882b68b097 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -102,6 +102,9 @@ GetOptions(
 
     "auto-commit!",
 
+    "batch-user-principals=i",
+    "batch-group-principals=i",
+
     "dump=s@",
 ) or Pod::Usage::pod2usage();
 
@@ -151,6 +154,8 @@ my $import = RT::Migrate::Importer::File->new(
     DumpObjects         => $OPT{dump},
     Resume              => $OPT{resume},
     AutoCommit          => $OPT{'auto-commit'},
+    BatchUserPrincipals => $OPT{'batch-user-principals'},
+    BatchGroupPrincipals => $OPT{'batch-group-principals'},
     HandleError         => $error_handler,
 );
 
@@ -293,6 +298,13 @@ Works only in conjunction with C<--list>.
 Don't auto commit to database. When this flag is used, it will commit only
 once for each data file.  This could boost performance in some cases.
 
+=item B<--batch-user-pricipals> I<NUMBER>
+=item B<--batch-group-pricipals> I<NUMBER>
+
+The number of user/group principals to create in batch beforehand. Default is 0.
+This is to improve performance for not-cloned serialized data of big instances,
+usually you don't need to specify this.
+
 =back
 
 

commit 98277993ab655f0c74244eba9a2bc3fd2a49b98d
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 14 13:09:53 2022 +0800

    No need to convert ascii strings
    
    By avoiding unnecessary actions like this, importer runs faster with Pg.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 1d091019bf..3ffb9c2bcc 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -340,7 +340,7 @@ sub Create {
     # could be be wrongly encoded on Pg.
     if ( RT->Config->Get( 'DatabaseType' ) eq 'Pg' ) {
         for my $field ( keys %$data ) {
-            if ( $data->{$field} && !utf8::is_utf8( $data->{$field} ) ) {
+            if ( $data->{$field} && $data->{$field} =~ /[^\x00-\x7F]/ && !utf8::is_utf8( $data->{$field} ) ) {
 
                 # Make sure decoded data is valid UTF-8, otherwise Pg won't insert
                 my $decoded;

commit 5ad8d1c893c99ed73400b80b0eb06b0c499c3fdc
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 14 05:02:16 2022 +0800

    Reduce unnecessary Load calls after creation for performance
    
    Now we only load objects if needed, like to run PostInflate, etc.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 9158d9bc68..1d091019bf 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -379,10 +379,14 @@ sub Create {
     $self->{ObjectCount}{$class}++;
     $self->Resolve( $uid => $class, $id );
 
-    # Load it back to get real values into the columns
-    $obj = $class->new( RT->SystemUser );
-    $obj->Load( $id );
-    $obj->PostInflate( $self, $uid );
+    # Attribute, Article and SystemUser have actions in PostInflate. CustomField is for NewCFs.
+    if ( $class =~ /^RT::(Attribute|Article|CustomField)$/ || ( $class eq 'RT::User' && $data->{Name} eq 'RT_System' ) )
+    {
+        # Load it back to get real values into the columns
+        $obj = $class->new( RT->SystemUser );
+        $obj->Load( $id );
+        $obj->PostInflate( $self, $uid );
+    }
 
     return $obj;
 }
@@ -453,6 +457,7 @@ sub ReadStream {
                   ? $origid
                   : $self->Organization . ":$origid";
 
+        $obj->Load( $self->Lookup($uid)->[1] );
         my ($id, $msg) = $obj->AddCustomFieldValue(
             Field             => $self->{OriginalId},
             Value             => $value,

commit 15b69b99345065e5840c15ea1731437995ed6e46
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 13 20:45:31 2022 +0800

    Fill up CachedGroupMembers at the end of importer for performance
    
    Previously we created corresponding CachedGroupMember rows on every
    Group/GroupMember create, which was quite slow. By doing it via plain
    SQL at the end, it's astonishingly faster than before. E.g. for
    CachedGroupMembers with 100k rows, now it could be done in seconds
    (prevously it was in minutes!)

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 7f9854a204..39f6de0a43 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1752,17 +1752,6 @@ sub PreInflate {
     return 1;
 }
 
-sub PostInflate {
-    my $self = shift;
-
-    my $cgm = RT::CachedGroupMember->new($self->CurrentUser);
-    $cgm->Create(
-        Group  => $self->PrincipalObj,
-        Member => $self->PrincipalObj,
-        ImmediateParent => $self->PrincipalObj
-    );
-}
-
 # If this group represents the members of a custom role, then return
 # the RT::CustomRole object. Otherwise, return undef
 sub _CustomRoleObj {
diff --git a/lib/RT/GroupMember.pm b/lib/RT/GroupMember.pm
index 4a092c332f..2733d3451f 100644
--- a/lib/RT/GroupMember.pm
+++ b/lib/RT/GroupMember.pm
@@ -603,12 +603,6 @@ sub PreInflate {
     return 1;
 }
 
-sub PostInflate {
-    my $self = shift;
-
-    $self->_InsertCGM;
-}
-
 RT::Base->_ImportOverlays();
 
 1;
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index ae074fd4bd..9158d9bc68 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -477,6 +477,66 @@ sub CloseStream {
 
     $self->{Progress}->(undef, 'force') if $self->{Progress};
 
+    # Fill CGM
+
+    # Groups
+    $self->RunSQL(<<'EOF');
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+    SELECT Groups.id, Groups.id, 0, Groups.id, Principals.Disabled FROM Groups
+    LEFT JOIN Principals ON ( Groups.id = Principals.id )
+    LEFT JOIN CachedGroupMembers ON (
+        Groups.id = CachedGroupMembers.GroupId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.MemberId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.ImmediateParentId
+        )
+    WHERE CachedGroupMembers.id IS NULL
+EOF
+
+    # GroupMembers
+    $self->RunSQL(<<'EOF');
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+    SELECT GroupMembers.GroupId, GroupMembers.MemberId, 0, GroupMembers.GroupId, Principals.Disabled FROM GroupMembers
+    LEFT JOIN Principals ON ( GroupMembers.GroupId = Principals.id )
+    LEFT JOIN CachedGroupMembers ON (
+        GroupMembers.GroupId = CachedGroupMembers.GroupId
+        AND GroupMembers.MemberId = CachedGroupMembers.MemberId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.ImmediateParentId
+    )
+    WHERE CachedGroupMembers.id IS NULL
+EOF
+
+    # Fixup Via
+    $self->RunSQL(<<'EOF');
+UPDATE CachedGroupMembers SET Via=id WHERE Via=0
+EOF
+
+    # Cascaded GroupMembers, use the same SQL in rt-validator
+    my $cascaded_cgm = <<'EOF';
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+SELECT cgm1.GroupId, gm2.MemberId, cgm1.id AS Via,
+    cgm1.MemberId AS ImmediateParentId, cgm1.Disabled
+FROM
+    CachedGroupMembers cgm1
+    CROSS JOIN GroupMembers gm2
+    LEFT JOIN CachedGroupMembers cgm3 ON (
+            cgm3.GroupId           = cgm1.GroupId
+        AND cgm3.MemberId          = gm2.MemberId
+        AND cgm3.Via               = cgm1.id
+        AND cgm3.ImmediateParentId = cgm1.MemberId )
+    LEFT JOIN Groups g ON (
+        cgm1.GroupId = g.id
+    )
+WHERE cgm1.GroupId != cgm1.MemberId
+AND gm2.GroupId = cgm1.MemberId
+AND cgm3.id IS NULL
+AND g.Domain != 'RT::Ticket-Role'
+EOF
+    # Do this multiple times if needed to fill up cascaded group members
+    while ( my $rv = $self->RunSQL($cascaded_cgm) ) {
+        # $rv could be 0E0 that is true in bool context but 0 in numeric comparison.
+        last unless $rv > 0;
+    }
+
     return if $self->{Clone};
 
     # Take global CFs which we made and make them un-global

commit 6aec3179c5a5242a7409b0b5baec0af339fec713
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Sun Oct 23 02:06:34 2022 +0800

    Wrap raw "do" SQL into eval to show more error details
    
    Like existing wrappers for Create calls, it gives HandleError a chance
    to rescue when possible.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 98c6742b53..ae074fd4bd 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -519,4 +519,23 @@ sub Progress {
     return $self->{Progress} = $_[0];
 }
 
+sub RunSQL {
+    my $self = shift;
+    my $rv;
+    eval {
+        local $SIG{__DIE__};
+        $rv = $RT::Handle->dbh->do(@_);
+    };
+    if ($@) {
+        my $err = "Failed to run @_: $@\n";
+        if ( not $self->{HandleError}->( $self, $err ) ) {
+            die $err;
+        }
+        else {
+            return undef;
+        }
+    }
+    return $rv;
+}
+
 1;

-----------------------------------------------------------------------


hooks/post-receive
-- 
rt


More information about the rt-commit mailing list