[Rt-commit] rt branch 5.0/speed-up-importer created. rt-5.0.4-66-gc74b9aa57a

BPS Git Server git at git.bestpractical.com
Thu Jul 6 21:54:59 UTC 2023


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "rt".

The branch, 5.0/speed-up-importer has been created
        at  c74b9aa57aac7884b7034596fe051c6f0532bd15 (commit)

- Log -----------------------------------------------------------------
commit c74b9aa57aac7884b7034596fe051c6f0532bd15
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Nov 11 04:03:53 2022 +0800

    Serialize/Import bookmarks correctly
    
    Previously the ticket ids were not serialized to UIDs.

diff --git a/lib/RT/Attribute.pm b/lib/RT/Attribute.pm
index 51cf0a3ae3..a92570951f 100644
--- a/lib/RT/Attribute.pm
+++ b/lib/RT/Attribute.pm
@@ -1070,6 +1070,17 @@ sub PostInflateFixup {
         }
         $self->SetContent( $content, SyncLinks => 0, RecordTransaction => 0 );
     }
+    elsif ( $self->Name eq 'Bookmarks' ) {
+        my $content = $self->Content;
+        my @ids;
+        for my $uid (@$content) {
+            if ( my $ticket = $importer->LookupObj($$uid) ) {
+                push @ids, $ticket->Id;
+            }
+        }
+        $content = { map { $_ => 1 } @ids };
+        $self->SetContent($content);
+    }
 }
 
 sub PostInflate {
@@ -1138,6 +1149,11 @@ sub Serialize {
 
         $store{Content} = $self->_SerializeContent($content);
     }
+    elsif ( $self->Name eq 'Bookmarks' ) {
+        my $content = $self->Content;
+        $content = [ map { \( join '-', 'RT::Ticket', $RT::Organization, $_ ) } keys %$content ];
+        $store{Content} = $self->_SerializeContent($content);
+    }
 
     return %store;
 }

commit 19cd66ef291432c2422835f9ca6211575f549d83
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Nov 8 09:29:22 2022 +0800

    Serialize/Import subscriptions correctly
    
    Previously the recipients were not serialized to UIDs.

diff --git a/lib/RT/Attribute.pm b/lib/RT/Attribute.pm
index 11913d5708..51cf0a3ae3 100644
--- a/lib/RT/Attribute.pm
+++ b/lib/RT/Attribute.pm
@@ -1039,6 +1039,22 @@ sub PostInflateFixup {
     }
     elsif ($self->Name eq 'Subscription') {
         my $content = $self->Content;
+        for my $type ( qw/Users Groups/ ) {
+            if ( my $list = $content->{Recipients}{$type} ) {
+                my @ids;
+                for my $item ( @$list ) {
+                    if ( ref $item eq 'SCALAR' ) {
+                        my $obj = $importer->LookupObj($$item);
+                        push @ids, $obj->Id if $obj && $obj->Id;
+                    }
+                    else {
+                        push @ids, $item;
+                    }
+                }
+                @$list = @ids;
+            }
+        }
+
         if (ref($content->{DashboardId}) eq 'SCALAR') {
             my $attr = $importer->LookupObj(${ $content->{DashboardId} });
             if ($attr) {
@@ -1102,6 +1118,24 @@ sub Serialize {
     elsif ($store{Name} eq 'Subscription') {
         my $content = $self->_DeserializeContent($store{Content});
         $content->{DashboardId} = \( join '-', 'RT::Attribute', $RT::Organization, $content->{DashboardId} );
+
+        # encode user/groups to be UIDs
+        for my $type (qw/Users Groups/) {
+            if ( $content->{Recipients}{$type} ) {
+                my $class = $type eq 'Users' ? 'RT::User' : 'RT::Group';
+                my @uids;
+                for my $id ( @{ $content->{Recipients}{$type} } ) {
+                    my $obj = $class->new( RT->SystemUser );
+                    $obj->Load($id);
+                    if ( $obj->Id ) {
+                        push @uids,
+                            \( join '-', $class, $class eq 'RT::User' ? $obj->Name : ( $RT::Organization, $obj->Id ) );
+                    }
+                }
+                $content->{Recipients}{$type} = \@uids;
+            }
+        }
+
         $store{Content} = $self->_SerializeContent($content);
     }
 

commit 898b36dc33f2322cccad29d07b1c83511f30e453
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 13 21:34:01 2022 +0800

    Add batch mode to importer for data serialized with --clone or --all
    
    By reducing SQL calls a lot, importer can run much faster now. Cloned
    data doesn't have dependencies to resolve, so we can also insert rows in
    parallel, which makes the speedup more noticable.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index b1567be0a2..f73240e522 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -74,6 +74,8 @@ sub Init {
         AutoCommit          => 1,
         BatchUserPrincipals  => 0,
         BatchGroupPrincipals => 0,
+        BatchSize            => 0,
+        MaxProcesses         => 10,
         @_,
     );
 
@@ -87,6 +89,8 @@ sub Init {
     $self->{AutoCommit} = $args{AutoCommit};
 
     $self->{$_} = $args{$_} for qw/BatchUserPrincipals BatchGroupPrincipals/;
+    $self->{BatchSize}    = $args{BatchSize};
+    $self->{MaxProcesses} = $args{MaxProcesses} || 10;
 
     $self->{HandleError} = sub { 0 };
     $self->{HandleError} = $args{HandleError}
@@ -166,6 +170,7 @@ sub InitStream {
                 Type  => 'FreeformSingle',
             );
         }
+        $self->{OriginalId} = $cf->Id;
     }
 
     if ( !$self->{Clone} ) {
@@ -221,6 +226,29 @@ sub NextPrincipalId {
     return $id;
 }
 
+sub NextId {
+    my $self  = shift;
+    my $class = shift;
+    my $id    = shift;
+
+    if ( $id ) {
+        $self->{_next_id}{$class} = $id;
+        return $id;
+    }
+
+    if ( defined $self->{_next_id}{$class} ) {
+        return $self->{_next_id}{$class}++;
+    }
+    return;
+}
+
+sub HasNextId {
+    my $self  = shift;
+    my $class = shift;
+
+    return defined $self->{_next_id}{$class} ? 1 : 0;
+}
+
 sub Resolve {
     my $self = shift;
     my ($uid, $class, $id) = @_;
@@ -235,27 +263,39 @@ sub Resolve {
 
     return unless $self->{Pending}{$uid};
 
+    my @left;
     for my $ref (@{$self->{Pending}{$uid}}) {
-        my ($pclass, $pid) = @{ $self->Lookup( $ref->{uid} ) };
-        my $obj = $pclass->new( RT->SystemUser );
-        $obj->LoadByCols( Id => $pid );
-        $obj->__Set(
-            Field => $ref->{column},
-            Value => $id,
-        ) if defined $ref->{column};
-        $obj->__Set(
-            Field => $ref->{classcolumn},
-            Value => $class,
-        ) if defined $ref->{classcolumn};
-        $obj->__Set(
-            Field => $ref->{uri},
-            Value => $self->LookupObj($uid)->URI,
-        ) if defined $ref->{uri};
-        if (my $method = $ref->{method}) {
-            $obj->$method($self, $ref, $class, $id);
+        if ( my $lookup = $self->Lookup( $ref->{uid} ) ) {
+            my ( $pclass, $pid ) = @{$lookup};
+            my $obj = $pclass->new( RT->SystemUser );
+            $obj->LoadByCols( Id => $pid );
+            $obj->__Set(
+                Field => $ref->{column},
+                Value => $id,
+            ) if defined $ref->{column};
+            $obj->__Set(
+                Field => $ref->{classcolumn},
+                Value => $class,
+            ) if defined $ref->{classcolumn};
+            $obj->__Set(
+                Field => $ref->{uri},
+                Value => $self->LookupObj($uid)->URI,
+            ) if defined $ref->{uri};
+            if ( my $method = $ref->{method} ) {
+                $obj->$method( $self, $ref, $class, $id );
+            }
+        }
+        else {
+            push @left, $ref;
         }
     }
-    delete $self->{Pending}{$uid};
+
+    if ( @left ) {
+        $self->{Pending}{$uid} = \@left;
+    }
+    else {
+        delete $self->{Pending}{$uid};
+    }
 }
 
 sub Lookup {
@@ -419,6 +459,23 @@ sub Create {
         }
     }
 
+    if ( $self->{BatchSize} && ( $self->{Clone} || $self->{All} ) ) {
+
+        # Finish up the previous class if there are any records left
+        my ($previous_class) = grep { $_ ne $class && @{ $self->{_batch}{$_} } } keys %{ $self->{_batch} || {} };
+        if ($previous_class) {
+            $self->BatchCreate( $previous_class, $self->{_batch}{$previous_class} );
+        }
+
+        if ( $data->{id} || $self->HasNextId($class) ) {
+            push @{ $self->{_batch}{$class} }, [ $uid, $data ];
+            if ( @{ $self->{_batch}{$class} } == $self->{BatchSize} ) {
+                $self->BatchCreate( $class, $self->{_batch}{$class} );
+            }
+            return;
+        }
+    }
+
     my ($id, $msg) = eval {
         # catch and rethrow on the outside so we can provide more info
         local $SIG{__DIE__};
@@ -436,6 +493,16 @@ sub Create {
         }
     }
 
+    return $obj if $self->{Clone};
+
+    # Users/Groups have id set in PreInflate, no need to set here
+    if (   $self->{BatchSize}
+        && $self->{All}
+        && $class =~ /^RT::(?:Ticket|Transaction|Attachment|GroupMember|ObjectCustomFieldValue|Attribute|Link)$/ )
+    {
+        $self->NextId( $class, $id + 1 );
+    }
+
     $self->{ObjectCount}{$class}++;
     $self->Resolve( $uid => $class, $id );
 
@@ -515,18 +582,7 @@ sub ReadStream {
     # If it's a ticket, we might need to create a
     # TicketCustomField for the previous ID
     if ($class eq "RT::Ticket" and $self->{OriginalId}) {
-        my $value = $self->{ExcludeOrganization}
-                  ? $origid
-                  : $self->Organization . ":$origid";
-
-        $obj->Load( $self->Lookup($uid)->[1] );
-        my ($id, $msg) = $obj->AddCustomFieldValue(
-            Field             => $self->{OriginalId},
-            Value             => $value,
-            RecordTransaction => 0,
-        );
-        warn "Failed to add custom field to $uid: $msg"
-            unless $id;
+        $self->CreateOriginalIdOCFVs( { $self->Lookup($uid)->[1] => $origid } );
     }
 
     # If it's a CF, we don't know yet if it's global (the OCF
@@ -544,6 +600,33 @@ sub CloseStream {
 
     $self->{Progress}->(undef, 'force') if $self->{Progress};
 
+    my %order = (
+        'RT::Ticket'                 => 1,
+        'RT::Group'                  => 2,
+        'RT::GroupMember'            => 3,
+        'RT::ObjectCustomFieldValue' => 4,
+        'RT::Transaction'            => 5,
+        'RT::Attachment'             => 6,
+    );
+
+    for my $class (
+        sort { ( $order{$a} // 0 ) <=> ( $order{$b} // 0 ) }
+        grep { @{ $self->{_batch}{$_} } } keys %{ $self->{_batch} || {} }
+        )
+    {
+        $self->BatchCreate( $class, $self->{_batch}{$class} );
+    }
+
+    $self->{_pm}->wait_all_children if $self->{_pm};
+
+    # Now have all data imported, try to resolve again.
+    my @uids = grep { $self->{UIDs}{$_} } keys %{ $self->{Pending} };
+
+    for my $uid (@uids) {
+        my ( $class, $id ) = split /-/, $self->{UIDs}{$uid}, 2;
+        $self->Resolve( $uid, $class, $id );
+    }
+
     # Fill CGM
 
     # Groups
@@ -646,6 +729,151 @@ sub Progress {
     return $self->{Progress} = $_[0];
 }
 
+sub BatchCreate {
+    my $self  = shift;
+    my $class = shift;
+    my $items = shift or return;
+
+    return unless @$items;
+
+    if ( $self->{Clone} ) {
+
+        if ( !$self->{_pm} ) {
+            require Parallel::ForkManager;
+            $self->{_pm} = Parallel::ForkManager->new( $self->{MaxProcesses} );
+        }
+
+        $self->{ObjectCount}{$class} += @$items;
+        my @copy = @$items;
+        @$items = ();
+
+        $RT::Handle->Commit unless $self->{AutoCommit};
+        $RT::Handle->Disconnect;
+
+        if ( $self->{_pm}->start ) {
+            $RT::Handle->Connect;
+            $RT::Handle->BeginTransaction unless $self->{AutoCommit};
+            return 1;
+        }
+
+        $RT::Handle->Connect;
+        $self->_BatchCreate( $class, \@copy );
+        $self->{_pm}->finish;
+    }
+    else {
+        # In case there are duplicates, which could happen for merged members.
+        if ( $class eq 'RT::GroupMember' ) {
+            my %added;
+            @$items = grep { !$added{ $_->[1]{GroupId} }{ $_->[1]{MemberId} }++ } @$items;
+        }
+
+        my $map = $self->_BatchCreate( $class, $items );
+        $self->{ObjectCount}{$class} += @$items;
+        @$items = ();
+
+        if ($map) {
+            $self->{UIDs}{$_} = $map->{$_} for keys %$map;
+
+            my %ticket_map;
+            for my $uid ( keys %$map ) {
+                my ( $class, $id ) = split /-/, $map->{$uid}, 2;
+
+                $self->Resolve( $uid => $class, $id );
+                if ( $class eq 'RT::User' ) {
+                    RT->InitSystemObjects if $uid =~ /-RT_System$/;
+                }
+                elsif ( $class->can('PostInflate') ne RT::Record->can('PostInflate') ) {
+                    my $record = $class->new( RT->SystemUser );
+                    $record->Load($id);
+                    $record->PostInflate( $self, $uid );
+                }
+
+                if ( $self->{OriginalId} && $class eq 'RT::Ticket' ) {
+                    my ($orig_id) = ( $uid =~ /-(\d+)$/ );
+                    $ticket_map{$id} = $orig_id;
+                }
+            }
+
+            $self->CreateOriginalIdOCFVs( \%ticket_map ) if %ticket_map;
+        }
+        return 1;
+    }
+}
+
+sub _BatchCreate {
+    my $self  = shift;
+    my $class = shift;
+    my $items = shift or return;
+    return unless @$items;
+
+    my %map;
+
+    # Do not actually insert, just get the SQL, with sorted field/value pairs
+    local *RT::Handle::Insert = sub {
+        my $self  = shift;
+        my $table = shift;
+        my %attr  = @_;
+        return $self->InsertQueryString( $table, map { $_ => $attr{$_} } sort keys %attr );
+    };
+
+    my %query;
+    for (@$items) {
+        my ( $uid, $data ) = @$_;
+        my $obj = $class->new( RT->SystemUser );
+
+        my $id = $data->{id} || $self->NextId($class);
+        my ( $sql, @bind ) = $obj->DBIx::SearchBuilder::Record::Create( %$data, id => $id );
+        $map{$uid} = join '-', $class, $id unless $self->{Clone};
+        push @{ $query{$sql} }, \@bind;
+    }
+
+    my $dbh = $RT::Handle->dbh;
+
+    for my $sql ( keys %query ) {
+
+        my $count = @{ $query{$sql} };
+        my $values_paren;
+        if ( $sql =~ /(\(\?.+?\))/i ) {
+            $values_paren = $1;
+        }
+
+        # DBs have placeholder limitations(64k for Pg), here we replace
+        # placeholders to support bigger batch sizes. The performance is similar.
+        my $batch_sql
+            = $RT::Handle->FillIn( $sql . ( ", $values_paren" x ( $count - 1 ) ), [ map @$_, @{ $query{$sql} } ] );
+        $self->RunSQL($batch_sql);
+    }
+
+    # Clone doesn't need to return anything
+    return $self->{Clone} ? () : \%map;
+}
+
+=head2 CreateOriginalIdOCFVs { NEW_ID => ORIG_ID, ... }
+
+Create corresponding ObjectCustomFieldValues that contain tickets' original ids.
+
+=cut
+
+sub CreateOriginalIdOCFVs {
+    my $self = shift;
+    my $ticket_map = shift;
+    if ( %$ticket_map ) {
+        my $sql
+            = 'INSERT INTO ObjectCustomFieldValues (CustomField, ObjectType, ObjectId, Content, Creator) VALUES ';
+        my @values;
+        if ( !$self->{ExcludeOrganization} ) {
+            $ticket_map->{$_} = "$self->{Organization}:$ticket_map->{$_}" for keys %$ticket_map;
+        }
+
+        my $creator = RT->SystemUser->Id;
+        for my $id ( sort { $a <=> $b } keys %$ticket_map ) {
+            push @values, qq{($self->{OriginalId}, 'RT::Ticket', $id, '$ticket_map->{$id}', $creator)};
+        }
+        $sql .= join ',', @values;
+        $self->RunSQL($sql);
+    }
+}
+
 sub RunSQL {
     my $self = shift;
     my $rv;
diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index 81dda8b8b9..cda7ee36af 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -104,6 +104,8 @@ GetOptions(
 
     "batch-user-principals=i",
     "batch-group-principals=i",
+    "batch-size=i",
+    "max-processes=i",
 
     "dump=s@",
 ) or Pod::Usage::pod2usage();
@@ -168,6 +170,8 @@ my $import = RT::Migrate::Importer::File->new(
     AutoCommit          => $OPT{'auto-commit'},
     BatchUserPrincipals => $OPT{'batch-user-principals'},
     BatchGroupPrincipals => $OPT{'batch-group-principals'},
+    BatchSize           => $OPT{'batch-size'},
+    MaxProcesses        => $OPT{'max-processes'} || 10,
     HandleError         => $error_handler,
 );
 
@@ -317,6 +321,18 @@ The number of user/group principals to create in batch beforehand. Default is 0.
 This is to improve performance for not-cloned serialized data of big instances,
 usually you don't need to specify this.
 
+=item B<--batch-size> I<BATCH_SIZE>
+
+Create objects in batch. Default is 0, meaning batch processing is not
+enabled. This is for data serialized with C<--clone> or C<--all>. For cloned
+serialized data, each batch processing will also take place in a separate
+child process.
+
+=item B<--max-processes> I<MAX_PROCESSES>
+
+The number of max allowed child processes for batch processing. Default is
+10. This is for cloned serialized data only.
+
 =back
 
 

commit 5bf9960ebe21de20e53fc92c31a9a6fff1a49ecc
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Sat Oct 22 20:56:41 2022 +0800

    Avoid duplicates of postponed id resolution
    
    This could happen when a dashboard contains duplicated saved searches.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 6c79a4f9d2..b1567be0a2 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -298,6 +298,8 @@ sub Postpone {
         uri         => undef,
         @_,
     );
+    return if $self->{_postponed}{ join ';', map { $_ . ':' . ( $args{$_} // '' ) } sort keys %args }++;
+
     my $uid = delete $args{for};
 
     if (defined $uid) {

commit bc3b4804e75557aa57ff0a78514bbba98845d4f2
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 18:56:27 2022 +0800

    Skip rights check on Attachment access for system user
    
    This is initially for serialization performance, so we don't need to
    query Transactions table to get Attachment field values.

diff --git a/lib/RT/Attachment.pm b/lib/RT/Attachment.pm
index 2c50447a91..3b6379dd5b 100644
--- a/lib/RT/Attachment.pm
+++ b/lib/RT/Attachment.pm
@@ -1070,6 +1070,8 @@ Returns its value as a string, if the user passes an ACL check
 
 sub _Value {
     my $self  = shift;
+    return $self->__Value(@_) if $self->CurrentUser->Id == RT->SystemUser->Id;
+
     my $field = shift;
 
     #if the field is public, return it.
diff --git a/lib/RT/Attachments.pm b/lib/RT/Attachments.pm
index 239fce5c10..210c0212c0 100644
--- a/lib/RT/Attachments.pm
+++ b/lib/RT/Attachments.pm
@@ -248,7 +248,7 @@ sub AddRecord {
     my $self = shift;
     my ($record) = @_;
 
-    return unless $record->TransactionObj->CurrentUserCanSee;
+    return unless $self->CurrentUser->Id == RT->SystemUser->Id || $record->TransactionObj->CurrentUserCanSee;
     return $self->SUPER::AddRecord( $record );
 }
 

commit 2a6d02e384c04c392fbc81b2a6219e68e4927782
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 18:52:01 2022 +0800

    Skip rights check on ACE access for system user
    
    This is initially for serialization performance, so we don't need to
    query ACL and CGM tables to get ACE field values.

diff --git a/lib/RT/ACE.pm b/lib/RT/ACE.pm
index f23d39843f..b1f9bc9b68 100644
--- a/lib/RT/ACE.pm
+++ b/lib/RT/ACE.pm
@@ -588,6 +588,7 @@ sub _Set {
 
 sub _Value {
     my $self = shift;
+    return $self->__Value(@_) if $self->CurrentUser->Id == RT->SystemUser->Id;
 
     if ( $self->PrincipalObj->IsGroup
             && $self->PrincipalObj->Object->HasMemberRecursively(

commit e8def1d53408c980f2ec669d070ba415230124f9
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 16:19:05 2022 +0800

    Cache various objects for records
    
    This covers the following methods:
    
        RT::ACE::Object
        RT::ACE::PrincipalObj
        RT::Attribute::Object
        RT::Group::PrincipalObj
        RT::Transaction::Object
        RT::ObjectCustomFieldValue::Object
        RT::ObjectCustomFieldValue::CustomFieldObj
    
    The returned objects usually do not change.

diff --git a/lib/RT/ACE.pm b/lib/RT/ACE.pm
index b106c0a1bb..f23d39843f 100644
--- a/lib/RT/ACE.pm
+++ b/lib/RT/ACE.pm
@@ -523,6 +523,8 @@ If it's the system object, returns undef.
 
 If the user has no rights, returns undef.
 
+CAVEAT: the returned object is cached, reload it to get the latest data.
+
 =cut
 
 
@@ -531,16 +533,17 @@ If the user has no rights, returns undef.
 sub Object {
     my $self = shift;
 
-    my $appliesto_obj;
+    return $self->{_cached}{Object} if $self->{_cached}{Object};
 
-    if ($self->__Value('ObjectType') && $self->__Value('ObjectType')->DOES('RT::Record::Role::Rights') ) {
-        $appliesto_obj =  $self->__Value('ObjectType')->new($self->CurrentUser);
-        unless (ref( $appliesto_obj) eq $self->__Value('ObjectType')) {
+    if ( $self->__Value('ObjectType') && $self->__Value('ObjectType')->DOES('RT::Record::Role::Rights') ) {
+        my $appliesto_obj = $self->__Value('ObjectType')->new( $self->CurrentUser );
+        unless ( ref($appliesto_obj) eq $self->__Value('ObjectType') ) {
             return undef;
         }
         $appliesto_obj->Load( $self->__Value('ObjectId') );
-        return ($appliesto_obj);
-     }
+        $self->{_cached}{Object} = $appliesto_obj;
+        return $self->{_cached}{Object};
+    }
     else {
         $RT::Logger->warning( "$self -> Object called for an object "
                               . "of an unknown type:"
@@ -555,20 +558,22 @@ sub Object {
 
 Returns the RT::Principal object for this ACE. 
 
+CAVEAT: the returned object is cached, reload it to get the latest data.
+
 =cut
 
 sub PrincipalObj {
     my $self = shift;
 
-    my $princ_obj = RT::Principal->new( $self->CurrentUser );
-    $princ_obj->Load( $self->__Value('PrincipalId') );
+    unless ( $self->{_cached}{PrincipalObj} ) {
+        $self->{_cached}{PrincipalObj} = RT::Principal->new( $self->CurrentUser );
+        $self->{_cached}{PrincipalObj}->Load( $self->__Value('PrincipalId') );
 
-    unless ( $princ_obj->Id ) {
-        $RT::Logger->err(
-                   "ACE " . $self->Id . " couldn't load its principal object" );
+        unless ( $self->{_cached}{PrincipalObj}->Id ) {
+            $RT::Logger->err( "ACE " . $self->Id . " couldn't load its principal object" );
+        }
     }
-    return ($princ_obj);
-
+    return $self->{_cached}{PrincipalObj};
 }
 
 
diff --git a/lib/RT/Attribute.pm b/lib/RT/Attribute.pm
index 767cc25c60..11913d5708 100644
--- a/lib/RT/Attribute.pm
+++ b/lib/RT/Attribute.pm
@@ -389,20 +389,28 @@ sub SetSubValues {
 
 }
 
+=head2 Object
 
-sub Object {
-    my $self = shift;
-    my $object_type = $self->__Value('ObjectType');
-    my $object;
-    eval { $object = $object_type->new($self->CurrentUser) };
-    unless(UNIVERSAL::isa($object, $object_type)) {
-        $RT::Logger->error("Attribute ".$self->Id." has a bogus object type - $object_type (".$@.")");
-        return(undef);
-     }
-    $object->Load($self->__Value('ObjectId'));
+Returns the object current attribute blongs to.
 
-    return($object);
+CAVEAT: the returned object is cached, reload it to get the latest data.
+
+=cut
 
+sub Object {
+    my $self = shift;
+    unless ( $self->{_cached}{Object} ) {
+        my $object_type = $self->__Value('ObjectType');
+        my $object;
+        eval { $object = $object_type->new( $self->CurrentUser ) };
+        unless ( UNIVERSAL::isa( $object, $object_type ) ) {
+            $RT::Logger->error( "Attribute " . $self->Id . " has a bogus object type - $object_type (" . $@ . ")" );
+            return (undef);
+        }
+        $object->Load( $self->__Value('ObjectId') );
+        $self->{_cached}{Object} = $object;
+    }
+    return $self->{_cached}{Object};
 }
 
 
diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 575d2285e9..fd592835bd 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1335,9 +1335,16 @@ The response is cached. PrincipalObj should never ever change.
 
 sub PrincipalObj {
     my $self = shift;
-    my $res = RT::Principal->new( $self->CurrentUser );
-    $res->Load( $self->id );
-    return $res;
+    unless ( $self->{_cached}{PrincipalObj} && $self->{_cached}{PrincipalObj}->Id ) {
+        $self->{_cached}{PrincipalObj} = RT::Principal->new( $self->CurrentUser );
+        if ( $self->Id ) {
+            my ( $ret, $msg ) = $self->{_cached}{PrincipalObj}->Load( $self->id );
+            if ( !$ret ) {
+                RT->Logger->warning( "Couldn't load principal #" . $self->id . ": $msg" );
+            }
+        }
+    }
+    return $self->{_cached}{PrincipalObj};
 }
 
 
diff --git a/lib/RT/ObjectCustomFieldValue.pm b/lib/RT/ObjectCustomFieldValue.pm
index 53e62c2334..ffad1c4ddd 100644
--- a/lib/RT/ObjectCustomFieldValue.pm
+++ b/lib/RT/ObjectCustomFieldValue.pm
@@ -213,16 +213,20 @@ sub LoadByObjectContentAndCustomField {
 
 =head2 CustomFieldObj
 
-Returns the CustomField Object which has the id returned by CustomField
+Returns the CustomField Object which has the id returned by CustomField.
+
+CAVEAT: the returned object is cached, reload it to get the latest data.
 
 =cut
 
 sub CustomFieldObj {
     my $self = shift;
-    my $CustomField = RT::CustomField->new( $self->CurrentUser );
-    $CustomField->SetContextObject( $self->Object );
-    $CustomField->Load( $self->__Value('CustomField') );
-    return $CustomField;
+    unless ( $self->{_cached}{CustomFieldObj} ) {
+        $self->{_cached}{CustomFieldObj} = RT::CustomField->new( $self->CurrentUser );
+        $self->{_cached}{CustomFieldObj}->SetContextObject( $self->Object );
+        $self->{_cached}{CustomFieldObj}->Load( $self->__Value('CustomField') );
+    }
+    return $self->{_cached}{CustomFieldObj};
 }
 
 
@@ -292,15 +296,19 @@ sub Content {
 
 =head2 Object
 
-Returns the object this value applies to
+Returns the object this value applies to.
+
+CAVEAT: the returned object is cached, reload it to get the latest data.
 
 =cut
 
 sub Object {
     my $self  = shift;
-    my $Object = $self->__Value('ObjectType')->new( $self->CurrentUser );
-    $Object->LoadById( $self->__Value('ObjectId') );
-    return $Object;
+    unless ( $self->{_cached}{Object} ) {
+        $self->{_cached}{Object} = $self->__Value('ObjectType')->new( $self->CurrentUser );
+        $self->{_cached}{Object}->LoadById( $self->__Value('ObjectId') );
+    }
+    return $self->{_cached}{Object};
 }
 
 
diff --git a/lib/RT/Record.pm b/lib/RT/Record.pm
index 32a5e24d6a..3d55d71f33 100644
--- a/lib/RT/Record.pm
+++ b/lib/RT/Record.pm
@@ -379,6 +379,7 @@ sub LoadByCols {
     # We don't want to hang onto this
     $self->ClearAttributes;
     delete $self->{_Roles};
+    delete $self->{_cached};
 
     unless ( $self->_Handle->CaseSensitive ) {
         my ( $ret, $msg ) = $self->SUPER::LoadByCols( @_ );
diff --git a/lib/RT/Transaction.pm b/lib/RT/Transaction.pm
index 1d42265b63..e15a523e12 100644
--- a/lib/RT/Transaction.pm
+++ b/lib/RT/Transaction.pm
@@ -1693,11 +1693,21 @@ sub NewValue {
     }
 }
 
+=head2 Object
+
+Returns the object current transaction blongs to.
+
+CAVEAT: the returned object is cached, reload it to get the latest data.
+
+=cut
+
 sub Object {
     my $self  = shift;
-    my $Object = $self->__Value('ObjectType')->new($self->CurrentUser);
-    $Object->Load($self->__Value('ObjectId'));
-    return $Object;
+    unless ( $self->{_cached}{Object} ) {
+        $self->{_cached}{Object} = $self->__Value('ObjectType')->new( $self->CurrentUser );
+        $self->{_cached}{Object}->Load( $self->__Value('ObjectId') );
+    }
+    return $self->{_cached}{Object};
 }
 
 =head2 NewReferenceObject
diff --git a/share/html/Search/Elements/EditSearches b/share/html/Search/Elements/EditSearches
index 09f0a0748d..40cfb909c4 100644
--- a/share/html/Search/Elements/EditSearches
+++ b/share/html/Search/Elements/EditSearches
@@ -398,6 +398,8 @@ if ( $obj && $obj->id ) {
                 push @results, loc( 'Unable to set privacy id: [_1]', $msg )
                   unless ($val);
             }
+            # Reload to refresh $obj->Object
+            $obj->Load( $obj->Id );
         }
         else {
             # two loc are just for convenience so we don't need to
diff --git a/t/rest2/group-members.t b/t/rest2/group-members.t
index 842d595bf6..3ceeab8fb5 100644
--- a/t/rest2/group-members.t
+++ b/t/rest2/group-members.t
@@ -106,6 +106,7 @@ $group2->Load($group2_id);
     is($res->code, 200, 'Enable group with AdminGroup & SeeGroup rights');
     is_deeply($mech->json_response, ['Group enabled']);
 
+    $group2->Load( $group2->Id ); # Reload to refresh $group2->PrincipalObj
     is($group2->Disabled, 0, "Group enabled");
 }
 

commit bb0920da831d59be28bc3b29961fa8ed058e3960
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 16:21:38 2022 +0800

    Skip rights checks for serializer/importer
    
    This commit short-circuits CurrentUserCanSee and CurrentUserHasRight
    accordingly.

diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index 221677962d..81dda8b8b9 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -117,6 +117,18 @@ die "No such directory $dir\n" unless -d $dir;
 die "$dir doesn't appear to contain serialized data\n"
     unless -f "$dir/001.dat";
 
+for my $class ( grep m{^RT/\w+\.pm$}, keys %INC ) {
+    no warnings 'redefine';
+    no strict 'refs';
+    $class =~ s!\/!::!;
+    $class =~ s!\.pm$!!;
+    for my $method (qw/CurrentUserCanSee CurrentUserHasRight/) {
+        next unless $class->can($method);
+        my $full_name = "${class}::$method";
+        *$full_name = sub {1};
+    }
+}
+
 if ($OPT{dump}) {
     die "Dumping objects only works in conjunction with --list\n"
         unless $OPT{list};
diff --git a/sbin/rt-serializer.in b/sbin/rt-serializer.in
index b71bd7d5a6..356a6f5def 100644
--- a/sbin/rt-serializer.in
+++ b/sbin/rt-serializer.in
@@ -78,6 +78,18 @@ use RT;
 RT::LoadConfig();
 RT::Init();
 
+for my $class ( grep m{^RT/\w+\.pm$}, keys %INC ) {
+    no warnings 'redefine';
+    no strict 'refs';
+    $class =~ s!\/!::!;
+    $class =~ s!\.pm$!!;
+    for my $method (qw/CurrentUserCanSee CurrentUserHasRight/) {
+        next unless $class->can($method);
+        my $full_name = "${class}::$method";
+        *$full_name = sub {1};
+    }
+}
+
 @RT::Record::ISA = qw( DBIx::SearchBuilder::Record RT::Base );
 
 use RT::Migrate;

commit 79c99012671b4ec4bf0c8ab70cd928c1d790438b
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 21 04:51:56 2022 +0800

    Tweak UID generation code and also cache user UID for performance

diff --git a/lib/RT/Attribute.pm b/lib/RT/Attribute.pm
index aaa94b091f..767cc25c60 100644
--- a/lib/RT/Attribute.pm
+++ b/lib/RT/Attribute.pm
@@ -1069,18 +1069,13 @@ sub Serialize {
         my $content = $self->_DeserializeContent($store{Content});
         for my $pane (values %{ $content || {} }) {
             for (@$pane) {
-                my $attr = RT::Attribute->new($self->CurrentUser);
-                $attr->LoadById($_);
-                $_ = \($attr->UID);
+                $_ = \( join '-', 'RT::Attribute', $RT::Organization, $_ );
             }
         }
         $store{Content} = $self->_SerializeContent($content);
     }
     elsif ( $store{Name} =~ /DefaultDashboard$/ ) {
-        my $content = $store{Content};
-        my $attr    = RT::Attribute->new( $self->CurrentUser );
-        $attr->LoadById($content);
-        $store{Content} = \$attr->UID;
+        $store{Content} = \( join '-', 'RT::Attribute', $RT::Organization, $store{Content} );
     }
     # encode saved searches and dashboards to be UIDs
     elsif ($store{Name} eq 'Dashboard') {
@@ -1088,9 +1083,7 @@ sub Serialize {
         for my $pane (values %{ $content->{Panes} || {} }) {
             for (@$pane) {
                 if ($_->{portlet_type} eq 'search' || $_->{portlet_type} eq 'dashboard') {
-                    my $attr = RT::Attribute->new($self->CurrentUser);
-                    $attr->LoadById($_->{id});
-                    $_->{uid} = \($attr->UID);
+                    $_->{uid} = \( join '-', 'RT::Attribute', $RT::Organization, $_->{id} );
                 }
                 # pass through everything else (e.g. component)
             }
@@ -1100,9 +1093,7 @@ sub Serialize {
     # encode subscriptions to have dashboard UID
     elsif ($store{Name} eq 'Subscription') {
         my $content = $self->_DeserializeContent($store{Content});
-        my $attr = RT::Attribute->new($self->CurrentUser);
-        $attr->LoadById($content->{DashboardId});
-        $content->{DashboardId} = \($attr->UID);
+        $content->{DashboardId} = \( join '-', 'RT::Attribute', $RT::Organization, $content->{DashboardId} );
         $store{Content} = $self->_SerializeContent($content);
     }
 
diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 23ca12f092..575d2285e9 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1355,6 +1355,15 @@ sub PrincipalId {
 sub InstanceObj {
     my $self = shift;
 
+    my $class = $self->InstanceClass or return;
+
+    my $obj = $class->new( $self->CurrentUser );
+    $obj->Load( $self->Instance );
+    return $obj;
+}
+
+sub InstanceClass {
+    my $self = shift;
     my $class;
     if ( $self->Domain eq 'ACLEquivalence' ) {
         $class = "RT::User";
@@ -1365,12 +1374,7 @@ sub InstanceObj {
     } elsif ($self->Domain eq 'RT::Asset-Role') {
         $class = "RT::Asset";
     }
-
-    return unless $class;
-
-    my $obj = $class->new( $self->CurrentUser );
-    $obj->Load( $self->Instance );
-    return $obj;
+    return $class;
 }
 
 sub BasicColumns {
@@ -1670,8 +1674,18 @@ sub Serialize {
     my %args = (@_);
     my %store = $self->SUPER::Serialize(@_);
 
-    my $instance = $self->InstanceObj;
-    $store{Instance} = \($instance->UID) if $instance;
+    if ( my $class = $self->InstanceClass ) {
+        if ( $class->can('UID') eq RT::Record->can('UID') ) {
+            $store{Instance} = \( join '-', $class, $RT::Organization, $store{Instance} );
+        }
+        elsif ( $class eq 'RT::User' ) {
+            $store{Instance} = \$args{serializer}{_uid}{user}{ $store{Instance} };
+        }
+        else {
+            my $instance = $self->InstanceObj;
+            $store{Instance} = \($instance->UID);
+        }
+    }
 
     $store{Disabled} = $self->PrincipalObj->Disabled;
     $store{Principal} = $self->PrincipalObj->UID;
diff --git a/lib/RT/Record.pm b/lib/RT/Record.pm
index 7828634bf0..32a5e24d6a 100644
--- a/lib/RT/Record.pm
+++ b/lib/RT/Record.pm
@@ -69,6 +69,7 @@ use warnings;
 use RT;
 use base RT->Config->Get('RecordBaseClass');
 use base 'RT::Base';
+use v5.10;
 
 require RT::Date;
 require RT::User;
@@ -2783,6 +2784,15 @@ sub CustomDateRangeFields {
     return sort { lc $a cmp lc $b } @fields;
 }
 
+=head2 UID
+
+UID used by shredder/seralizer to identify the record. The format is
+"$Class-$Organization-$Id", e.g.
+
+    "RT::Ticket-example.com-20"
+
+=cut
+
 sub UID {
     my $self = shift;
     return undef unless defined $self->Id;
@@ -2890,8 +2900,19 @@ sub Serialize {
     }
     return %store unless $args{UIDs};
 
+    state %simple_uid_class;
+
     # Use FooObj to turn Foo into a reference to the UID
     for my $col ( grep {$store{$_}} @cols ) {
+        next if $col =~ /^(?:Created|LastUpdated)$/;;
+
+        if ( $self->isa('RT::Ticket') ) {
+            next if $col =~ /^(?:Due|Resolved|Starts|Started|Told)$/;
+        }
+        elsif ( $self->isa('RT::Group') ) {
+            next if $col eq 'Instance';
+        }
+
         my $method = $methods{$col};
         if (not $method) {
             $method = $col;
@@ -2899,15 +2920,44 @@ sub Serialize {
         }
         next unless $self->can($method);
 
-        my $obj = $self->$method;
-        next unless $obj and $obj->isa("RT::Record");
-        $store{$col} = \($obj->UID);
+        my $uid;
+
+        my $value = $self->$col;
+        if ( $simple_uid_class{ ref $self }{$col} && $value =~ /^\d+$/ && $value > 0 ) {
+
+            # UID is based on id, so we can generate UID directly
+            $uid = join '-', $simple_uid_class{ ref $self }{$col}, $RT::Organization, $value;
+        }
+        elsif ( $method =~ /^(?:Creator|LastUpdatedBy)Obj$/ || ( $self->isa('RT::Ticket') && $method eq 'OwnerObj' ) ) {
+            $uid = $args{serializer}{_uid}{user}{$value} if $value;
+        }
+
+        if (!$uid) {
+            my $obj = $self->$method;
+            next unless $obj and $obj->isa("RT::Record");
+            $uid = $obj->UID;
+
+            if ( $obj->can('UID') eq RT::Record->can('UID') ) {
+                # Group Instance column points to various classes.
+                $simple_uid_class{ ref $self }{$col} = ref $obj;
+            }
+        }
+
+        $store{$col} = \$uid if $uid;
     }
 
     # Anything on an object should get the UID stored instead
     if ($store{ObjectType} and $store{ObjectId} and $self->can("Object")) {
+        if ( $store{ObjectType}->can('UID') eq RT::Record->can('UID') ) {
+            $store{Object} = \( join '-', $store{ObjectType}, $RT::Organization, $store{ObjectId} );
+        }
+        elsif ( $store{ObjectType} eq 'RT::User' ) {
+            $store{Object} = \( $args{serializer}{uid}{user}{ $store{ObjectId} } ||= $self->Object->UID );
+        }
+        else {
+            $store{Object} = \( $self->Object->UID );
+        }
         delete $store{$_} for qw/ObjectType ObjectId/;
-        $store{Object} = \($self->Object->UID);
     }
 
     return %store;
diff --git a/lib/RT/Ticket.pm b/lib/RT/Ticket.pm
index 741d2e3007..f9afb4f140 100644
--- a/lib/RT/Ticket.pm
+++ b/lib/RT/Ticket.pm
@@ -3784,9 +3784,7 @@ sub Serialize {
     my %args = (@_);
     my %store = $self->SUPER::Serialize(@_);
 
-    my $obj = RT::Ticket->new( RT->SystemUser );
-    $obj->Load( $store{EffectiveId} );
-    $store{EffectiveId} = \($obj->UID);
+    $store{EffectiveId} = \( join '-', 'RT::Ticket', $RT::Organization, $store{EffectiveId} );
 
     return %store;
 }
diff --git a/lib/RT/Transaction.pm b/lib/RT/Transaction.pm
index 12ea05949c..1d42265b63 100644
--- a/lib/RT/Transaction.pm
+++ b/lib/RT/Transaction.pm
@@ -2284,27 +2284,21 @@ sub Serialize {
 
     my $type = $store{Type};
     if ($type eq "CustomField") {
-        my $cf = RT::CustomField->new( RT->SystemUser );
-        $cf->Load( $store{Field} );
-        $store{Field} = \($cf->UID);
-
-        $store{OldReference} = \($self->OldReferenceObject->UID) if $self->OldReference;
-        $store{NewReference} = \($self->NewReferenceObject->UID) if $self->NewReference;
-    } elsif ($type =~ /^(Take|Untake|Force|Steal|Give|SetWatcher)$/
-            || ($type eq 'Set' && $store{Field} eq 'Owner')) {
+        $store{Field} = \( join '-', 'RT::CustomField', $RT::Organization, $store{Field} );
+
+        $store{OldReference} = \( join '-', $store{ReferenceType}, $RT::Organization, $store{OldReference} )
+            if $store{OldReference};
+        $store{NewReference} = \( join '-', $store{ReferenceType}, $RT::Organization, $store{NewReference} )
+            if $store{NewReference};
+   }
+   elsif ($type =~ /^(Take|Untake|Force|Steal|Give|SetWatcher)$/) {
         for my $field (qw/OldValue NewValue/) {
-            my $user = RT::User->new( RT->SystemUser );
-            $user->Load( $store{$field} );
-            $store{$field} = \($user->UID);
+            $store{$field} = \$args{serializer}{_uid}{user}{ $store{$field} };
         }
     } elsif ($type eq "DelWatcher") {
-        my $principal = RT::Principal->new( RT->SystemUser );
-        $principal->Load( $store{OldValue} );
-        $store{OldValue} = \($principal->UID);
+        $store{OldValue} = \( join '-', 'RT::Principal', $RT::Organization, $store{OldValue} );
     } elsif ($type eq "AddWatcher") {
-        my $principal = RT::Principal->new( RT->SystemUser );
-        $principal->Load( $store{NewValue} );
-        $store{NewValue} = \($principal->UID);
+        $store{NewValue} = \( join '-', 'RT::Principal', $RT::Organization, $store{NewValue} );
     } elsif ($type eq "DeleteLink") {
         if ($store{OldValue}) {
             my $base = RT::URI->new( $self->CurrentUser );
@@ -2364,9 +2358,7 @@ sub Serialize {
             }
         }
     } elsif ($type =~ /^(Add|Open|Resolve)Reminder$/) {
-        my $ticket = RT::Ticket->new( RT->SystemUser );
-        $ticket->Load( $store{NewValue} );
-        $store{NewValue} = \($ticket->UID);
+        $store{NewValue} = \( join '-', 'RT::Ticket', $RT::Organization, $store{NewValue} );
     }
 
     return %store;
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index 38aa3a670e..4b46910baf 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3062,6 +3062,9 @@ sub BeforeWipeout {
 
 sub Serialize {
     my $self = shift;
+    my %args = @_;
+    $args{serializer}{_uid}{user}{ $self->Id } = $self->UID;
+
     return (
         Disabled => $self->PrincipalObj->Disabled,
         Principal => $self->PrincipalObj->UID,

commit 56b12116763a73e09da779b75770316ddf284963
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 20 23:03:39 2022 +0800

    Add --all to serializer to export all data with UIDs and not check dependencies
    
    This is faster than walking through dependencies.
    
    The collections are reordered so most dependencies can be serialized
    first, which reduces the importer work to resolve references.

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index bb66f3f140..23ca12f092 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1742,13 +1742,20 @@ sub PreInflate {
         return $duplicated->() if $obj->Id;
     }
 
-    my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
+    # serialized data with --all has principals, in which case we don't need to create a new one.
+    my $principal_obj = $importer->LookupObj( $principal_uid );
+    if ( $principal_obj ) {
+        $data->{id} = $principal_obj->Id;
+    }
+    else {
+        my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
 
-    # Now we have a principal id, set the id for the group record
-    $data->{id} = $id;
+        # Now we have a principal id, set the id for the group record
+        $data->{id} = $id;
+
+        $importer->Resolve( $principal_uid => 'RT::Principal', $id );
+    }
 
-    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
-    $data->{id} = $id;
 
     return 1;
 }
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index cee32dc547..6c79a4f9d2 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -132,6 +132,8 @@ sub LoadMetadata {
     $self->{Organization} = $data->{Organization};
     $self->{Clone}        = $data->{Clone};
     $self->{Incremental}  = $data->{Incremental};
+    $self->{All}          = $data->{All};
+
     $self->{Files}        = $data->{Files} if $data->{Final};
 }
 
diff --git a/lib/RT/Migrate/Serializer.pm b/lib/RT/Migrate/Serializer.pm
index 4c7dfba251..c300f7836a 100644
--- a/lib/RT/Migrate/Serializer.pm
+++ b/lib/RT/Migrate/Serializer.pm
@@ -76,6 +76,7 @@ sub Init {
 
         Clone       => 0,
         Incremental => 0,
+        All         => 0,
 
         Verbose => 1,
         @_,
@@ -99,6 +100,7 @@ sub Init {
                   HyperlinkUnmigrated
                   Clone
                   Incremental
+                  All
               /;
 
     $self->{Clone} = 1 if $self->{Incremental};
@@ -108,7 +110,7 @@ sub Init {
     # Keep track of the number of each type of object written out
     $self->{ObjectCount} = {};
 
-    if ($self->{Clone}) {
+    if ($self->{Clone} || $self->{All}) {
         $self->PushAll;
     } else {
         $self->PushBasics;
@@ -132,6 +134,7 @@ sub Metadata {
         Organization => $RT::Organization,
         Clone        => $self->{Clone},
         Incremental  => $self->{Incremental},
+        All          => $self->{All},
         ObjectCount  => { $self->ObjectCount },
         @_,
     },
@@ -159,13 +162,32 @@ sub PushAll {
     # Principals first; while we don't serialize these separately during
     # normal dependency walking (we fold them into users and groups),
     # having them separate during cloning makes logic simpler.
-    $self->PushCollections(qw(Principals));
+    $self->PushCollections(qw(Principals)) if $self->{Clone};
 
-    # Users and groups
-    $self->PushCollections(qw(Users Groups GroupMembers));
+    # Users
+    $self->PushCollections(qw(Users));
+
+    # groups
+    if ( $self->{Clone} ) {
+        $self->PushCollections(qw(Groups));
+    }
+    else {
+        my $groups = RT::Groups->new(RT->SystemUser);
+        $groups->FindAllRows if $self->{FollowDisabled};
+        $groups->CleanSlate;
+        $groups->UnLimit;
+        $groups->Limit(
+            FIELD         => 'Domain',
+            VALUE         => [ 'RT::Queue-Role', 'RT::Ticket-Role', 'RT::Catalog-Role', 'RT::Asset-Role' ],
+            OPERATOR      => 'NOT IN',
+            CASESENSITIVE => 0,
+        );
+        $groups->OrderBy( FIELD => 'id' );
+        $self->PushObj($groups);
+    }
 
     # Tickets
-    $self->PushCollections(qw(Queues Tickets Transactions Attachments Links));
+    $self->PushCollections(qw(Queues Tickets));
 
     # Articles
     $self->PushCollections(qw(Articles), map { ($_, "Object$_") } qw(Classes Topics));
@@ -176,6 +198,23 @@ sub PushAll {
     # Assets
     $self->PushCollections(qw(Catalogs Assets));
 
+    if ( !$self->{Clone} ) {
+        my $groups = RT::Groups->new( RT->SystemUser );
+        $groups->FindAllRows if $self->{FollowDisabled};
+        $groups->CleanSlate;
+        $groups->UnLimit;
+        $groups->Limit(
+            FIELD         => 'Domain',
+            VALUE         => [ 'RT::Queue-Role', 'RT::Ticket-Role', 'RT::Catalog-Role', 'RT::Asset-Role' ],
+            OPERATOR      => 'IN',
+            CASESENSITIVE => 0,
+        );
+        $groups->OrderBy( FIELD => 'id' );
+        $self->PushObj($groups);
+    }
+
+    $self->PushCollections(qw(GroupMembers));
+
     # Custom Fields
     if (RT::StaticUtil::RequireModule("RT::ObjectCustomFields")) {
         $self->PushCollections(map { ($_, "Object$_") } qw(CustomFields CustomFieldValues));
@@ -187,13 +226,16 @@ sub PushAll {
     $self->PushCollections(qw(ACL));
 
     # Scrips
-    $self->PushCollections(qw(Scrips ObjectScrips ScripActions ScripConditions Templates));
+    $self->PushCollections(qw(ScripActions ScripConditions Templates Scrips ObjectScrips));
 
     # Attributes
     $self->PushCollections(qw(Attributes));
 
     # Shorteners
     $self->PushCollections(qw(Shorteners));
+
+    $self->PushCollections(qw(Links));
+    $self->PushCollections(qw(Transactions Attachments));
 }
 
 sub PushCollections {
@@ -369,7 +411,7 @@ sub NextPage {
 
     $last ||= 0;
 
-    if ($self->{Clone}) {
+    if ($self->{Clone} || $self->{All}) {
         # Clone provides guaranteed ordering by id and with no other id limits
         # worry about trampling
 
@@ -399,7 +441,7 @@ sub Process {
 
     # Skip all dependency walking if we're cloning; go straight to
     # visiting them.
-    if ($self->{Clone} and $uid) {
+    if ( ($self->{Clone} || $self->{All}) and $uid) {
         return if $obj->isa("RT::System");
         $self->{progress}->($obj) if $self->{progress};
         return $self->Visit(%args);
@@ -533,11 +575,17 @@ sub Visit {
             undef,
             \%data,
         );
-    } elsif ($self->{Clone}) {
+    } elsif ($self->{Clone} || $self->{All}) {
         # Short-circuit and get Just The Basics, Sir if we're cloning
         my $class = ref($obj);
         my $uid   = $obj->UID;
-        my %data  = $obj->RT::Record::Serialize( UIDs => 0 );
+        my %data;
+        if ( $self->{Clone} ) {
+            %data = $obj->RT::Record::Serialize( serializer => $self, UIDs => 0 );
+        }
+        else {
+            %data = $obj->Serialize( serializer => $self, UIDs => 1 );
+        }
 
         # +class is used when seeing a record of one class might insert
         # a separate record into the stream
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index 753be40fdd..38aa3a670e 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3102,14 +3102,21 @@ sub PreInflate {
         return;
     }
 
-    # Create a principal first, so we know what ID to use
-    my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
+    # serialized data with --all has principals, in which case we don't need to create a new one.
+    my $principal_obj = $importer->LookupObj( $principal_uid );
+    if ( $principal_obj ) {
+        $data->{id} = $principal_obj->Id;
+    }
+    else {
 
-    # Now we have a principal id, set the id for the user record
-    $data->{id} = $id;
+        # Create a principal first, so we know what ID to use
+        my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
 
-    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
-    $data->{id} = $id;
+        # Now we have a principal id, set the id for the user record
+        $data->{id} = $id;
+
+        $importer->Resolve( $principal_uid => 'RT::Principal', $id );
+    }
 
     return $class->SUPER::PreInflate( $importer, $uid, $data );
 }
diff --git a/sbin/rt-serializer.in b/sbin/rt-serializer.in
index 52b2761d50..b71bd7d5a6 100644
--- a/sbin/rt-serializer.in
+++ b/sbin/rt-serializer.in
@@ -113,6 +113,7 @@ GetOptions(
 
     "clone",
     "incremental",
+    "all",
 
     "gc=i",
     "page=i",
@@ -141,6 +142,7 @@ $args{FollowAssets} = $OPT{assets} if defined $OPT{assets};
 
 $args{Clone}         = $OPT{clone}       if $OPT{clone};
 $args{Incremental}   = $OPT{incremental} if $OPT{incremental};
+$args{All}           = $OPT{all}         if $OPT{all};
 
 $args{GC}   = defined $OPT{gc}   ? $OPT{gc}   : 5000;
 $args{Page} = defined $OPT{page} ? $OPT{page} : 100;
@@ -184,9 +186,9 @@ if ($OPT{'limit-cfs'}) {
     $args{CustomFields} = \@cf_ids;
 }
 
-if (($OPT{clone} or $OPT{incremental})
+if (($OPT{clone} or $OPT{incremental} or $OPT{all})
         and grep { /^(users|groups|deleted|disabled|scrips|tickets|transactions|acls|assets)$/ } keys %OPT) {
-    die "You cannot specify object types when cloning.\n\nPlease see $0 --help.\n";
+    die "You cannot specify object types when cloning or with --all.\n\nPlease see $0 --help.\n";
 }
 
 my $walker;
@@ -417,6 +419,15 @@ C<--clone> with any option that limits object types serialized.  No
 dependency walking is performed when cloning. C<rt-importer> will detect
 that your serialized data set was generated by a clone.
 
+=item B<--all>
+
+Serializes your entire database, creating a clone-like data. Both C<--all>
+and C<--clone> do not check dependencies, the difference is C<--all>
+generates UIDs: it means the ids in source instance do not necessirily
+be synced to target instance, which makes it quite useful to fully merge
+multiple RT instances. Use C<--clone> instead if you really want to keep
+ids in source instance.
+
 =item B<--incremental>
 
 Will generate an incremenal serialized dataset using the data stored in

commit 48a4036471d546e1c70f391e33acfada8c7a1718
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Oct 18 06:16:40 2022 +0800

    Support to create principals in batch beforehand
    
    This reduces SQL calls a lot and thus improves importer performance.

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 86ceaca0c6..bb66f3f140 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1742,16 +1742,12 @@ sub PreInflate {
         return $duplicated->() if $obj->Id;
     }
 
-    my $principal = RT::Principal->new( RT->SystemUser );
-    my ($id) = $principal->Create(
-        PrincipalType => 'Group',
-        Disabled => $disabled,
-    );
+    my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
 
     # Now we have a principal id, set the id for the group record
     $data->{id} = $id;
 
-    $importer->Resolve( $principal_uid => ref($principal), $id );
+    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
     $data->{id} = $id;
 
     return 1;
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 4e6d3a73e7..cee32dc547 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -72,6 +72,8 @@ sub Init {
         HandleError         => undef,
         ExcludeOrganization => undef,
         AutoCommit          => 1,
+        BatchUserPrincipals  => 0,
+        BatchGroupPrincipals => 0,
         @_,
     );
 
@@ -84,6 +86,8 @@ sub Init {
 
     $self->{AutoCommit} = $args{AutoCommit};
 
+    $self->{$_} = $args{$_} for qw/BatchUserPrincipals BatchGroupPrincipals/;
+
     $self->{HandleError} = sub { 0 };
     $self->{HandleError} = $args{HandleError}
         if $args{HandleError} and ref $args{HandleError} eq 'CODE';
@@ -161,6 +165,58 @@ sub InitStream {
             );
         }
     }
+
+    if ( !$self->{Clone} ) {
+        for my $type ( qw/User Group/ ) {
+            if ( my $count = $self->{"Batch${type}Principals"} ) {
+                my $principal = RT::Principal->new( RT->SystemUser );
+                my ($id)      = $principal->Create( PrincipalType => $type, Disabled => 0 );
+
+                my $left = $count - 1; # already created one
+
+                # Insert 100k each time, to avoid too much memory assumption.
+                while ( $left > 0 ) {
+                    if ( $left > 100_000 ) {
+                        my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
+                            ("('$type', 0)") x ( 100_000 );
+                        $self->RunSQL($sql);
+                        $left -= 100_000;
+                    }
+                    else {
+                        my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
+                            ("('$type', 0)") x $left;
+                        $self->RunSQL($sql);
+                        last;
+                    }
+                }
+
+                push @{ $self->{_principals}{$type} }, $id .. ( $count - 1 + $id );
+            }
+        }
+    }
+}
+
+sub NextPrincipalId {
+    my $self = shift;
+    my %args = @_;
+    my $id;
+    if ( $args{PrincipalType} eq 'User' ) {
+        $id = shift @{$self->{_principals}{User} || []};
+    }
+    else {
+        $id = shift @{$self->{_principals}{Group} || []};
+    }
+
+    if ( !$id ) {
+        my $principal = RT::Principal->new( RT->SystemUser );
+        ($id) = $principal->Create(%args);
+    }
+
+    if ( $args{Disabled} ) {
+        $self->RunSQL("UPDATE Principals SET Disabled=1 WHERE id=$id");
+    }
+
+    return $id;
 }
 
 sub Resolve {
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index b1a8cb911b..753be40fdd 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3103,16 +3103,12 @@ sub PreInflate {
     }
 
     # Create a principal first, so we know what ID to use
-    my $principal = RT::Principal->new( RT->SystemUser );
-    my ($id) = $principal->Create(
-        PrincipalType => 'User',
-        Disabled => $disabled,
-    );
+    my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
 
     # Now we have a principal id, set the id for the user record
     $data->{id} = $id;
 
-    $importer->Resolve( $principal_uid => ref($principal), $id );
+    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
     $data->{id} = $id;
 
     return $class->SUPER::PreInflate( $importer, $uid, $data );
diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index 306c4428fc..221677962d 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -102,6 +102,9 @@ GetOptions(
 
     "auto-commit!",
 
+    "batch-user-principals=i",
+    "batch-group-principals=i",
+
     "dump=s@",
 ) or Pod::Usage::pod2usage();
 
@@ -151,6 +154,8 @@ my $import = RT::Migrate::Importer::File->new(
     DumpObjects         => $OPT{dump},
     Resume              => $OPT{resume},
     AutoCommit          => $OPT{'auto-commit'},
+    BatchUserPrincipals => $OPT{'batch-user-principals'},
+    BatchGroupPrincipals => $OPT{'batch-group-principals'},
     HandleError         => $error_handler,
 );
 
@@ -293,6 +298,13 @@ Works only in conjunction with C<--list>.
 Don't auto commit to database. When this flag is used, it will commit only
 once for each data file.  This could boost performance in some cases.
 
+=item B<--batch-user-pricipals> I<NUMBER>
+=item B<--batch-group-pricipals> I<NUMBER>
+
+The number of user/group principals to create in batch beforehand. Default is 0.
+This is to improve performance for not-cloned serialized data of big instances,
+usually you don't need to specify this.
+
 =back
 
 

commit de8dc8a692b4f195a5618b70c492cb278ab39a5a
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 14 13:09:53 2022 +0800

    No need to convert ascii strings
    
    By avoiding unnecessary actions like this, importer runs faster with Pg.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index ac7baf18e7..4e6d3a73e7 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -340,7 +340,7 @@ sub Create {
     # could be be wrongly encoded on Pg.
     if ( RT->Config->Get( 'DatabaseType' ) eq 'Pg' ) {
         for my $field ( keys %$data ) {
-            if ( $data->{$field} && !utf8::is_utf8( $data->{$field} ) ) {
+            if ( $data->{$field} && $data->{$field} =~ /[^\x00-\x7F]/ && !utf8::is_utf8( $data->{$field} ) ) {
 
                 # Make sure decoded data is valid UTF-8, otherwise Pg won't insert
                 my $decoded;

commit 6261aa0d4758c773954b2525d6c08513454a0f66
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 14 05:02:16 2022 +0800

    Reduce unnecessary Load calls after creation for performance
    
    Now we only load objects if needed, like to run PostInflate, etc.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 379b8f728c..ac7baf18e7 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -379,10 +379,16 @@ sub Create {
     $self->{ObjectCount}{$class}++;
     $self->Resolve( $uid => $class, $id );
 
-    # Load it back to get real values into the columns
-    $obj = $class->new( RT->SystemUser );
-    $obj->Load( $id );
-    $obj->PostInflate( $self, $uid );
+    # RT::User::PostInflate is just to call InitSystemObjects for RT_System user.
+    # Here we treat it specially to avoid loading other user objects unnecessarily for performance.
+    if ( $class eq 'RT::User' ) {
+        RT->InitSystemObjects if $data->{Name} eq 'RT_System';
+    }
+    elsif ( $class->can('PostInflate') ne RT::Record->can('PostInflate') ) {
+        # Load it back to get real values into the columns
+        $obj->Load( $id );
+        $obj->PostInflate( $self, $uid );
+    }
 
     return $obj;
 }
@@ -453,6 +459,7 @@ sub ReadStream {
                   ? $origid
                   : $self->Organization . ":$origid";
 
+        $obj->Load( $self->Lookup($uid)->[1] );
         my ($id, $msg) = $obj->AddCustomFieldValue(
             Field             => $self->{OriginalId},
             Value             => $value,
@@ -467,7 +474,7 @@ sub ReadStream {
     # inspection
     push @{$self->{NewCFs}}, $uid
         if $class eq "RT::CustomField"
-            and $obj->LookupType =~ /^RT::Queue/;
+            and $data->{LookupType} =~ /^RT::Queue/; # Use $data in case $obj is not fully loaded.
 
     $self->{Progress}->($obj) if $self->{Progress};
 }

commit 304fd96a812703fc66be6406821bff2c8bf771ab
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 13 20:45:31 2022 +0800

    Fill up CachedGroupMembers at the end of importer for performance
    
    Previously we created corresponding CachedGroupMember rows on every
    Group/GroupMember create, which was quite slow. By doing it via plain
    SQL at the end, it's astonishingly faster than before. E.g. for
    CachedGroupMembers with 100k rows, now it could be done in seconds
    (prevously it was in minutes!)

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index ccc92fe7c1..86ceaca0c6 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1757,17 +1757,6 @@ sub PreInflate {
     return 1;
 }
 
-sub PostInflate {
-    my $self = shift;
-
-    my $cgm = RT::CachedGroupMember->new($self->CurrentUser);
-    $cgm->Create(
-        Group  => $self->PrincipalObj,
-        Member => $self->PrincipalObj,
-        ImmediateParent => $self->PrincipalObj
-    );
-}
-
 # If this group represents the members of a custom role, then return
 # the RT::CustomRole object. Otherwise, return undef
 sub _CustomRoleObj {
diff --git a/lib/RT/GroupMember.pm b/lib/RT/GroupMember.pm
index cd9a47a200..41b083561b 100644
--- a/lib/RT/GroupMember.pm
+++ b/lib/RT/GroupMember.pm
@@ -603,12 +603,6 @@ sub PreInflate {
     return 1;
 }
 
-sub PostInflate {
-    my $self = shift;
-
-    $self->_InsertCGM;
-}
-
 RT::Base->_ImportOverlays();
 
 1;
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 25d911562d..379b8f728c 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -477,6 +477,66 @@ sub CloseStream {
 
     $self->{Progress}->(undef, 'force') if $self->{Progress};
 
+    # Fill CGM
+
+    # Groups
+    $self->RunSQL(<<'EOF');
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+    SELECT Groups.id, Groups.id, 0, Groups.id, Principals.Disabled FROM Groups
+    LEFT JOIN Principals ON ( Groups.id = Principals.id )
+    LEFT JOIN CachedGroupMembers ON (
+        Groups.id = CachedGroupMembers.GroupId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.MemberId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.ImmediateParentId
+        )
+    WHERE CachedGroupMembers.id IS NULL
+EOF
+
+    # GroupMembers
+    $self->RunSQL(<<'EOF');
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+    SELECT GroupMembers.GroupId, GroupMembers.MemberId, 0, GroupMembers.GroupId, Principals.Disabled FROM GroupMembers
+    LEFT JOIN Principals ON ( GroupMembers.GroupId = Principals.id )
+    LEFT JOIN CachedGroupMembers ON (
+        GroupMembers.GroupId = CachedGroupMembers.GroupId
+        AND GroupMembers.MemberId = CachedGroupMembers.MemberId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.ImmediateParentId
+    )
+    WHERE CachedGroupMembers.id IS NULL
+EOF
+
+    # Fixup Via
+    $self->RunSQL(<<'EOF');
+UPDATE CachedGroupMembers SET Via=id WHERE Via=0
+EOF
+
+    # Cascaded GroupMembers, use the same SQL in rt-validator
+    my $cascaded_cgm = <<'EOF';
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+SELECT cgm1.GroupId, gm2.MemberId, cgm1.id AS Via,
+    cgm1.MemberId AS ImmediateParentId, cgm1.Disabled
+FROM
+    CachedGroupMembers cgm1
+    CROSS JOIN GroupMembers gm2
+    LEFT JOIN CachedGroupMembers cgm3 ON (
+            cgm3.GroupId           = cgm1.GroupId
+        AND cgm3.MemberId          = gm2.MemberId
+        AND cgm3.Via               = cgm1.id
+        AND cgm3.ImmediateParentId = cgm1.MemberId )
+    LEFT JOIN Groups g ON (
+        cgm1.GroupId = g.id
+    )
+WHERE cgm1.GroupId != cgm1.MemberId
+AND gm2.GroupId = cgm1.MemberId
+AND cgm3.id IS NULL
+AND g.Domain != 'RT::Ticket-Role'
+EOF
+    # Do this multiple times if needed to fill up cascaded group members
+    while ( my $rv = $self->RunSQL($cascaded_cgm) ) {
+        # $rv could be 0E0 that is true in bool context but 0 in numeric comparison.
+        last unless $rv > 0;
+    }
+
     return if $self->{Clone};
 
     # Take global CFs which we made and make them un-global

commit 13501db85ca738eb7cbce3b5861c5063698172cc
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Sun Oct 23 02:06:34 2022 +0800

    Wrap raw "do" SQL into eval to show more error details
    
    Like existing wrappers for Create calls, it gives HandleError a chance
    to rescue when possible.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 9989a12f49..25d911562d 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -519,4 +519,23 @@ sub Progress {
     return $self->{Progress} = $_[0];
 }
 
+sub RunSQL {
+    my $self = shift;
+    my $rv;
+    eval {
+        local $SIG{__DIE__};
+        $rv = $RT::Handle->dbh->do(@_);
+    };
+    if ($@) {
+        my $err = "Failed to run @_: $@\n";
+        if ( not $self->{HandleError}->( $self, $err ) ) {
+            die $err;
+        }
+        else {
+            return undef;
+        }
+    }
+    return $rv;
+}
+
 1;

-----------------------------------------------------------------------


hooks/post-receive
-- 
rt


More information about the rt-commit mailing list