[Rt-commit] rt branch 5.0/speed-up-importer created. rt-5.0.3-130-gf899dd6adc

BPS Git Server git at git.bestpractical.com
Tue Oct 18 17:40:27 UTC 2022


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "rt".

The branch, 5.0/speed-up-importer has been created
        at  f899dd6adc0a4fcb7d3557b4ac59be493fe867d0 (commit)

- Log -----------------------------------------------------------------
commit f899dd6adc0a4fcb7d3557b4ac59be493fe867d0
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Oct 18 20:21:47 2022 +0800

    Wrap raw "do" SQL into eval to show more error details
    
    Like the existing wrappers for Create calls, this also gives HandleError
    a chance to recover when possible.
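
For context: the HandleError callback passed to the importer decides whether
a failed statement is fatal. A true return value tells the importer to skip
the failure and continue. A minimal sketch (the logging choice is
illustrative, not part of this commit):

    my $importer = RT::Migrate::Importer::File->new(
        HandleError => sub {
            my ( $importer, $err ) = @_;
            RT->Logger->error($err);    # record the failed statement and error
            return 1;                   # true: skip this statement and keep going
        },
    );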

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 88d6a2108b..36c91eac12 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -184,13 +184,13 @@ sub InitStream {
                     if ( $left > 100_000 ) {
                         my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
                             ("('$type', 0)") x ( 100_000 );
-                        $RT::Handle->dbh->do($sql);
+                        $self->RunSQL($sql);
                         $left -= 100_000;
                     }
                     else {
                         my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
                             ("('$type', 0)") x $left;
-                        $RT::Handle->dbh->do($sql);
+                        $self->RunSQL($sql);
                         last;
                     }
                 }
@@ -213,7 +213,7 @@ sub NextPrincipalId {
     }
 
     if ( $args{Disabled} ) {
-        $RT::Handle->dbh->do("UPDATE Principals SET Disabled=1 WHERE id=$id");
+        $self->RunSQL("UPDATE Principals SET Disabled=1 WHERE id=$id");
     }
 
     if ( !$id ) {
@@ -640,10 +640,9 @@ sub CloseStream {
     }
 
     # Fill CGM
-    my $dbh = $RT::Handle->dbh;
 
     # Groups
-    $dbh->do(<<'EOF');
+    $self->RunSQL(<<'EOF');
 INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
     SELECT Groups.id, Groups.id, 0, Groups.id, Principals.Disabled FROM Groups
     LEFT JOIN Principals ON ( Groups.id = Principals.id )
@@ -656,7 +655,7 @@ INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disab
 EOF
 
     # GroupMembers
-    $dbh->do(<<'EOF');
+    $self->RunSQL(<<'EOF');
 INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
     SELECT GroupMembers.GroupId, GroupMembers.MemberId, 0, GroupMembers.GroupId, Principals.Disabled FROM GroupMembers
     LEFT JOIN Principals ON ( GroupMembers.GroupId = Principals.id )
@@ -669,7 +668,7 @@ INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disab
 EOF
 
     # Fixup Via
-    $dbh->do(<<'EOF');
+    $self->RunSQL(<<'EOF');
 UPDATE CachedGroupMembers SET Via=id WHERE Via=0
 EOF
 
@@ -695,7 +694,7 @@ AND cgm3.id IS NULL
 AND g.Domain != 'RT::Ticket-Role'
 EOF
     # Do this multiple times if needed to fill up cascaded group members
-    while ( my $rv = $dbh->do($cascaded_cgm) ) {
+    while ( my $rv = $self->RunSQL($cascaded_cgm) ) {
         # $rv could be 0E0, which is true in bool context but 0 in numeric comparison.
         last unless $rv > 0;
     }
@@ -775,19 +774,7 @@ sub BatchCreate {
             my $batch_sql
                 = $RT::Handle->FillIn( $sql . ( ", $values_paren" x ( $count - 1 ) ), [ map @$_, @{ $query{$sql} } ] );
 
-            eval {
-                local $SIG{__DIE__};
-                $dbh->do($batch_sql);
-            };
-            if ($@) {
-                my $err = "Failed to run $batch_sql: $@\n";
-                if ( not $self->{HandleError}->( $self, $err ) ) {
-                    die $err;
-                }
-                else {
-                    next;
-                }
-            }
+            $self->RunSQL($batch_sql);
         }
         return;
     }
@@ -819,4 +806,23 @@ sub BatchCreate {
     }
 }
 
+sub RunSQL {
+    my $self = shift;
+    my $rv;
+    eval {
+        local $SIG{__DIE__};
+        $rv = $RT::Handle->dbh->do(@_);
+    };
+    if ($@) {
+        my $err = "Failed to run @_: $@\n";
+        if ( not $self->{HandleError}->( $self, $err ) ) {
+            die $err;
+        }
+        else {
+            return undef;
+        }
+    }
+    return $rv;
+}
+
 1;

commit fac8b086e18496777b4378ce0e3f9c27a11334fe
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Tue Oct 18 06:16:40 2022 +0800

    Support creating principals in batch beforehand
    
    This greatly reduces the number of SQL calls and thus improves importer
    performance.
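
A usage sketch with the new options (the counts are illustrative, and other
constructor arguments are omitted):

    my $importer = RT::Migrate::Importer::File->new(
        BatchUserPrincipals  => 500_000,    # pre-create principal rows for ~500k users
        BatchGroupPrincipals => 200_000,    # ... and for ~200k groups
    );

The equivalent rt-importer flags, --batch-user-principals and
--batch-group-principals, are added below.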

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 39f6de0a43..ee84a5fffb 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1737,16 +1737,12 @@ sub PreInflate {
         return $duplicated->() if $obj->Id;
     }
 
-    my $principal = RT::Principal->new( RT->SystemUser );
-    my ($id) = $principal->Create(
-        PrincipalType => 'Group',
-        Disabled => $disabled,
-    );
+    my $id = $importer->NextPrincipalId( PrincipalType => 'Group', Disabled => $disabled );
 
     # Now we have a principal id, set the id for the group record
     $data->{id} = $id;
 
-    $importer->Resolve( $principal_uid => ref($principal), $id );
+    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
     $data->{id} = $id;
 
     return 1;
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 3889b1ef7d..88d6a2108b 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -74,6 +74,8 @@ sub Init {
         AutoCommit          => 1,
         BatchSize           => 0,
         MaxProcesses        => 10,
+        BatchUserPrincipals  => 0,
+        BatchGroupPrincipals => 0,
         @_,
     );
 
@@ -89,6 +91,8 @@ sub Init {
     $self->{BatchSize}    = $args{BatchSize};
     $self->{MaxProcesses} = $args{MaxProcesses} || 10;
 
+    $self->{$_} = $args{$_} for qw/BatchUserPrincipals BatchGroupPrincipals/;
+
     $self->{HandleError} = sub { 0 };
     $self->{HandleError} = $args{HandleError}
         if $args{HandleError} and ref $args{HandleError} eq 'CODE';
@@ -166,6 +170,58 @@ sub InitStream {
             );
         }
     }
+
+    if ( !$self->{Clone} ) {
+        for my $type ( qw/User Group/ ) {
+            if ( my $count = $self->{"Batch${type}Principals"} ) {
+                my $principal = RT::Principal->new( RT->SystemUser );
+                my ($id)      = $principal->Create( PrincipalType => $type, Disabled => 0 );
+
+                my $left = $count - 1; # already created one
+
+                # Insert 100k rows at a time, to avoid excessive memory consumption.
+                while ( $left > 0 ) {
+                    if ( $left > 100_000 ) {
+                        my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
+                            ("('$type', 0)") x ( 100_000 );
+                        $RT::Handle->dbh->do($sql);
+                        $left -= 100_000;
+                    }
+                    else {
+                        my $sql = 'INSERT INTO Principals (PrincipalType, Disabled) VALUES ' . join ',',
+                            ("('$type', 0)") x $left;
+                        $RT::Handle->dbh->do($sql);
+                        last;
+                    }
+                }
+
+                push @{ $self->{_principals}{$type} }, $id .. ( $count - 1 + $id );
+            }
+        }
+    }
+}
+
+sub NextPrincipalId {
+    my $self = shift;
+    my %args = @_;
+    my $id;
+    if ( $args{PrincipalType} eq 'User' ) {
+        $id = shift @{$self->{_principals}{User} || []};
+    }
+    else {
+        $id = shift @{$self->{_principals}{Group} || []};
+    }
+
+    if ( $args{Disabled} ) {
+        $RT::Handle->dbh->do("UPDATE Principals SET Disabled=1 WHERE id=$id");
+    }
+
+    if ( !$id ) {
+        my $principal = RT::Principal->new( RT->SystemUser );
+        ($id) = $principal->Create(%args);
+    }
+
+    return $id;
 }
 
 sub Resolve {
diff --git a/lib/RT/User.pm b/lib/RT/User.pm
index f7d28b09eb..76458baabc 100644
--- a/lib/RT/User.pm
+++ b/lib/RT/User.pm
@@ -3103,16 +3103,12 @@ sub PreInflate {
     }
 
     # Create a principal first, so we know what ID to use
-    my $principal = RT::Principal->new( RT->SystemUser );
-    my ($id) = $principal->Create(
-        PrincipalType => 'User',
-        Disabled => $disabled,
-    );
+    my $id = $importer->NextPrincipalId( PrincipalType => 'User', Disabled => $disabled );
 
     # Now we have a principal id, set the id for the user record
     $data->{id} = $id;
 
-    $importer->Resolve( $principal_uid => ref($principal), $id );
+    $importer->Resolve( $principal_uid => 'RT::Principal', $id );
     $data->{id} = $id;
 
     return $class->SUPER::PreInflate( $importer, $uid, $data );
diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index f2411d657c..909ac6dc3d 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -104,6 +104,8 @@ GetOptions(
 
     "batch-size=i",
     "max-processes=i",
+    "batch-user-principals=i",
+    "batch-group-principals=i",
     "dump=s@",
 ) or Pod::Usage::pod2usage();
 
@@ -155,6 +157,8 @@ my $import = RT::Migrate::Importer::File->new(
     AutoCommit          => $OPT{'auto-commit'},
     BatchSize           => $OPT{'batch-size'},
     MaxProcesses        => $OPT{'max-processes'} || 10,
+    BatchUserPrincipals => $OPT{'batch-user-principals'},
+    BatchGroupPrincipals => $OPT{'batch-group-principals'},
     HandleError         => $error_handler,
 );
 
@@ -306,6 +310,14 @@ which means batch processing is not enabled.
 
 The number of max allowed child processes for batch processing. Default is 10.
 
+=item B<--batch-user-principals> I<NUMBER>
+
+=item B<--batch-group-principals> I<NUMBER>
+
+The number of user/group principals to create in batch beforehand. Default is
+0. This can improve performance when importing non-cloned serialized data from
+large instances; usually you do not need to specify it.
+
 =back
 
 

commit 3e33f138a92409be689e4b0f591f1cb3fbf9b69c
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 14 13:09:53 2022 +0800

    No need to convert ASCII strings
    
    By avoiding unnecessary work like this, the importer runs faster with Pg.
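
The reasoning: bytes in the \x00-\x7F range are already valid UTF-8, so the
decode round trip can be skipped for pure-ASCII values. A standalone
illustration of the guard (the example value is hypothetical):

    use Encode ();
    my $value = "caf\xc3\xa9";    # raw UTF-8 bytes for "café"
    if ( $value =~ /[^\x00-\x7F]/ && !utf8::is_utf8($value) ) {
        $value = Encode::decode( 'UTF-8', $value );
    }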

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 70c2c8f58b..3889b1ef7d 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -357,7 +357,7 @@ sub Create {
     # could be wrongly encoded on Pg.
     if ( RT->Config->Get( 'DatabaseType' ) eq 'Pg' ) {
         for my $field ( keys %$data ) {
-            if ( $data->{$field} && !utf8::is_utf8( $data->{$field} ) ) {
+            if ( $data->{$field} && $data->{$field} =~ /[^\x00-\x7F]/ && !utf8::is_utf8( $data->{$field} ) ) {
 
                 # Make sure decoded data is valid UTF-8, otherwise Pg won't insert
                 my $decoded;

commit b644507476e29f9fb1a3f83d68943200471f53b0
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Fri Oct 14 05:02:16 2022 +0800

    Reduce unnecessary Load calls after creation for performance
    
    Now we only load objects back when needed, e.g. to run PostInflate.

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 5520d0f289..70c2c8f58b 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -439,10 +439,14 @@ sub Create {
     $self->{ObjectCount}{$class}++;
     $self->Resolve( $uid => $class, $id );
 
-    # Load it back to get real values into the columns
-    $obj = $class->new( RT->SystemUser );
-    $obj->Load( $id );
-    $obj->PostInflate( $self, $uid );
+    # Attribute, Article and SystemUser have actions in PostInflate. CustomField is for NewCFs.
+    if ( $class =~ /^RT::(Attribute|Article|CustomField)$/ || ( $class eq 'RT::User' && $data->{Name} eq 'RT_System' ) )
+    {
+        # Load it back to get real values into the columns
+        $obj = $class->new( RT->SystemUser );
+        $obj->Load( $id );
+        $obj->PostInflate( $self, $uid );
+    }
 
     return $obj;
 }
@@ -513,6 +517,7 @@ sub ReadStream {
                   ? $origid
                   : $self->Organization . ":$origid";
 
+        $obj->Load( $self->Lookup($uid)->[1] );
         my ($id, $msg) = $obj->AddCustomFieldValue(
             Field             => $self->{OriginalId},
             Value             => $value,

commit 3673287d1b6887bcd2e7480eaf80e0022d173ef9
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 13 21:34:01 2022 +0800

    Add batch mode to importer for performance
    
    Via parallel processing and reduced SQL calls, the importer can now run
    much faster (10x+ for cloned and 3x+ for non-cloned data).
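
A sketch of enabling batch mode via the constructor keys added below (the
values are illustrative):

    my $importer = RT::Migrate::Importer::File->new(
        BatchSize    => 1000,    # queue objects and flush 1000 at a time
        MaxProcesses => 10,      # at most 10 forked workers (the default)
        HandleError  => $error_handler,
    );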

diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 3ee478e0aa..5520d0f289 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -72,6 +72,8 @@ sub Init {
         HandleError         => undef,
         ExcludeOrganization => undef,
         AutoCommit          => 1,
+        BatchSize           => 0,
+        MaxProcesses        => 10,
         @_,
     );
 
@@ -84,6 +86,9 @@ sub Init {
 
     $self->{AutoCommit} = $args{AutoCommit};
 
+    $self->{BatchSize}    = $args{BatchSize};
+    $self->{MaxProcesses} = $args{MaxProcesses} || 10;
+
     $self->{HandleError} = sub { 0 };
     $self->{HandleError} = $args{HandleError}
         if $args{HandleError} and ref $args{HandleError} eq 'CODE';
@@ -177,27 +182,39 @@ sub Resolve {
 
     return unless $self->{Pending}{$uid};
 
+    my @left;
     for my $ref (@{$self->{Pending}{$uid}}) {
-        my ($pclass, $pid) = @{ $self->Lookup( $ref->{uid} ) };
-        my $obj = $pclass->new( RT->SystemUser );
-        $obj->LoadByCols( Id => $pid );
-        $obj->__Set(
-            Field => $ref->{column},
-            Value => $id,
-        ) if defined $ref->{column};
-        $obj->__Set(
-            Field => $ref->{classcolumn},
-            Value => $class,
-        ) if defined $ref->{classcolumn};
-        $obj->__Set(
-            Field => $ref->{uri},
-            Value => $self->LookupObj($uid)->URI,
-        ) if defined $ref->{uri};
-        if (my $method = $ref->{method}) {
-            $obj->$method($self, $ref, $class, $id);
+        if ( my $lookup = $self->Lookup( $ref->{uid} ) ) {
+            my ( $pclass, $pid ) = @{$lookup};
+            my $obj = $pclass->new( RT->SystemUser );
+            $obj->LoadByCols( Id => $pid );
+            $obj->__Set(
+                Field => $ref->{column},
+                Value => $id,
+            ) if defined $ref->{column};
+            $obj->__Set(
+                Field => $ref->{classcolumn},
+                Value => $class,
+            ) if defined $ref->{classcolumn};
+            $obj->__Set(
+                Field => $ref->{uri},
+                Value => $self->LookupObj($uid)->URI,
+            ) if defined $ref->{uri};
+            if ( my $method = $ref->{method} ) {
+                $obj->$method( $self, $ref, $class, $id );
+            }
         }
+        else {
+            push @left, $ref;
+        }
+    }
+
+    if ( @left ) {
+        $self->{Pending}{$uid} = \@left;
+    }
+    else {
+        delete $self->{Pending}{$uid};
     }
-    delete $self->{Pending}{$uid};
 }
 
 sub Lookup {
@@ -359,6 +376,49 @@ sub Create {
         }
     }
 
+    if ( $self->{BatchSize} && ( $self->{Clone} || $class =~ /Transaction|Attachment|Group.*/ ) ) {
+        push @{ $self->{_batch} }, [ $class, $uid, $data ];
+        my $reconnect;
+        while ( @{ $self->{_batch} } >= $self->{BatchSize} ) {
+            my @parts = splice @{ $self->{_batch} }, 0, $self->{BatchSize};
+            unless ( $reconnect ) {
+                $RT::Handle->Commit unless $self->{AutoCommit};
+                $RT::Handle->Disconnect;
+                $reconnect = 1;
+            }
+
+            if ( !$self->{_pm} ) {
+                require Parallel::ForkManager;
+                $self->{_pm} = Parallel::ForkManager->new( $self->{MaxProcesses} );
+                $self->{_pm}->run_on_finish(
+                    sub {
+                        if ( my $data = $_[5] ) {
+                            $self->{UIDs}{$_} = $data->{$_} for keys %$data;
+                        }
+                    }
+                );
+            }
+
+            $self->{ObjectCount}{$_->[0]}++ for @parts;
+
+            $self->{_pm}->start and next;
+            $RT::Handle->Connect;
+            my $data = $self->BatchCreate(@parts);
+            $self->{_pm}->finish(0, $data);
+        }
+
+        if ( $reconnect ) {
+            $RT::Handle->Connect;
+            $RT::Handle->BeginTransaction unless $self->{AutoCommit};
+        }
+
+        # Groups have id generated in PreInflate.
+        if ( !$self->{Clone} && $data->{id} ) {
+            $self->Resolve( $uid => $class, $data->{id} );
+        }
+        return;
+    }
+
     my ($id, $msg) = eval {
         # catch and rethrow on the outside so we can provide more info
         local $SIG{__DIE__};
@@ -477,6 +537,47 @@ sub CloseStream {
 
     $self->{Progress}->(undef, 'force') if $self->{Progress};
 
+    $self->{ObjectCount}{ $_->[0] }++ for @{ $self->{_batch} || [] };
+    my $data = $self->BatchCreate( @{ $self->{_batch} || [] } );
+    if ( $data ) {
+        $self->{UIDs}{$_} = $data->{$_} for keys %$data;
+    }
+
+    $self->{_pm}->wait_all_children if $self->{_pm};
+
+    if ( $self->{BatchSize} && !$self->{Clone} ) {
+        my @uids = grep { $self->{UIDs}{$_} } sort keys %{ $self->{Pending} };
+
+        if ( @uids ) {
+
+            $RT::Handle->Commit unless $self->{AutoCommit};
+            $RT::Handle->Disconnect;
+            $self->{_pm}->run_on_finish(undef);
+
+            while (@uids) {
+                my @batch = splice @uids, 0, $self->{BatchSize};
+                if ( $self->{_pm}->start ) {
+                    delete $self->{Pending}{$_} for @batch;
+                    next;
+                }
+
+                $RT::Handle->Connect;
+
+                # Always enable AutoCommit in child processes, otherwise deadlock might happen.
+                for my $uid (@batch) {
+                    my ( $class, $id ) = split /-/, $self->{UIDs}{$uid}, 2;
+                    $self->Resolve( $uid, $class, $id );
+                }
+                $self->{_pm}->finish;
+            }
+
+            $RT::Handle->Connect;
+            $RT::Handle->BeginTransaction unless $self->{AutoCommit};
+
+            $self->{_pm}->wait_all_children;
+        }
+    }
+
     # Fill CGM
     my $dbh = $RT::Handle->dbh;
 
@@ -580,4 +681,81 @@ sub Progress {
     return $self->{Progress} = $_[0];
 }
 
+sub BatchCreate {
+    my $self  = shift;
+    my @items = @_;
+
+    if ( $self->{Clone} ) {
+        my %query;
+
+        # Do not actually insert, just get the SQL
+        local *RT::Handle::Insert = sub {
+            my $self = shift;
+            return $self->InsertQueryString(@_);
+        };
+        for (@items) {
+            my ( $class, $uid, $data ) = @$_;
+            my $obj = $class->new( RT->SystemUser );
+
+            my ( $sql, @bind ) = $obj->DBIx::SearchBuilder::Record::Create(%$data);
+            push @{ $query{$sql} }, \@bind;
+        }
+
+        for my $sql ( keys %query ) {
+            my $dbh   = $RT::Handle->dbh;
+            my $count = @{ $query{$sql} };
+            my $values_paren;
+            if ( $sql =~ /(\(\?.+?\))/i ) {
+                $values_paren = $1;
+            }
+
+            # DBs limit the number of placeholders (64k for Pg), so we fill
+            # them in ourselves to support bigger batches; performance is similar.
+            my $batch_sql
+                = $RT::Handle->FillIn( $sql . ( ", $values_paren" x ( $count - 1 ) ), [ map @$_, @{ $query{$sql} } ] );
+
+            eval {
+                local $SIG{__DIE__};
+                $dbh->do($batch_sql);
+            };
+            if ($@) {
+                my $err = "Failed to run $batch_sql: $@\n";
+                if ( not $self->{HandleError}->( $self, $err ) ) {
+                    die $err;
+                }
+                else {
+                    next;
+                }
+            }
+        }
+        return;
+    }
+    else {
+        my %map;
+        for (@items) {
+            my ( $class, $uid, $data ) = @$_;
+            my $obj = $class->new( RT->SystemUser );
+
+            my ( $id, $msg ) = eval {
+                # catch and rethrow on the outside so we can provide more info
+                local $SIG{__DIE__};
+                $obj->DBIx::SearchBuilder::Record::Create( %{$data} );
+            };
+            if ( not $id or $@ ) {
+                $msg ||= '';    # avoid undef
+                my $err = "Failed to create $uid: $msg $@\n" . Data::Dumper::Dumper($data) . "\n";
+                if ( not $self->{HandleError}->( $self, $err ) ) {
+                    die $err;
+                }
+                else {
+                    next;
+                }
+            }
+
+            $map{$uid} = "$class-$id";
+        }
+        return %map ? \%map : undef;
+    }
+}
+
 1;
diff --git a/sbin/rt-importer.in b/sbin/rt-importer.in
index 219e34534e..f2411d657c 100644
--- a/sbin/rt-importer.in
+++ b/sbin/rt-importer.in
@@ -102,6 +102,8 @@ GetOptions(
 
     "auto-commit!",
 
+    "batch-size=i",
+    "max-processes=i",
     "dump=s@",
 ) or Pod::Usage::pod2usage();
 
@@ -151,6 +153,8 @@ my $import = RT::Migrate::Importer::File->new(
     DumpObjects         => $OPT{dump},
     Resume              => $OPT{resume},
     AutoCommit          => $OPT{'auto-commit'},
+    BatchSize           => $OPT{'batch-size'},
+    MaxProcesses        => $OPT{'max-processes'} || 10,
     HandleError         => $error_handler,
 );
 
@@ -293,6 +297,15 @@ Works only in conjunction with C<--list>.
 Don't auto commit to database. When this flag is used, it will commit only
 once for each data file.  This could boost performance in some cases.
 
+=item B<--batch-size> I<BATCH_SIZE>
+
+Create objects in batch and in forked processes when possible. Default is 0,
+which means batch processing is not enabled.
+
+=item B<--max-processes> I<MAX_PROCESSES>
+
+The number of max allowed child processes for batch processing. Default is 10.
+
 =back
 
 

commit 9b8099666de72ea24314d75c2668cc9423b68356
Author: sunnavy <sunnavy at bestpractical.com>
Date:   Thu Oct 13 20:45:31 2022 +0800

    Fill up CachedGroupMembers at the end of importer for performance
    
    Previously we created corresponding CachedGroupMember rows on every
    Group/GroupMember create, which was quite slow. Doing it via plain SQL
    at the end is dramatically faster: a CachedGroupMembers table with 100k
    rows can now be filled in seconds (previously it took minutes!). The
    cascaded-membership SQL runs repeatedly because each pass extends
    membership chains by one more level of nesting.

diff --git a/lib/RT/Group.pm b/lib/RT/Group.pm
index 7f9854a204..39f6de0a43 100644
--- a/lib/RT/Group.pm
+++ b/lib/RT/Group.pm
@@ -1752,17 +1752,6 @@ sub PreInflate {
     return 1;
 }
 
-sub PostInflate {
-    my $self = shift;
-
-    my $cgm = RT::CachedGroupMember->new($self->CurrentUser);
-    $cgm->Create(
-        Group  => $self->PrincipalObj,
-        Member => $self->PrincipalObj,
-        ImmediateParent => $self->PrincipalObj
-    );
-}
-
 # If this group represents the members of a custom role, then return
 # the RT::CustomRole object. Otherwise, return undef
 sub _CustomRoleObj {
diff --git a/lib/RT/GroupMember.pm b/lib/RT/GroupMember.pm
index 4a092c332f..2733d3451f 100644
--- a/lib/RT/GroupMember.pm
+++ b/lib/RT/GroupMember.pm
@@ -603,12 +603,6 @@ sub PreInflate {
     return 1;
 }
 
-sub PostInflate {
-    my $self = shift;
-
-    $self->_InsertCGM;
-}
-
 RT::Base->_ImportOverlays();
 
 1;
diff --git a/lib/RT/Migrate/Importer.pm b/lib/RT/Migrate/Importer.pm
index 98c6742b53..3ee478e0aa 100644
--- a/lib/RT/Migrate/Importer.pm
+++ b/lib/RT/Migrate/Importer.pm
@@ -477,6 +477,67 @@ sub CloseStream {
 
     $self->{Progress}->(undef, 'force') if $self->{Progress};
 
+    # Fill CGM
+    my $dbh = $RT::Handle->dbh;
+
+    # Groups
+    $dbh->do(<<'EOF');
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+    SELECT Groups.id, Groups.id, 0, Groups.id, Principals.Disabled FROM Groups
+    LEFT JOIN Principals ON ( Groups.id = Principals.id )
+    LEFT JOIN CachedGroupMembers ON (
+        Groups.id = CachedGroupMembers.GroupId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.MemberId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.ImmediateParentId
+        )
+    WHERE CachedGroupMembers.id IS NULL
+EOF
+
+    # GroupMembers
+    $dbh->do(<<'EOF');
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+    SELECT GroupMembers.GroupId, GroupMembers.MemberId, 0, GroupMembers.GroupId, Principals.Disabled FROM GroupMembers
+    LEFT JOIN Principals ON ( GroupMembers.GroupId = Principals.id )
+    LEFT JOIN CachedGroupMembers ON (
+        GroupMembers.GroupId = CachedGroupMembers.GroupId
+        AND GroupMembers.MemberId = CachedGroupMembers.MemberId
+        AND CachedGroupMembers.GroupId = CachedGroupMembers.ImmediateParentId
+    )
+    WHERE CachedGroupMembers.id IS NULL
+EOF
+
+    # Fixup Via
+    $dbh->do(<<'EOF');
+UPDATE CachedGroupMembers SET Via=id WHERE Via=0
+EOF
+
+    # Cascaded GroupMembers, use the same SQL in rt-validator
+    my $cascaded_cgm = <<'EOF';
+INSERT INTO CachedGroupMembers (GroupId, MemberId, Via, ImmediateParentId, Disabled)
+SELECT cgm1.GroupId, gm2.MemberId, cgm1.id AS Via,
+    cgm1.MemberId AS ImmediateParentId, cgm1.Disabled
+FROM
+    CachedGroupMembers cgm1
+    CROSS JOIN GroupMembers gm2
+    LEFT JOIN CachedGroupMembers cgm3 ON (
+            cgm3.GroupId           = cgm1.GroupId
+        AND cgm3.MemberId          = gm2.MemberId
+        AND cgm3.Via               = cgm1.id
+        AND cgm3.ImmediateParentId = cgm1.MemberId )
+    LEFT JOIN Groups g ON (
+        cgm1.GroupId = g.id
+    )
+WHERE cgm1.GroupId != cgm1.MemberId
+AND gm2.GroupId = cgm1.MemberId
+AND cgm3.id IS NULL
+AND g.Domain != 'RT::Ticket-Role'
+EOF
+    # Do this multiple times if needed to fill up cascaded group members
+    while ( my $rv = $dbh->do($cascaded_cgm) ) {
+        # $rv could be 0E0, which is true in bool context but 0 in numeric comparison.
+        last unless $rv > 0;
+    }
+
     return if $self->{Clone};
 
     # Take global CFs which we made and make them un-global

-----------------------------------------------------------------------


hooks/post-receive
-- 
rt

