[Rt-commit] rt branch, 4.2/optimize-cgm-table, created. rt-4.0.0rc7-253-g8adb70f

Ruslan Zakirov ruz at bestpractical.com
Mon Mar 28 20:42:59 EDT 2011


The branch, 4.2/optimize-cgm-table has been created
        at  8adb70fba60b903c2aa564dce5810cb3d252cefa (commit)

- Log -----------------------------------------------------------------
commit 5226413641d51625f26d626b19ff52f3057c1cf0
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Sun Mar 27 19:15:35 2011 +0400

    drop all CGM management from GroupMember class

diff --git a/lib/RT/GroupMember.pm b/lib/RT/GroupMember.pm
index 84887ee..eb7736e 100644
--- a/lib/RT/GroupMember.pm
+++ b/lib/RT/GroupMember.pm
@@ -124,6 +124,8 @@ sub Create {
         return (undef);
     }
 
+    my $gid = $args{'Group'}->id;
+    my $mid = $args{'Member'}->id;
 
     #Clear the key cache. TODO someday we may want to just clear a little bit of the keycache space. 
     # TODO what about the groups key cache?
@@ -143,7 +145,7 @@ sub Create {
             $RT::Handle->Rollback() unless ($args{'InsideTransaction'});
             return(undef);
         }
-        elsif ( $args{'Member'}->Id == $args{'Group'}->Id) {
+        elsif ( $mid == $gid) {
             $RT::Logger->debug("Can't add a group to itself");
             $RT::Handle->Rollback() unless ($args{'InsideTransaction'});
             return(undef);
@@ -152,8 +154,8 @@ sub Create {
 
 
     my $id = $self->SUPER::Create(
-        GroupId  => $args{'Group'}->Id,
-        MemberId => $args{'Member'}->Id
+        GroupId  => $gid,
+        MemberId => $mid
     );
 
     unless ($id) {
@@ -163,51 +165,9 @@ sub Create {
 
     my $cached_member = RT::CachedGroupMember->new( $self->CurrentUser );
     my $cached_id     = $cached_member->Create(
-        Member          => $args{'Member'},
         Group           => $args{'Group'},
-        ImmediateParent => $args{'Group'},
-        Via             => '0'
-    );
-
-
-    #When adding a member to a group, we need to go back
-    #and popuplate the CachedGroupMembers of all the groups that group is part of .
-
-    my $cgm = RT::CachedGroupMembers->new( $self->CurrentUser );
-
-    # find things which have the current group as a member. 
-    # $group is an RT::Principal for the group.
-    $cgm->LimitToGroupsWithMember( $args{'Group'}->Id );
-    $cgm->Limit(
-        SUBCLAUSE => 'filter', # dont't mess up with prev condition
-        FIELD => 'MemberId',
-        OPERATOR => '!=',
-        VALUE => 'main.GroupId',
-        QUOTEVALUE => 0,
-        ENTRYAGGREGATOR => 'AND',
+        Member          => $args{'Member'},
     );
-
-    while ( my $parent_member = $cgm->Next ) {
-        my $parent_id = $parent_member->MemberId;
-        my $via       = $parent_member->Id;
-        my $group_id  = $parent_member->GroupId;
-
-          my $other_cached_member =
-          RT::CachedGroupMember->new( $self->CurrentUser );
-        my $other_cached_id = $other_cached_member->Create(
-            Member          => $args{'Member'},
-                      Group => $parent_member->GroupObj,
-            ImmediateParent => $parent_member->MemberObj,
-            Via             => $parent_member->Id
-        );
-        unless ($other_cached_id) {
-            $RT::Logger->err( "Couldn't add " . $args{'Member'}
-                  . " as a submember of a supergroup" );
-            $RT::Handle->Rollback() unless ($args{'InsideTransaction'});
-            return (undef);
-        }
-    } 
-
     unless ($cached_id) {
         $RT::Handle->Rollback() unless ($args{'InsideTransaction'});
         return (undef);
@@ -290,59 +250,34 @@ Expects to be called _outside_ a transaction
 sub Delete {
     my $self = shift;
 
-
     $RT::Handle->BeginTransaction();
 
-    # Find all occurrences of this member as a member of this group
-    # in the cache and nuke them, recursively.
-
-    # The following code will delete all Cached Group members
-    # where this member's group is _not_ the primary group 
-    # (Ie if we're deleting C as a member of B, and B happens to be 
-    # a member of A, will delete C as a member of A without touching
-    # C as a member of B
-
-    my $cached_submembers = RT::CachedGroupMembers->new( $self->CurrentUser );
-
-    $cached_submembers->Limit(
-        FIELD    => 'MemberId',
-        OPERATOR => '=',
-        VALUE    => $self->MemberObj->Id
-    );
-
-    $cached_submembers->Limit(
-        FIELD    => 'ImmediateParentId',
-        OPERATOR => '=',
-        VALUE    => $self->GroupObj->Id
-    );
-
-
-
-
-
-    while ( my $item_to_del = $cached_submembers->Next() ) {
-        my $del_err = $item_to_del->Delete();
-        unless ($del_err) {
-            $RT::Handle->Rollback();
-            $RT::Logger->warning("Couldn't delete cached group submember ".$item_to_del->Id);
-            return (undef);
-        }
-    }
-
     my ($err, $msg) = $self->SUPER::Delete();
     unless ($err) {
-            $RT::Logger->warning("Couldn't delete cached group submember ".$self->Id);
+        $RT::Logger->error("Couldn't delete cached group submember ".$self->Id);
         $RT::Handle->Rollback();
         return (undef);
     }
 
+    my $cgm = RT::CachedGroupMember->new( $self->CurrentUser );
+    $cgm->LoadByCols( GroupId => $self->GroupId, MemberId => $self->MemberId, );
+    if ( $cgm->id ) {
+        my $status = $cgm->Delete;
+        unless ($status) {
+            $RT::Logger->error("Couldn't delete cached group member");
+            $RT::Handle->Rollback;
+            return (undef);
+        }
+    } else {
+        $RT::Logger->warning("There was no CGM record matching GM record");
+    }
+
     #Clear the key cache. TODO someday we may want to just clear a little bit of the keycache space. 
     # TODO what about the groups key cache?
     RT::Principal->InvalidateACLCache();
 
     $RT::Handle->Commit();
     return ($err);
-
 }
 
 

commit 4c1940a5ce5f8f6f1ec72796a5e86cecd0536e0d
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Sun Mar 27 19:18:24 2011 +0400

    insert all CGM records in one query + update

diff --git a/lib/RT/CachedGroupMember.pm b/lib/RT/CachedGroupMember.pm
index c6ef008..d27e5e9 100644
--- a/lib/RT/CachedGroupMember.pm
+++ b/lib/RT/CachedGroupMember.pm
@@ -96,12 +96,11 @@ Create takes a hash of values and creates a row in the database:
 
 sub Create {
     my $self = shift;
-    my %args = ( Group           => '',
-                 Member          => '',
-                 ImmediateParent => '',
-                 Via             => '0',
-                 Disabled        => '0',
-                 @_ );
+    my %args = (
+        Group           => undef,
+        Member          => undef,
+        @_
+    );
 
     unless (    $args{'Member'}
              && UNIVERSAL::isa( $args{'Member'}, 'RT::Principal' )
@@ -115,71 +114,68 @@ sub Create {
         $RT::Logger->debug("$self->Create: bogus Group argument");
     }
 
-    unless (    $args{'ImmediateParent'}
-             && UNIVERSAL::isa( $args{'ImmediateParent'}, 'RT::Principal' )
-             && $args{'ImmediateParent'}->Id ) {
-        $RT::Logger->debug("$self->Create: bogus ImmediateParent argument");
-    }
-
-    # If the parent group for this group member is disabled, it's disabled too, along with all its children
-    if ( $args{'ImmediateParent'}->Disabled ) {
-        $args{'Disabled'} = $args{'ImmediateParent'}->Disabled;
-    }
-
     my $id = $self->SUPER::Create(
-                              GroupId           => $args{'Group'}->Id,
-                              MemberId          => $args{'Member'}->Id,
-                              ImmediateParentId => $args{'ImmediateParent'}->Id,
-                              Disabled          => $args{'Disabled'},
-                              Via               => $args{'Via'}, );
-
+        GroupId           => $args{'Group'}->Id,
+        MemberId          => $args{'Member'}->Id,
+        Disabled          => $args{'Disabled'},
+    );
     unless ($id) {
-        $RT::Logger->warning( "Couldn't create "
-                           . $args{'Member'}
-                           . " as a cached member of "
-                           . $args{'Group'}->Id . " via "
-                           . $args{'Via'} );
-        return (undef);  #this will percolate up and bail out of the transaction
-    }
-    if ( $self->__Value('Via') == 0 ) {
-        my ( $vid, $vmsg ) = $self->__Set( Field => 'Via', Value => $id );
-        unless ($vid) {
-            $RT::Logger->warning( "Due to a via error, couldn't create "
-                               . $args{'Member'}
-                               . " as a cached member of "
-                               . $args{'Group'}->Id . " via "
-                               . $args{'Via'} );
-            return (undef)
-              ;          #this will percolate up and bail out of the transaction
-        }
+        $RT::Logger->warning(
+            "Couldn't create ". $args{'Member'} ." as a cached member of "
+            . $args{'Group'} ." via ". $args{'Via'}
+        );
+        return (undef);
     }
-
     return $id if $args{'Member'}->id == $args{'Group'}->id;
 
-    if ( $args{'Member'}->IsGroup() ) {
-        my $GroupMembers = $args{'Member'}->Object->MembersObj();
-        while ( my $member = $GroupMembers->Next() ) {
-            my $cached_member =
-              RT::CachedGroupMember->new( $self->CurrentUser );
-            my $c_id = $cached_member->Create(
-                                             Group  => $args{'Group'},
-                                             Member => $member->MemberObj,
-                                             ImmediateParent => $args{'Member'},
-                                             Disabled => $args{'Disabled'},
-                                             Via      => $id );
-            unless ($c_id) {
-                return (undef);    #percolate the error upwards.
-                     # the caller will log an error and abort the transaction
-            }
-
-        }
+    my $table = $self->Table;
+    unless ( $args{'Disabled'} ) {
+        # update existing records, in case we activated some paths
+        my $query = "
+            SELECT CGM3.id FROM
+                $table CGM1 CROSS JOIN $table CGM2
+                JOIN $table CGM3
+                    ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+            WHERE
+                CGM1.MemberId = ? AND (CGM1.GroupId != CGM1.MemberId OR CGM1.MemberId = ?)
+                AND CGM2.GroupId = ? AND (CGM2.GroupId != CGM2.MemberId OR CGM2.GroupId = ?)
+                AND CGM1.Disabled = 0 AND CGM2.Disabled = 0 AND CGM3.Disabled > 0
+        ";
+        $RT::Handle->SimpleUpdateFromSelect(
+            $table, { Disabled => 0 }, $query,
+            $args{'Group'}->id, $args{'Group'}->id,
+            $args{'Member'}->id, $args{'Member'}->id
+        ) or return undef;
     }
 
-    return ($id);
-
-}
+    my @binds;
 
+    my $disabled_clause;
+    if ( $args{'Disabled'} ) {
+        $disabled_clause = '?';
+        push @binds, $args{'Disabled'};
+    } else {
+        $disabled_clause = 'CASE WHEN CGM1.Disabled + CGM2.Disabled > 0 THEN 1 ELSE 0 END';
+    }
 
+    my $query = "SELECT CGM1.GroupId, CGM2.MemberId, $disabled_clause FROM
+        $table CGM1 CROSS JOIN $table CGM2
+        LEFT JOIN $table CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+        WHERE
+            CGM1.MemberId = ? AND (CGM1.GroupId != CGM1.MemberId OR CGM1.MemberId = ?)
+            AND CGM2.GroupId = ? AND (CGM2.GroupId != CGM2.MemberId OR CGM2.GroupId = ?)
+            AND CGM3.id IS NULL
+    ";
+    $RT::Handle->InsertFromSelect(
+        $table, ['GroupId', 'MemberId', 'Disabled'], $query,
+        @binds,
+        $args{'Group'}->id, $args{'Group'}->id,
+        $args{'Member'}->id, $args{'Member'}->id
+    );
+
+    return $id;
+}
 
 =head2 Delete
 

commit 803a49e00f793d912095ff17a3e1987ec10d2411
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Sun Mar 27 19:20:55 2011 +0400

    handle properly disabled on create

diff --git a/lib/RT/CachedGroupMember.pm b/lib/RT/CachedGroupMember.pm
index d27e5e9..8f465d5 100644
--- a/lib/RT/CachedGroupMember.pm
+++ b/lib/RT/CachedGroupMember.pm
@@ -114,6 +114,8 @@ sub Create {
         $RT::Logger->debug("$self->Create: bogus Group argument");
     }
 
+    $args{'Disabled'} = ($args{'Group'}->Disabled || $args{'Member'}->Disabled)? 1 : 0;
+
     my $id = $self->SUPER::Create(
         GroupId           => $args{'Group'}->Id,
         MemberId          => $args{'Member'}->Id,

commit a28198fa5edc160fc4ddbe36542df739834bc40f
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Sun Mar 27 19:22:10 2011 +0400

    first pass on CGM->Delete method

diff --git a/lib/RT/CachedGroupMember.pm b/lib/RT/CachedGroupMember.pm
index 8f465d5..bb90398 100644
--- a/lib/RT/CachedGroupMember.pm
+++ b/lib/RT/CachedGroupMember.pm
@@ -190,33 +190,49 @@ mysql supported foreign keys with cascading deletes.
 sub Delete {
     my $self = shift;
 
-    
-    my $member = $self->MemberObj();
-    if ( $member->IsGroup ) {
-        my $deletable = RT::CachedGroupMembers->new( $self->CurrentUser );
+    if ( $self->MemberId == $self->GroupId ) {
+        # deleting self-referenced means that we're deleting a principal
+        # itself and all records where it's a parent or member should
+        # be deleted beforehead
+        return $self->SUPER::Delete( @_ );
+    }
 
-        $deletable->Limit( FIELD    => 'id',
-                           OPERATOR => '!=',
-                           VALUE    => $self->id );
-        $deletable->Limit( FIELD    => 'Via',
-                           OPERATOR => '=',
-                           VALUE    => $self->id );
+    my $table = $self->Table;
+    my $query = "
+        SELECT CGM3.id FROM
+            $table CGM1
+            CROSS JOIN $table CGM2
+            JOIN $table CGM3
+                ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
 
-        while ( my $kid = $deletable->Next ) {
-            my $kid_err = $kid->Delete();
-            unless ($kid_err) {
-                $RT::Logger->error(
-                              "Couldn't delete CachedGroupMember " . $kid->Id );
-                return (undef);
-            }
-        }
-    }
-    my $ret = $self->SUPER::Delete();
-    unless ($ret) {
-        $RT::Logger->error( "Couldn't delete CachedGroupMember " . $self->Id );
-        return (undef);
-    }
-    return $ret;
+        WHERE
+            CGM1.MemberId = ?
+            AND CGM2.GroupId = ?
+            AND NOT EXISTS (
+                SELECT CGM4.GroupId
+                    FROM $table CGM4
+                    JOIN GroupMembers GM1
+                        ON GM1.GroupId = CGM4.MemberId
+                    JOIN $table CGM5
+                        ON CGM5.GroupId = GM1.MemberId
+                    JOIN $table CGM6
+                        ON CGM6.GroupId = CGM4.MemberId
+                        AND CGM6.MemberId = ?
+                    LEFT JOIN $table CGM7
+                        ON CGM7.GroupId = CGM5.GroupId
+                        AND CGM7.MemberId = ?
+                WHERE
+                    CGM4.GroupId = CGM3.GroupId
+                    AND CGM5.MemberId = CGM3.MemberId
+                    AND CGM7.id IS NULL
+            )
+    ";
+
+    return $RT::Handle->DeleteFromSelect(
+        $table, $query,
+        $self->GroupId, $self->MemberId,
+        $self->GroupId, $self->GroupId,
+    );
 }
 
 

commit 02a7a51e731f7348da0ba0fa36d4d8178346c3c9
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Mon Mar 28 15:15:28 2011 +0400

    second attempt on deleting CGM records
    
    * delete all potential candidates + insert back missing
    * faster than previouse and linear

diff --git a/lib/RT/CachedGroupMember.pm b/lib/RT/CachedGroupMember.pm
index bb90398..7979544 100644
--- a/lib/RT/CachedGroupMember.pm
+++ b/lib/RT/CachedGroupMember.pm
@@ -199,43 +199,42 @@ sub Delete {
 
     my $table = $self->Table;
     my $query = "
-        SELECT CGM3.id FROM
-            $table CGM1
-            CROSS JOIN $table CGM2
-            JOIN $table CGM3
-                ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
-
+        SELECT CGM1.id FROM
+            CachedGroupMembers CGM1
+            JOIN CachedGroupMembers CGMA ON CGMA.MemberId = ?
+            JOIN CachedGroupMembers CGMD ON CGMD.GroupId = ?
+            LEFT JOIN GroupMembers GM1
+                ON GM1.GroupId = CGM1.GroupId AND GM1.MemberId = CGM1.MemberId
         WHERE
-            CGM1.MemberId = ?
-            AND CGM2.GroupId = ?
-            AND NOT EXISTS (
-                SELECT CGM4.GroupId
-                    FROM $table CGM4
-                    JOIN GroupMembers GM1
-                        ON GM1.GroupId = CGM4.MemberId
-                    JOIN $table CGM5
-                        ON CGM5.GroupId = GM1.MemberId
-                    JOIN $table CGM6
-                        ON CGM6.GroupId = CGM4.MemberId
-                        AND CGM6.MemberId = ?
-                    LEFT JOIN $table CGM7
-                        ON CGM7.GroupId = CGM5.GroupId
-                        AND CGM7.MemberId = ?
-                WHERE
-                    CGM4.GroupId = CGM3.GroupId
-                    AND CGM5.MemberId = CGM3.MemberId
-                    AND CGM7.id IS NULL
-            )
+            CGM1.GroupId = CGMA.GroupId AND CGM1.MemberId = CGMD.MemberId
+            AND CGM1.GroupId != CGM1.MemberId
+            AND GM1.id IS NULL 
     ";
 
-    return $RT::Handle->DeleteFromSelect(
+    my $res = $RT::Handle->DeleteFromSelect(
         $table, $query,
         $self->GroupId, $self->MemberId,
-        $self->GroupId, $self->GroupId,
     );
-}
+    return $res unless $res;
 
+    $query = "SELECT CGM1.GroupId, CGM2.MemberId FROM
+        $table CGM1 CROSS JOIN $table CGM2
+        LEFT JOIN $table CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+        WHERE
+            CGM1.MemberId = ? AND (CGM1.GroupId != CGM1.MemberId OR CGM1.MemberId = ?)
+            AND CGM2.GroupId = ? AND (CGM2.GroupId != CGM2.MemberId OR CGM2.GroupId = ?)
+            AND CGM3.id IS NULL
+    ";
+    $res = $RT::Handle->InsertFromSelect(
+        $table, ['GroupId', 'MemberId'], $query,
+        $self->GroupId, $self->GroupId,
+        $self->MemberId, $self->MemberId,
+    );
+    return $res unless $res;
 
+    return 1;
+}
 
 =head2 SetDisabled
 

commit c3e365b2cfe4d7caaf940f9c39193cc1d603b715
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Tue Mar 29 01:41:40 2011 +0400

    Group and Member can not be NULLs in CGM

diff --git a/etc/schema.Oracle b/etc/schema.Oracle
index 4bcae6c..0c0943b 100755
--- a/etc/schema.Oracle
+++ b/etc/schema.Oracle
@@ -186,8 +186,8 @@ CREATE SEQUENCE CachedGroupMembers_seq;
 CREATE TABLE CachedGroupMembers (
 	id		NUMBER(11,0) 
 		CONSTRAINT CachedGroupMembers_Key PRIMARY KEY,
-	GroupId		NUMBER(11,0),
-	MemberId	NUMBER(11,0),
+	GroupId		NUMBER(11,0) NOT NULL,
+	MemberId	NUMBER(11,0) NOT NULL,
 	Via		NUMBER(11,0),
 	ImmediateParentId	NUMBER(11,0),
 	Disabled	NUMBER(11,0) DEFAULT 0 NOT NULL
diff --git a/etc/schema.Pg b/etc/schema.Pg
index 565f76b..ce97731 100755
--- a/etc/schema.Pg
+++ b/etc/schema.Pg
@@ -311,8 +311,8 @@ CREATE SEQUENCE cachedgroupmembers_id_seq;
 
 CREATE TABLE CachedGroupMembers (
         id int DEFAULT nextval('cachedgroupmembers_id_seq'),
-        GroupId int, 
-        MemberId int, 
+        GroupId int NOT NULL,
+        MemberId int NOT NULL,
         Via int, 
         ImmediateParentId int, 
         Disabled integer NOT NULL DEFAULT 0 , 
diff --git a/etc/schema.SQLite b/etc/schema.SQLite
index 138971c..bf5677c 100755
--- a/etc/schema.SQLite
+++ b/etc/schema.SQLite
@@ -200,8 +200,8 @@ CREATE TABLE GroupMembers (
 
 create table CachedGroupMembers (
         id integer primary key ,
-        GroupId int, 
-        MemberId int, 
+        GroupId int NOT NULL,
+        MemberId int NOT NULL,
         Via int, 
         ImmediateParentId int,
         Disabled int2 NOT NULL DEFAULT 0  # if this cached group member is a member of this group by way of a disabled
diff --git a/etc/schema.mysql b/etc/schema.mysql
index c313aaf..eb3c53f 100755
--- a/etc/schema.mysql
+++ b/etc/schema.mysql
@@ -189,8 +189,8 @@ CREATE UNIQUE INDEX GroupMembers1 on GroupMembers (GroupId, MemberId);
 
 create table CachedGroupMembers (
         id int auto_increment,
-        GroupId int, # foreign key to Principals
-        MemberId int, # foreign key to Principals
+        GroupId int NOT NULL, # foreign key to Principals
+        MemberId int NOT NULL, # foreign key to Principals
         Via int, #foreign key to CachedGroupMembers. (may point to $self->id)
         ImmediateParentId int, #foreign key to prinicpals.
                                # this points to the group that the member is

commit bed1669ebc962b1a83cd5cf2f02b454f09822f58
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Tue Mar 29 01:49:52 2011 +0400

    indexes for CGM table on mysql

diff --git a/etc/schema.mysql b/etc/schema.mysql
index eb3c53f..166c159 100755
--- a/etc/schema.mysql
+++ b/etc/schema.mysql
@@ -203,9 +203,10 @@ create table CachedGroupMembers (
         PRIMARY KEY (id)
 ) ENGINE=InnoDB CHARACTER SET utf8;
 
-CREATE INDEX DisGrouMem  on CachedGroupMembers (GroupId,MemberId,Disabled);
-CREATE INDEX CachedGroupMembers3 on CachedGroupMembers (MemberId, ImmediateParentId);
-
+# we can create UNIQUE ON (GroupId,MemberId), but it only a few bytes shorter than
+# the following and there is no need to do that
+CREATE UNIQUE INDEX DisGrouMem  on CachedGroupMembers (GroupId,MemberId,Disabled);
+CREATE UNIQUE INDEX CachedGroupMembers3  on CachedGroupMembers (MemberId,GroupId,Disabled);
 
 
 CREATE TABLE Users (

commit 8adb70fba60b903c2aa564dce5810cb3d252cefa
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Tue Mar 29 04:41:09 2011 +0400

    documentation for CGM table management and SQL

diff --git a/lib/RT/CachedGroupMember.pm b/lib/RT/CachedGroupMember.pm
index 7979544..899cf4e 100644
--- a/lib/RT/CachedGroupMember.pm
+++ b/lib/RT/CachedGroupMember.pm
@@ -426,6 +426,257 @@ Returns (1, 'Status message') on success and (0, 'Error Message') on failure.
 =cut
 
 
+=head1 FOR DEVELOPERS
+
+=head2 SQL behind maintaining CGM table
+
+=head3 Terminology
+
+=over 4
+
+=item * An(E) - all ancestors of E including E itself
+
+=item * De(E) - all descendants of E including E itself
+
+=back
+
+=head3 Adding a (G -> M) record
+
+When a new (G -> M) record added we should connect all An(G) to all De(M), so it's
+the following select:
+
+    SELECT CGM1.GroupId, CGM2.MemberId FROM
+        CachedGroupMembers CGM1
+        CROSS JOIN CachedGroupMembers CGM2
+    WHERE
+        CGM1.MemberId = G
+        AND CGM2.GroupId = M
+    ;
+
+It handles G and M itself as we always have (E->E) records.
+
+Some of this records may exist in the table, so we should skip them:
+
+    SELECT CGM1.GroupId, CGM2.MemberId FROM
+        CachedGroupMembers CGM1
+        CROSS JOIN CachedGroupMembers CGM2
+        LEFT JOIN CachedGroupMembers CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+    WHERE
+        CGM1.MemberId = G
+        AND CGM2.GroupId = M
+        AND CGM3.id IS NULL
+    ;
+
+In order to do less checks we should skip (E->E) records, but not those
+that touch our G and M:
+
+    SELECT CGM1.GroupId, CGM2.MemberId FROM
+        CachedGroupMembers CGM1
+        CROSS JOIN CachedGroupMembers CGM2
+        LEFT JOIN CachedGroupMembers CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+    WHERE
+        CGM1.MemberId = G AND (CGM1.GroupId != CGM1.MemberId OR CGM1.MemberId = G)
+        AND CGM2.GroupId = M AND (CGM2.GroupId != CGM2.MemberId OR CGM2.GroupId = M)
+        AND CGM3.id IS NULL
+    ;
+
+=head4 Disabled column
+
+We should handle properly Disabled column.
+
+If the new records we're adding is disabled then all new paths we add as well
+disabled and existing one are not affected.
+
+Otherwise activity of new paths depends on entries that got connected and existing
+paths have to be updated.
+
+New paths:
+
+    SELECT CGM1.GroupId, CGM2.MemberId, IF(CGM1.Disabled+CGM2.Disabled > 0, 1, 0) FROM
+    ...
+
+Updating old paths, the following records should be activated:
+
+    SELECT CGM3.id FROM
+        CachedGroupMembers CGM1
+        CROSS JOIN CachedGroupMembers CGM2
+        JOIN CachedGroupMembers CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+    WHERE
+        CGM1.MemberId = G AND (CGM1.GroupId != CGM1.MemberId OR CGM1.MemberId = G)
+        AND CGM2.GroupId = M AND (CGM2.GroupId != CGM2.MemberId OR CGM2.GroupId = M)
+        AND CGM1.Disabled = 0 AND CGM2.Disabled = 0 AND CGM3.Disabled > 0
+    ;
+
+It's better to do this before we insert new records, so we scan less records
+to find things we need updating.
+
+=head3 mysql performance
+
+Sample results:
+
+    10k  - 0.4x seconds
+    100k - 4.x seconds
+    1M   - 4x.x seconds
+
+As long as innodb_buffer_pool_size is big enough to store insert buffer,
+and MIN(tmp_table_size, max_heap_table_size) allow us to store tmp table
+in the memory. For 100k records we need less than 15 MBytes. Disk I/O
+heavily degrades performance.
+
+=head2 Deleting a (G->M) record
+
+In case record is deleted from GM table we should re-evaluate records in CGM.
+
+Candidates for deletion are any records An(G) -> De(M):
+
+    SELECT CGM3.id FROM
+        CachedGroupMembers CGM1
+        CROSS JOIN CachedGroupMembers CGM2
+        JOIN CachedGroupMembers CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+    WHERE
+        CGM1.MemberId = G
+        AND CGM2.GroupId = M
+    ;
+
+Some of this records may still have alternative routes. A candidate (G', M')
+stays in the table if following records exist in GM and CGM tables.
+(G', X) in CGM, (X,Y) in GM and (Y,M') in CGM, where X ~ An(G) and Y !~ An(G).
+
+    SELECT CGM3.id FROM
+        CachedGroupMembers CGM1
+        CROSS JOIN CachedGroupMembers CGM2
+        JOIN CachedGroupMembers CGM3
+            ON CGM3.GroupId = CGM1.GroupId AND CGM3.MemberId = CGM2.MemberId
+
+    WHERE
+        CGM1.MemberId = G
+        AND CGM2.GroupId = M
+        AND NOT EXISTS (
+            SELECT CGM4.GroupId FROM
+                CachedGroupMembers CGM4
+                    ON CGM4.GroupId = CGM3.GroupId
+                JOIN GroupMembers GM1
+                    ON GM1.GroupId = CGM4.MemberId
+                JOIN GroupMembers CGM5
+                    ON CGM4.GroupId = GM1.MemberId
+                    AND CGM4.MemberId = CGM3.MemberId
+                JOIN CachedGroupMembers CGM6
+                    ON CGM6.GroupId = CGM4.MemberId
+                    AND CGM6.MemberId = G
+                LEFT JOIN CachedGroupMembers CGM7
+                    ON CGM7.GroupId = CGM5.GroupId
+                    AND CGM7.MemberId = G
+            WHERE
+                CGM7.id IS NULL
+        )
+    ;
+
+Fun.
+
+=head3 mysql performance
+
+    10k  - 4.x seconds
+    100k - 13x seconds
+    1M   - not tested
+
+Sadly this query perform much worth comparing to the insert operation. Problem is
+in the select.
+
+=head3 Delete all candidates and re-insert missing
+
+We can delete all candidates (An(G)->De(M)) from CGM table that are not
+real GM records: then insert records once again.
+
+    SELECT CGM1.id FROM
+        CachedGroupMembers CGM1
+        JOIN CachedGroupMembers CGMA ON CGMA.MemberId = G
+        JOIN CachedGroupMembers CGMD ON CGMD.GroupId = M
+        LEFT JOIN GroupMembers GM1
+            ON GM1.GroupId = CGM1.GroupId AND GM1.MemberId = CGM1.MemberId
+    WHERE
+        CGM1.GroupId = CGMA.GroupId AND CGM1.MemberId = CGMD.MemberId
+        AND CGM1.GroupId != CGM1.MemberId
+        AND GM1.id IS NULL
+    ;
+
+Then we can re-insert data back with insert from select described above.
+
+=head4 mysql performance
+
+This solution is faster than perviouse variant, 4-5 times slower than
+create operation and behaves linear.
+
+=head3 Recursive delete
+
+Again, some (An(G), De(M)) pairs should be deleted, but some may stay. If
+delete any pair from the set then An(G) and De(M) sets don't change, so
+we can delete things step by step. Run delete operation, if any was deleted
+then run it once again, do it until operation deletes no rows. We shouldn't
+delete records where:
+
+=over 4
+
+=item * GroupId == MemberId
+
+=item * exists matching GM
+
+=item * exists equivalent GM->CGM pair
+
+=item * exists equivalent CGM->GM pair
+
+=over
+
+Query with most conditions in one NOT EXISTS subquery:
+
+    SELECT CGM1.id FROM
+        CachedGroupMembers CGM1
+        JOIN CachedGroupMembers CGMA ON CGMA.MemberId = G
+        JOIN CachedGroupMembers CGMD ON CGMD.GroupId = M
+    WHERE
+        CGM1.GroupId = CGMA.GroupId AND CGM1.MemberId = CGMD.MemberId
+        AND CGM1.GroupId != CGM1.MemberId
+        AND NOT EXISTS (
+            SELECT * FROM
+                CachedGroupMembers CGML
+                CROSS JOIN GroupMembers GM
+                CROSS JOIN CachedGroupMembers CGMR
+            WHERE
+                CGML.GroupId = CGM1.GroupId
+                AND GM.GroupId = CGML.MemberId
+                AND CGMR.GroupId = GM.MemberId
+                AND CGMR.MemberId = CGM1.MemberId
+                AND (
+                    (CGML.GroupId = CGML.MemberId AND CGMR.GroupId != CGMR.MemberId)
+                    OR 
+                    (CGML.GroupId != CGML.MemberId AND CGMR.GroupId = CGMR.MemberId)
+                )
+        )
+    ;
+
+=head4 mysql performance
+
+It's better than first solution, but still it's not linear. Problem is that
+NOT EXISTS means that for every link that should be deleted we have to check too
+many conditions (too many rows to scan). Still delete + insert behave better and
+more linear.
+
+=head3 Alternative ways
+
+Store additional info in a table, similar to Via and IP we had. Then we can
+do iterative delete like in the last solution. However, this will slowdown
+insert, probably not that much as I suspect we would be able to push new data
+in one query.
+
+=head2 TODO
+
+Update disabled on delete. Update SetDisabled method. Delete all uses of Via and
+IntermidiateParent. Review indexes on all databases. Create upgrade script.
+
+=cut
 
 sub _CoreAccessible {
     {

-----------------------------------------------------------------------


More information about the Rt-commit mailing list