[Bps-public-commit] r20130 - CPAN2RT/lib

ruz at bestpractical.com ruz at bestpractical.com
Wed Nov 11 08:37:14 EST 2009


Author: ruz
Date: Wed Nov 11 08:37:11 2009
New Revision: 20130

Modified:
   CPAN2RT/lib/CPAN2RT.pm

Log:
improve memory usage

* file2distinfo method to convert file and filter
* two new for_...($callback) methods for stream processing
* drop all_distributions method
* flush cache

Modified: CPAN2RT/lib/CPAN2RT.pm
==============================================================================
--- CPAN2RT/lib/CPAN2RT.pm	(original)
+++ CPAN2RT/lib/CPAN2RT.pm	Wed Nov 11 08:37:11 2009
@@ -210,6 +210,16 @@
 
 sub _module2file {
     my $self = shift;
+
+    my %res;
+    $self->for_mapped_distributions( sub { $res{ $_[0] } = $_[2] } );
+    return \%res;
+}
+
+sub for_mapped_distributions {
+    my $self = shift;
+    my $callback = shift;
+
     my $file = '02packages.details.txt';
     debug { "Parsing $file...\n" };
     my $path = $self->file_path( $file );
@@ -217,38 +227,28 @@
 
     $self->skip_header( $fh );
 
-    my %res;
     while ( my $str = <$fh> ) {
         chomp $str;
 
         my ($module, $mver, $file) = split /\s+/, $str;
         unless ( $module && $file ) {
-            debug { "couldn't parse '$str'\n" };
+            debug { "couldn't parse '$str' in '$file'" };
             next;
         }
-        $res{ $module } = $file;
+        $callback->( $module, $mver, $file );
     }
     close $fh;
-
-    return \%res;
 }
 
-
-{ my $cache;
-sub all_distributions {
+sub for_all_distributions {
     my $self = shift;
-    $cache = $self->_all_distributions() unless $cache;
-    return $cache;
-} }
+    my $callback = shift;
 
-sub _all_distributions {
-    my $self = shift;
     my $file = 'find-ls';
     debug { "Parsing $file...\n" };
     my $path = $self->file_path( $file );
     open my $fh, "<:utf8", $path or die "Couldn't open '$path': $!";
 
-    my %res;
     while ( my $str = <$fh> ) {
         next if $str =~ /^\d+\s+0\s+l\s+1/; # skip symbolic links
         chomp $str;
@@ -259,18 +259,12 @@
         next unless index($file, "authors/id/") == 0;
         next unless $file =~ /\.(bz2|zip|tgz|tar\.gz)$/i;
 
-        my $info = CPAN::DistnameInfo->new( $file );
-        my $dist = $info->dist;
-        unless ( $dist ) {
-            debug { "Couldn't parse distribution name from '$file'\n" };
-            next;
-        }
-        push @{ $res{ $dist }{'versions'} ||= [] }, $info->version;
-        push @{ $res{ $dist }{'uploaders'} ||= [] }, $info->cpanid;
+        my $info = $self->file2distinfo( $file )
+            or next;
+
+        $callback->( $info );
     }
     close $fh;
-
-    return \%res;
 }
 
 sub sync_authors {
@@ -281,11 +275,13 @@
         return (1);
     }
 
-    my @errors;
+    my ($i, @errors) = (0);
     my $authors = $self->authors;
     while ( my ($cpanid, $meta) = each %$authors ) {
         my ($user, @msg) = $self->load_or_create_user( $cpanid, @{ $meta }{qw(fullname email)} );
         push @errors, @msg unless $user;
+
+        DBIx::SearchBuilder::Record::Cachable->FlushCache unless ++$i % 100;
     }
     return (undef, @errors) if @errors;
     return (1);
@@ -299,43 +295,71 @@
         return (1);
     }
 
-    my @files = uniq values %{ $self->module2file };
-    my $all_dists = $self->all_distributions;
+    my @errors;
 
-    my %tmp;
-    foreach my $file ( @files ) {
-        my $info = CPAN::DistnameInfo->new( "authors/id/$file" );
-        my $dist = $info->dist;
-        unless ( $dist ) {
-            debug { "Couldn't parse distribution name from '$file'\n" };
-            next;
-        }
-        if ( $dist =~ /^(parrot|perl)$/i ) {
-            debug { "Skipping $dist as it's hard coded to be skipped." };
-            next;
-        }
+    my $last = ''; my $i = 0;
+    my $syncer = sub {
+        my $file = $_[2];
+        return if $last eq $file;
 
-        $tmp{ $dist } ||= [];
-        if ( my $v = $info->version ) {
-            push @{ $tmp{ $dist } }, $v;
-        }
-        push @{ $tmp{ $dist } }, @{ $all_dists->{ $dist }{'versions'} || [] };
+        $last = $file;
+
+        my $info = $self->file2distinfo( "authors/id/$file" )
+            or return;
+
+        my ($queue, @msg) = $self->load_or_create_queue( $info->dist );
+        push @errors, @msg unless $queue;
+
+        # we don't sync version here as sync_versions does this better
+
+        DBIx::SearchBuilder::Record::Cachable->FlushCache unless ++$i % 100;
+    };
+    $self->for_mapped_distributions( $syncer );
+
+    return (undef, @errors) if @errors;
+    return (1);
+}
+
+sub sync_versions {
+    my $self = shift;
+    my $force = shift;
+    if ( !$force && !$self->is_new_file( '02packages.details.txt' ) ) {
+        debug { "Skip syncing, file's not changed\n" };
+        return (1);
     }
 
+    my $i = 0;
     my @errors;
-    while ( my ($dist, $versions) = each %tmp ) {
-        my ($queue, @msg) = $self->load_or_create_queue( $dist );
+    my ($last_dist, @last_versions) = ('');
+    my $syncer = sub {
+        return unless $last_dist && @last_versions;
+
+        my $queue = $self->load_queue( $last_dist );
         unless ( $queue ) {
-            push @errors, @msg;
-            next;
+            debug { "No queue for dist '$last_dist'" };
+            return;
         }
-        if ( $versions && @$versions ) {
-            my ($status, @msg) = $self->add_versions( $queue, @$versions );
-            push @errors, @msg unless $status;
+
+        my ($status, @msg) = $self->add_versions( $queue, @last_versions );
+        push @errors, @msg unless $status;
+
+        DBIx::SearchBuilder::Record::Cachable->FlushCache unless ++$i % 100;
+    };
+    my $collector = sub {
+        my $info = shift;
+
+        my $dist = $info->dist;
+        if ( $dist ne $last_dist ) {
+            $syncer->();
+            $last_dist = $dist;
+            @last_versions = ();
         }
-    }
 
-    %tmp = ();
+        push @last_versions, $info->version;
+    };
+
+    $self->for_all_distributions( $collector );
+    $syncer->(); # last portion
 
     return (undef, @errors) if @errors;
     return (1);
@@ -357,14 +381,13 @@
         my $file = $m2f->{ $module };
         next unless $file;
 
-        my $dist = CPAN::DistnameInfo->new( "authors/id/$file" )->dist;
-        unless ( $dist ) {
-            debug { "Couldn't parse distribution name from '$file'\n" };
-            next;
-        }
-        push @{ $res{ $dist } ||= [] }, @$maint;
+        my $info = $self->file2distinfo( "authors/id/$file" )
+            or next;
+
+        push @{ $res{ $info->dist } ||= [] }, @$maint;
     }
 
+    my $i = 0;
     my @errors = ();
     while ( my ($dist, $maint) = each %res ) {
         my ($queue, @msg) = $self->load_or_create_queue( $dist );
@@ -376,6 +399,8 @@
         my $status;
         ($status, @msg) = $self->set_maintainers( $queue, @$maint );
         push @errors, @msg unless $status;
+
+        DBIx::SearchBuilder::Record::Cachable->FlushCache unless ++$i % 100;
     }
     %res = ();
     return (undef, @errors) if @errors;
@@ -611,27 +636,36 @@
     return ($user)
 }
 
-sub load_or_create_queue {
+sub load_queue {
     my $self = shift;
     my $dist = shift;
 
     my $queue = RT::Queue->new( $RT::SystemUser );
-    # Try to load up the current queue by name.  Avoids duplication.
     $queue->Load( $dist );
-    unless ( $queue->id ) {
-        my ($status, $msg) = $queue->Create(
-            Name               => $dist,
-            Description        => "Bugs in $dist",
-            CorrespondAddress  => "bug-$dist\@rt.cpan.org",
-            CommentAddress     => "comment-$dist\@rt.cpan.org",
-        );
-        unless ( $status ) {
-            return (undef, "Couldn't create queue '$dist': $msg\n");
-        }
-        debug { "Created queue #". $queue->id ." for dist ". $queue->Name ."\n" };
-    } else {
-        debug { "Found queue #". $queue->id ." for dist ". $queue->Name ."\n" };
+    return undef unless $queue->id;
+
+    debug { "Found queue #". $queue->id ." for dist ". $queue->Name ."\n" };
+    return $queue;
+}
+
+sub load_or_create_queue {
+    my $self = shift;
+    my $dist = shift;
+
+    my $queue = $self->load_queue( $dist );
+    return $queue if $queue;
+
+    $queue = RT::Queue->new( $RT::SystemUser );
+    my ($status, $msg) = $queue->Create(
+        Name               => $dist,
+        Description        => "Bugs in $dist",
+        CorrespondAddress  => "bug-$dist\@rt.cpan.org",
+        CommentAddress     => "comment-$dist\@rt.cpan.org",
+    );
+    unless ( $status ) {
+        return (undef, "Couldn't create queue '$dist': $msg\n");
     }
+    debug { "Created queue #". $queue->id ." for dist ". $queue->Name ."\n" };
     return $queue;
 }
 
@@ -712,6 +746,23 @@
     return $address->address;
 }
 
+sub file2distinfo {
+    my $self = shift;
+    my $file = shift or return undef;
+
+    my $info = CPAN::DistnameInfo->new( $file );
+    my $dist = $info->dist;
+    unless ( $dist ) {
+        debug { "Couldn't parse distribution name from '$file'\n" };
+        return undef;
+    }
+    if ( $dist =~ /^(parrot|perl)$/i ) {
+        debug { "Skipping $dist as it's hard coded to be skipped." };
+        return undef;
+    }
+    return $info;
+}
+
 sub file_path {
     my $self = shift;
     my $file = shift;



More information about the Bps-public-commit mailing list