[Bps-public-commit] r13729 - CPAN2RT/lib
ruz at bestpractical.com
ruz at bestpractical.com
Wed Jul 2 15:12:57 EDT 2008
Author: ruz
Date: Wed Jul 2 15:12:49 2008
New Revision: 13729
Modified:
CPAN2RT/lib/CPAN2RT.pm
Log:
* switch from 01mailrc.txt.gz to 00whois.xml
** use XML::SAX parser to minimize memory impact
** filter maintainers by type, set only users with
type 'author' as maintainers and skip lists
Modified: CPAN2RT/lib/CPAN2RT.pm
==============================================================================
--- CPAN2RT/lib/CPAN2RT.pm (original)
+++ CPAN2RT/lib/CPAN2RT.pm Wed Jul 2 15:12:49 2008
@@ -87,7 +87,7 @@
my @files = qw(
indices/find-ls.gz
- authors/01mailrc.txt.gz
+ authors/00whois.xml
modules/06perms.txt.gz
modules/02packages.details.txt.gz
);
@@ -134,26 +134,19 @@
sub _authors {
my $self = shift;
- my $file = '01mailrc.txt';
+ my $file = '00whois.xml';
debug { "Parsing $file...\n" };
my $path = $self->file_path( $file );
- open my $fh, "<:utf8", $path or die "Couldn't open '$path': $!";
- my %res;
- while ( my $str = <$fh> ) {
- chomp $str;
- my ($cpanid, $real_name, $email) = ($str =~ m{^alias\s+([A-Z0-9]+)\s+"([^<>]*?)\s*<([^>]+)>"$});
- unless ( $cpanid ) {
- debug { "couldn't parse '$str'\n" };
- next;
- }
- $res{ $cpanid } = {
- real_name => $real_name,
- email_address => $self->parse_email_address($email) || $cpanid .'@cpan.org',
- };
- }
+ use XML::SAX::ParserFactory;
+ my $handler = CPAN2RT::UsersSAXParser->new();
+ my $p = XML::SAX::ParserFactory->parser(Handler => $handler);
+
+ open my $fh, "<:raw", $path or die "Couldn't open '$path': $!";
+ my $res = $p->parse_file( $fh );
close $fh;
- return \%res;
+
+ return $res;
}
{ my $cache;
@@ -272,7 +265,7 @@
my @errors;
my $authors = $self->authors;
while ( my ($cpanid, $meta) = each %$authors ) {
- my ($user, @msg) = $self->load_or_create_user( $cpanid, @{ $meta }{qw(real_name email_address)} );
+ my ($user, @msg) = $self->load_or_create_user( $cpanid, @{ $meta }{qw(fullname email)} );
push @errors, @msg unless $user;
}
return (undef, @errors) if @errors;
@@ -379,10 +372,17 @@
return map uc $_->Name, @{ $users->ItemsArrayRef };
}
+sub filter_maintainers {
+ my $self = shift;
+ my $authors = $self->authors;
+ return grep { ($authors->{$_}{'type'}||'') eq 'author' } @_;
+}
+
sub set_maintainers {
my $self = shift;
my $queue = shift;
- my @maints = @_;
+
+ my @maints = $self->filter_maintainers( @_ );
my @current = $self->current_maintainers( $queue );
my @errors;
@@ -476,7 +476,6 @@
return (1);
}
-
sub add_versions {
my $self = shift;
my ($queue, @versions) = @_;
@@ -735,3 +734,52 @@
}
1;
+
+package CPAN2RT::UsersSAXParser;
+use base qw(XML::SAX::Base);
+
+sub start_document {
+ my ($self, $doc) = @_;
+ $self->{'res'} = {};
+}
+
+sub start_element {
+ my ($self, $el) = @_;
+ my $name = $el->{LocalName};
+ return if $name ne 'cpanid' && !$self->{inside};
+
+ if ( $name eq 'cpanid' ) {
+ $self->{inside} = 1;
+ $self->{tmp} = [];
+ return;
+ } else {
+ $self->{inside_prop} = 1;
+ }
+
+ push @{ $self->{'tmp'} }, $name, '';
+}
+
+sub characters {
+ my ($self, $el) = @_;
+ $self->{'tmp'}[-1] .= $el->{Data} if $self->{inside_prop};
+}
+
+sub end_element {
+ my ($self, $el) = @_;
+ $self->{inside_prop} = 0;
+
+ my $name = $el->{LocalName};
+
+ if ( $name eq 'cpanid' ) {
+ $self->{inside} = 0;
+ my %rec = map Encode::decode_utf8($_), @{ delete $self->{'tmp'} };
+ $self->{'res'}{ delete $rec{'id'} } = \%rec;
+ }
+}
+
+sub end_document {
+ my ($self) = @_;
+ return $self->{'res'};
+}
+
+1;
More information about the Bps-public-commit
mailing list