[Bps-public-commit] www-mechanize branch, master, updated. 1.72-10-gfe027ba

Jesse Vincent jesse at bestpractical.com
Sat Mar 24 14:53:59 EDT 2012


The branch, master has been updated
       via  fe027ba1816ef42c3e2d0ec2b1c9ac265403bd89 (commit)
       via  de5d63eea151bf737a48b99179e2d8a4c3b2fbd0 (commit)
       via  09a12d29669934a68acb972efa8271b6a332ffa0 (commit)
       via  4f25335bddc43f08d2857486c813af57ff028434 (commit)
       via  50f451a3e4972891d7453dd0805c3541a61c6793 (commit)
       via  a6d3be53397f9ee374ffb2b54839b279d9a8f0e3 (commit)
       via  68fac0884fe7c1f0232eab0c854310b1647d3d25 (commit)
       via  bcf10333376c2d2850f41e4bc51b242b1b384803 (commit)
       via  81c5df7395b61db74cde774da208fc75568d79b4 (commit)
      from  da11c71577b92190966335fda72070259f153822 (commit)

Summary of changes:
 lib/WWW/Mechanize.pm      |  128 ++++++++++++++++++++++++++++++++++++++-------
 lib/WWW/Mechanize/FAQ.pod |    2 +-
 t/content.t               |   34 ++++++++++++-
 t/dump.t                  |   32 +++++++++++
 t/save_content.html       |    8 +++
 t/save_content.t          |   23 +++++++-
 6 files changed, 203 insertions(+), 24 deletions(-)
 create mode 100644 t/dump.t
 create mode 100644 t/save_content.html

- Log -----------------------------------------------------------------
commit 81c5df7395b61db74cde774da208fc75568d79b4
Author: Alexandr Ciornii <alexchorny at gmail.com>
Date:   Sat Dec 3 20:18:53 2011 +0200

    typo from annocpan

diff --git a/lib/WWW/Mechanize/FAQ.pod b/lib/WWW/Mechanize/FAQ.pod
index d20ac19..8de8508 100644
--- a/lib/WWW/Mechanize/FAQ.pod
+++ b/lib/WWW/Mechanize/FAQ.pod
@@ -166,7 +166,7 @@ I<submit_form()>, and I<request()> methods.
 
     my $mech = WWW::Mechanize->new( autocheck => 1 );
     $mech->get( 'http://my.site.com' );
-    my $res = $mech->response();
+    my $response = $mech->response();
     for my $key ( $response->header_field_names() ) {
         print $key, " : ", $response->header( $key ), "\n";
     }

commit bcf10333376c2d2850f41e4bc51b242b1b384803
Author: Alexandr Ciornii <alexchorny at gmail.com>
Date:   Sat Dec 3 21:01:48 2011 +0200

    dump_headers can now use a file name as a parameter

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index 01353c9..d3f82eefb 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2054,14 +2054,27 @@ Prints a dump of the HTTP response headers for the most recent
 response.  If I<$fh> is not specified or is undef, it dumps to
 STDOUT.
 
-Unlike the rest of the dump_* methods, you cannot specify a filehandle
-to print to.
+Unlike the rest of the dump_* methods, $fh can be a scalar. It
+will be used as a file name.
 
 =cut
 
+sub _get_fh_default_stdout {
+    my $self = shift;
+    my $p = shift || '';
+    if ( !$p ) {
+        return \*STDOUT;
+    } elsif ( !ref($p) ) {
+        open my $fh, '>', $p or $self->die( "Unable to write to $p: $!" );;
+        return $fh;
+    } else {
+        return $p;
+    }
+}
+
 sub dump_headers {
     my $self = shift;
-    my $fh   = shift || \*STDOUT;
+    my $fh   = $self->_get_fh_default_stdout(shift);
 
     print {$fh} $self->response->headers_as_string;
 
diff --git a/t/dump.t b/t/dump.t
new file mode 100644
index 0000000..04c1652
--- /dev/null
+++ b/t/dump.t
@@ -0,0 +1,32 @@
+#!perl -Tw
+
+use warnings;
+use strict;
+use Test::More tests => 5;
+use URI::file;
+
+BEGIN {
+    use_ok( 'WWW::Mechanize' );
+}
+
+my $mech = WWW::Mechanize->new( cookie_jar => undef );
+isa_ok( $mech, 'WWW::Mechanize' );
+
+my $uri = URI::file->new_abs( 't/find_inputs.html' )->as_string;
+
+$mech->get( $uri );
+ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page};
+my $fn = 'headers.tmp';
+$mech->dump_headers($fn);
+ok( -e $fn );
+unlink('headers.tmp');
+
+my $content;
+open my $fh, '>', \$content;
+$mech->dump_headers( $fh );
+like( $content, qr/Content-Length/ );
+close $fh;
+
+END {
+    unlink('headers.tmp');
+}

commit 68fac0884fe7c1f0232eab0c854310b1647d3d25
Author: Alexandr Ciornii <alexchorny at gmail.com>
Date:   Sat Dec 3 21:27:03 2011 +0200

    add WWW::Scripter to list of subclasses

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index d3f82eefb..4dbf46d 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2922,6 +2922,8 @@ Just like Mech, but using Microsoft Internet Explorer to do the work.
 
 =item * L<WWW::Yahoo::Groups>
 
+=item * L<WWW::Scripter>
+
 =back
 
 =head1 ACKNOWLEDGEMENTS

commit a6d3be53397f9ee374ffb2b54839b279d9a8f0e3
Author: Flavio Poletti <flavio at polettix.it>
Date:   Sat Jan 21 23:19:24 2012 +0100

    linkified URIs in POD documentation

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index 4dbf46d..1cd57ca 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2773,8 +2773,8 @@ __END__
 WWW::Mechanize is hosted at GitHub, though the bug tracker still
 lives at Google Code.
 
-Repository: https://github.com/bestpractical/www-mechanize/.  
-Bugs: http://code.google.com/p/www-mechanize/issues
+Repository: L<https://github.com/bestpractical/www-mechanize/>.
+Bugs: L<http://code.google.com/p/www-mechanize/issues>.
 
 =head1 OTHER DOCUMENTATION
 

commit 50f451a3e4972891d7453dd0805c3541a61c6793
Author: Flavio Poletti <flavio at polettix.it>
Date:   Sun Jan 22 00:46:06 2012 +0100

    extended content with new args

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index 1cd57ca..5ef4fac 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -591,7 +591,8 @@ sub title {
 =head2 $mech->content(...)
 
 Returns the content that the mech uses internally for the last page
-fetched. Ordinarily this is the same as $mech->response()->content(),
+fetched. Ordinarily this is the same as
+C<< $mech->response()->decoded_content() >>,
 but this may differ for HTML documents if L</update_html> is
 overloaded (in which case the value passed to the base-class
 implementation of same will be returned), and/or extra named arguments
@@ -603,31 +604,56 @@ are passed to I<content()>:
 
 Returns a text-only version of the page, with all HTML markup
 stripped. This feature requires I<HTML::TreeBuilder> to be installed,
-or a fatal error will be thrown.
+or a fatal error will be thrown. This works only if the contents are
+HTML.
 
 =item I<< $mech->content( base_href => [$base_href|undef] ) >>
 
 Returns the HTML document, modified to contain a
 C<< <base href="$base_href"> >> mark-up in the header.
 I<$base_href> is C<< $mech->base() >> if not specified. This is
-handy to pass the HTML to e.g. L<HTML::Display>.
+handy to pass the HTML to e.g. L<HTML::Display>. This works only if
+the contents are HTML.
+
+
+=item I<< $mech->content( raw => 1 ) >>
+
+Returns C<< $self->response()->content() >>, i.e. the raw contents from the
+response.
+
+=item I<< $mech->content( decoded_by_headers => 1 ) >>
+
+Returns the content after applying all C<Content-Encoding> headers but
+with not additional mangling.
+
+=item I<< $mech->content( charset => $charset ) >>
+
+Returns C<< $self->response()->decoded_content(charset => $charset) >>
+(see L<HTTP::Response> for details).
 
 =back
 
-Passing arguments to C<content()> if the current document is not
-HTML has no effect now (i.e. the return value is the same as
-C<< $self->response()->content() >>. This may change in the future,
-but will likely be backwards-compatible when it does.
+To preserve backwards compatibility, additional parameters will be
+ignored unless none of C<< raw | decoded_by_headers | charset >> is
+specified and the text is HTML, in which case an error will be triggered.
 
 =cut
 
 sub content {
     my $self = shift;
-    my $content = $self->{content};
-
-    if ( $self->is_html ) {
-        my %parms = @_;
+    my %parms = @_;
 
+    my $content = $self->{content};
+    if (delete $parms{raw}) {
+        $content = $self->response()->content();
+    }
+    elsif (delete $parms{decoded_by_headers}) {
+        $content = $self->response()->decoded_content(charset => 'none');
+    }
+    elsif (my $charset = delete $parms{charset}) {
+        $content = $self->response()->decoded_content(charset => $charset);
+    }
+    elsif ( $self->is_html ) {
         if ( exists $parms{base_href} ) {
             my $base_href = (delete $parms{base_href}) || $self->base;
             $content=~s/<head>/<head>\n<base href="$base_href">/i;
diff --git a/t/content.t b/t/content.t
index d5f2126..00da666 100644
--- a/t/content.t
+++ b/t/content.t
@@ -2,7 +2,7 @@
 
 use warnings;
 use strict;
-use Test::More tests => 5;
+use Test::More tests => 8;
 
 =head1 NAME
 
@@ -63,3 +63,35 @@ like($content, qr/base href="foo"/, 'Found the base href');
 $content = $mech->content(base_href => undef);
 like($content, qr[base href="http://example.com/"], 'Found the new base href');
 
+$mech->{res} = Test::MockResponse->new(
+   raw_content => 'this is the raw content',
+   charset_none => 'this is a slightly decoded content',
+   charset_whatever => 'this is charset whatever',
+);
+
+$content = $mech->content(raw => 1);
+is($content, 'this is the raw content', 'raw => 1');
+
+$content = $mech->content(decoded_by_headers => 1);
+is($content, 'this is a slightly decoded content', 'decoded_by_headers => 1');
+
+$content = $mech->content(charset => 'whatever');
+is($content, 'this is charset whatever', 'charset => ...');
+
+package Test::MockResponse;
+
+sub new {
+   my $package = shift;
+   return bless { @_ }, $package;
+}
+
+sub content {
+   my ($self) = @_;
+   return $self->{raw_content};
+}
+
+sub decoded_content {
+   my ($self, %opts) = @_;
+   return $self->{decoded_content} unless exists $opts{charset};
+   return $self->{"charset_$opts{charset}"};
+}

commit 4f25335bddc43f08d2857486c813af57ff028434
Author: Flavio Poletti <flavio at polettix.it>
Date:   Sun Jan 22 01:03:20 2012 +0100

    more robust save_content

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index 5ef4fac..36bb4c4 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2067,7 +2067,7 @@ sub save_content {
 
     open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" );
     binmode $fh unless $self->content_type =~ m{^text/};
-    print {$fh} $self->content or $self->die( "Unable to write to $filename: $!" );
+    print {$fh} $self->content(decoded_by_headers => 1) or $self->die( "Unable to write to $filename: $!" );
     close $fh or $self->die( "Unable to close $filename: $!" );
 
     return;
diff --git a/t/save_content.html b/t/save_content.html
new file mode 100644
index 0000000..03091aa
--- /dev/null
+++ b/t/save_content.html
@@ -0,0 +1,8 @@
+<html>
+<head>
+    <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+</head>
+<body>
+   Però poi si vedrà!!!
+</body>
+</html>
diff --git a/t/save_content.t b/t/save_content.t
index 623e077..813f488 100644
--- a/t/save_content.t
+++ b/t/save_content.t
@@ -14,7 +14,7 @@ BEGIN {
 my $mech = WWW::Mechanize->new( cookie_jar => undef );
 isa_ok( $mech, 'WWW::Mechanize' );
 
-my $original = 't/find_inputs.html';
+my $original = 't/save_content.html';
 my $saved = 'saved.test.txt';
 
 my $uri = URI::file->new_abs( $original )->as_string;

commit 09a12d29669934a68acb972efa8271b6a332ffa0
Author: Flavio Poletti <flavio at polettix.it>
Date:   Sun Jan 22 01:20:56 2012 +0100

    save_content is more backwards-compatible

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index 36bb4c4..76e6b90 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2067,7 +2067,7 @@ sub save_content {
 
     open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" );
     binmode $fh unless $self->content_type =~ m{^text/};
-    print {$fh} $self->content(decoded_by_headers => 1) or $self->die( "Unable to write to $filename: $!" );
+    print {$fh} $self->content(@_) or $self->die( "Unable to write to $filename: $!" );
     close $fh or $self->die( "Unable to close $filename: $!" );
 
     return;
diff --git a/t/save_content.t b/t/save_content.t
index 813f488..89b0bbc 100644
--- a/t/save_content.t
+++ b/t/save_content.t
@@ -3,7 +3,7 @@
 use warnings;
 use strict;
 
-use Test::More tests => 5;
+use Test::More tests => 8;
 use URI::file;
 
 BEGIN {
@@ -14,8 +14,8 @@ BEGIN {
 my $mech = WWW::Mechanize->new( cookie_jar => undef );
 isa_ok( $mech, 'WWW::Mechanize' );
 
-my $original = 't/save_content.html';
-my $saved = 'saved.test.txt';
+my $original = 't/find_inputs.html';
+my $saved = 'saved1.test.txt';
 
 my $uri = URI::file->new_abs( $original )->as_string;
 
@@ -31,6 +31,25 @@ my $new_text = slurp( $saved );
 
 ok( $old_text eq $new_text, 'Saved copy matches the original' ) && unlink $saved;
 
+{
+    my $original = 't/save_content.html';
+    my $saved = 'saved2.test.txt';
+
+    my $uri = URI::file->new_abs( $original )->as_string;
+
+    $mech->get( $uri );
+    ok( $mech->success, "Fetched $uri" ) or die q{Can't get test page};
+
+    #unlink $saved;
+    ok( !-e $saved, "$saved does not exist" );
+    $mech->save_content( $saved, decoded_by_headers => 1 );
+
+    my $old_text = slurp( $original );
+    my $new_text = slurp( $saved );
+
+    ok( $old_text eq $new_text, 'Saved copy matches the original' ) && unlink $saved;
+}
+
 sub slurp {
     my $name = shift;
 

commit de5d63eea151bf737a48b99179e2d8a4c3b2fbd0
Author: Flavio Poletti <flavio at polettix.it>
Date:   Sun Jan 22 01:45:31 2012 +0100

    better "straight" binary handling

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index 76e6b90..ad31505 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2051,7 +2051,7 @@ sub stack_depth {
     return $self->{stack_depth};
 }
 
-=head2 $mech->save_content( $filename )
+=head2 $mech->save_content( $filename, %opts )
 
 Dumps the contents of C<< $mech->content >> into I<$filename>.
 I<$filename> will be overwritten.  Dies if there are any errors.
@@ -2059,15 +2059,51 @@ I<$filename> will be overwritten.  Dies if there are any errors.
 If the content type does not begin with "text/", then the content
 is saved in binary mode.
 
+Additional arguments can be passed as I<key>/I<value> pairs:
+
+=over
+
+=item I<< $mech->save_content( $filename, binary => 1 ) >>
+
+Filehandle is set with C<binmode> to C<:raw> and contents are taken
+calling C<< $self->content(decoded_by_headers => 1) >>. Same as calling:
+
+    $mech->save_content( $filename, binmode =>  \':raw',
+                         decoded_by_headers => 1 );
+
+=item I<< $mech->save_content( $filename, binmode => $binmode ) >>
+
+Filehandle is set to binary mode. If C<$binmode> is a reference, it is
+dereferenced as a scalar and passed to C<binmode>:
+
+    binmode $fh, $$binmode;
+
+otherwise the filehandle is set to binary mode if C<$binmode> is true:
+
+    binmode $fh;
+
+=item I<all other arguments>
+
+are passed as-is to C<< $mech->content(%opts) >>
+
+=back
+
 =cut
 
 sub save_content {
     my $self = shift;
     my $filename = shift;
+    my %opts = @_;
+    if (delete $opts{binary}) {
+        $opts{binmode} = \':raw';
+        $opts{decoded_by_headers} = 1;
+    }
 
     open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" );
-    binmode $fh unless $self->content_type =~ m{^text/};
-    print {$fh} $self->content(@_) or $self->die( "Unable to write to $filename: $!" );
+    if ((my $binmode = delete $opts{binmode}) || ($self->content_type() !~ m{^text/})) {
+        ref($binmode) ? binmode($fh, $$binmode) : binmode($fh);
+    }
+    print {$fh} $self->content(%opts) or $self->die( "Unable to write to $filename: $!" );
     close $fh or $self->die( "Unable to close $filename: $!" );
 
     return;
diff --git a/t/save_content.t b/t/save_content.t
index 89b0bbc..d43e7a0 100644
--- a/t/save_content.t
+++ b/t/save_content.t
@@ -42,7 +42,7 @@ ok( $old_text eq $new_text, 'Saved copy matches the original' ) && unlink $saved
 
     #unlink $saved;
     ok( !-e $saved, "$saved does not exist" );
-    $mech->save_content( $saved, decoded_by_headers => 1 );
+    $mech->save_content( $saved, binary => 1 );
 
     my $old_text = slurp( $original );
     my $new_text = slurp( $saved );

commit fe027ba1816ef42c3e2d0ec2b1c9ac265403bd89
Author: Flavio Poletti <flavio at polettix.it>
Date:   Wed Jan 25 01:17:17 2012 +0100

    made binmode a bit more intuitive

diff --git a/lib/WWW/Mechanize.pm b/lib/WWW/Mechanize.pm
index ad31505..7845e6e 100644
--- a/lib/WWW/Mechanize.pm
+++ b/lib/WWW/Mechanize.pm
@@ -2057,7 +2057,8 @@ Dumps the contents of C<< $mech->content >> into I<$filename>.
 I<$filename> will be overwritten.  Dies if there are any errors.
 
 If the content type does not begin with "text/", then the content
-is saved in binary mode.
+is saved in binary mode (i.e. C<binmode()> is set on the output
+filehandle).
 
 Additional arguments can be passed as I<key>/I<value> pairs:
 
@@ -2068,15 +2069,17 @@ Additional arguments can be passed as I<key>/I<value> pairs:
 Filehandle is set with C<binmode> to C<:raw> and contents are taken
 calling C<< $self->content(decoded_by_headers => 1) >>. Same as calling:
 
-    $mech->save_content( $filename, binmode =>  \':raw',
+    $mech->save_content( $filename, binmode => ':raw',
                          decoded_by_headers => 1 );
 
+This I<should> be the safest way to save contents verbatim.
+
 =item I<< $mech->save_content( $filename, binmode => $binmode ) >>
 
-Filehandle is set to binary mode. If C<$binmode> is a reference, it is
-dereferenced as a scalar and passed to C<binmode>:
+Filehandle is set to binary mode. If C<$binmode> begins with ':', it is
+passed as a parameter to C<binmode>:
 
-    binmode $fh, $$binmode;
+    binmode $fh, $binmode;
 
 otherwise the filehandle is set to binary mode if C<$binmode> is true:
 
@@ -2084,7 +2087,10 @@ otherwise the filehandle is set to binary mode if C<$binmode> is true:
 
 =item I<all other arguments>
 
-are passed as-is to C<< $mech->content(%opts) >>
+are passed as-is to C<< $mech->content(%opts) >>. In particular,
+C<decoded_by_headers> might come handy if you want to revert the effect
+of line compression performed by the web server but without further
+interpreting the contents (e.g. decoding it according to the charset).
 
 =back
 
@@ -2095,13 +2101,18 @@ sub save_content {
     my $filename = shift;
     my %opts = @_;
     if (delete $opts{binary}) {
-        $opts{binmode} = \':raw';
+        $opts{binmode} = ':raw';
         $opts{decoded_by_headers} = 1;
     }
 
     open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" );
-    if ((my $binmode = delete $opts{binmode}) || ($self->content_type() !~ m{^text/})) {
-        ref($binmode) ? binmode($fh, $$binmode) : binmode($fh);
+    if ((my $binmode = delete($opts{binmode}) || '') || ($self->content_type() !~ m{^text/})) {
+        if (length($binmode) && (substr($binmode, 0, 1) eq ':')) {
+            binmode $fh, $binmode;
+        }
+        else {
+            binmode $fh;
+        }
     }
     print {$fh} $self->content(%opts) or $self->die( "Unable to write to $filename: $!" );
     close $fh or $self->die( "Unable to close $filename: $!" );

-----------------------------------------------------------------------



More information about the Bps-public-commit mailing list