[Rt-commit] r3578 - in WWW-Mechanize-FromRecording: . bin lib/WWW/Mechanize t

glasser at bestpractical.com glasser at bestpractical.com
Tue Aug 2 13:00:24 EDT 2005


Author: glasser
Date: Tue Aug  2 13:00:23 2005
New Revision: 3578

Added:
   WWW-Mechanize-FromRecording/bin/
   WWW-Mechanize-FromRecording/bin/mech-from-recording
Removed:
   WWW-Mechanize-FromRecording/t/.01.basic.t.swo
Modified:
   WWW-Mechanize-FromRecording/   (props changed)
   WWW-Mechanize-FromRecording/lib/WWW/Mechanize/FromRecording.pm
   WWW-Mechanize-FromRecording/t/01.basic.t
Log:
 r38280 at tin-foil:  glasser | 2005-08-01 15:02:28 -0400
  * Add a bin/mech-from-recording script
  * handle redirects
  * display text of html pages
  * try to guess clicked links


Added: WWW-Mechanize-FromRecording/bin/mech-from-recording
==============================================================================
--- (empty file)
+++ WWW-Mechanize-FromRecording/bin/mech-from-recording	Tue Aug  2 13:00:23 2005
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+
+eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
+    if 0; # not running under some shell
+
+=head1 NAME
+
+mech-from-recording - Makes a WWW::Mechanize script from an HTTP recording
+
+=cut
+
+use strict;
+BEGIN { 
+    unless (eval {require WWW::Mechanize::FromRecording; 1}) {
+	use lib 'lib';
+	require WWW::Mechanize::FromRecording;
+    } 
+}
+use Getopt::Long;
+use Pod::Usage;
+
+GetOptions( 
+    help =>     sub { pod2usage(1); },
+) or pod2usage(2);
+
+=head1 SYNOPSIS
+
+mech-from-recording prefix
+
+Options:
+
+    --help          Show this message
+
+The prefix must be the prefix of the recording files, like C</tmp/http-server-simple-recorder>.
+
+=cut
+
+my $prefix = shift or die "Must specify a prefix for the recording files\n";
+
+my $FR = WWW::Mechanize::FromRecording->new($prefix);
+
+print $FR->mech_script;
+

Modified: WWW-Mechanize-FromRecording/lib/WWW/Mechanize/FromRecording.pm
==============================================================================
--- WWW-Mechanize-FromRecording/lib/WWW/Mechanize/FromRecording.pm	(original)
+++ WWW-Mechanize-FromRecording/lib/WWW/Mechanize/FromRecording.pm	Tue Aug  2 13:00:23 2005
@@ -12,6 +12,8 @@
 use HTTP::Request;
 use File::Basename ();
 use IO::Dir;
+use HTML::TreeBuilder;
+use HTML::FormatText;
 
 =head1 NAME
 
@@ -90,6 +92,8 @@
 END_HEAD
 
     my $last_real_URL;
+    my %last_links;
+    my %redirected;
 
     for my $messages ($self->_messages) {
 	my $request = $messages->{'request'};
@@ -99,13 +103,17 @@
 
 	my $line = qq(\$mech->get(q{$URL}););
 
-	my $comment = sub { $line .= "\n# ($_[0])" };
+	my $comment = sub { my $new = shift; $new =~ s/^/# /mg; $line .= "\n$new" };
 	my $comment_out = sub { $line = "# $line"; $comment->(@_) };
 
 	my $ct = $response->content_type;
 
 	unless ($request->method eq 'GET') {
 	    $comment_out->("method was " . $request->method . ", not GET");
+	} elsif ($response->code == 302) {
+	    my $to = $response->header('Location');
+	    $comment->("redirected to $to");
+	    $redirected{$to} = $URL;
 	} elsif ($response->code != 200) {
 	    $comment_out->("response code was " . $response->code . ", not 200");
 	} elsif (not defined $ct or not length $ct) {
@@ -115,14 +123,43 @@
 	} else {
 	    # Looks good.  What else can we glean?
 	    
+	    if (exists $redirected{$request->uri}) {
+		$comment_out->("we were probably redirected here from " . delete $redirected{$request->uri});
+	    }
+	    
 	    my $referer = $request->header('Referer');
 	    if (defined $referer and length $referer) {
 		$comment->("looks like we got here from $referer");
 
-		$comment->("hey, that's the last page we went to. maybe we clicked a link.")
-		    if $last_real_URL and $referer eq $last_real_URL;
+	    	if ($last_real_URL and $referer eq $last_real_URL) {
+		    $comment->("hey, that's the last page we went to. maybe we clicked a link.");
+		    if ($last_links{$URL}) {
+			$comment->("maybe this would do the trick:");
+			$comment->("\$mech->follow_link(q{$last_links{$URL}})");
+		    } else {
+			$comment->("(but I don't see one there)");
+		    } 
+		} 
 	    } 
 
+	    my $html_tree = HTML::TreeBuilder->new_from_content($response->content);
+
+	    %last_links = ();
+
+	    for (@{ $html_tree->extract_links('a') }) {
+		my ($link, $element, $attr, $tag) = @$_;
+		next unless lc $attr eq 'href';
+
+		my $uri = URI->new($link)->abs($URL); # XXX TODO FIXME deal with base href
+		
+		$last_links{$uri} = $element->as_trimmed_text;
+	    }
+
+	    my $text = HTML::FormatText->new->format($html_tree);
+	    $comment->("text of page is:");
+	    $text =~ s/^/| /mg;
+	    $comment->($text);
+
 	    $last_real_URL = $URL;
 	} 
 

Modified: WWW-Mechanize-FromRecording/t/01.basic.t
==============================================================================
--- WWW-Mechanize-FromRecording/t/01.basic.t	(original)
+++ WWW-Mechanize-FromRecording/t/01.basic.t	Tue Aug  2 13:00:23 2005
@@ -14,5 +14,3 @@
 
 like($script, qr/my \$mech = WWW::Mechanize->new;/);
 
-warn $script;
-


More information about the Rt-commit mailing list