[Bps-public-commit] html-gumbo branch, master, created. 9207c21cf501300b224d5635abd2b66a3be9af6c
Alex Vandiver
alexmv at bestpractical.com
Thu Jul 17 13:03:16 EDT 2014
The branch, master has been created
at 9207c21cf501300b224d5635abd2b66a3be9af6c (commit)
- Log -----------------------------------------------------------------
commit 6ee0b91b1efe685c70c0ca0053a7da8191929d55
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Tue Sep 24 15:31:06 2013 +0400
.gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd33f3d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+Build
+_build/
+MYMETA.*
+pm_to_blib
+blib/
+MANIFEST.bak
+MANIFEST.new
+MANIFEST.old
+cover_db/
+nytprof/
+*.tar.gz
+*.sw[po]
+*.bak
commit 91186b96f3b6bbaa34f13939c13d550f4bff2332
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Mon Sep 30 18:33:47 2013 +0400
releng files
diff --git a/Build.PL b/Build.PL
new file mode 100644
index 0000000..cba75d6
--- /dev/null
+++ b/Build.PL
@@ -0,0 +1,18 @@
+use Module::Build;
+use Alien::LibGumbo;
+
+my $alien = Alien::LibGumbo->new;
+my $builder = Module::Build->new(
+ module_name => 'HTML::Gumbo',
+
+ configure_requires => {
+ 'Alien::LibGumbo' => 0,
+ },
+ build_requires => {
+ 'ExtUtils::CBuilder' => 0,
+ },
+
+ extra_compiler_flags => $alien->cflags(),
+ extra_linker_flags => $alien->libs(),
+);
+$builder->create_build_script;
\ No newline at end of file
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..410bd95
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,7 @@
+Build.PL
+lib/HTML/Gumbo.pm
+lib/HTML/Gumbo.xs
+MANIFEST This list of files
+t/callback.t
+t/string.t
+t/tree.t
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 0000000..3801b04
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1,73 @@
+
+#!start included /Users/ruz/perl5/perlbrew/perls/perl-5.16.1/lib/site_perl/5.16.1/ExtUtils/MANIFEST.SKIP
+# Avoid version control files.
+\bRCS\b
+\bCVS\b
+\bSCCS\b
+,v$
+\B\.svn\b
+\B\.git\b
+\B\.gitignore\b
+\b_darcs\b
+\B\.cvsignore$
+
+# Avoid VMS specific MakeMaker generated files
+\bDescrip.MMS$
+\bDESCRIP.MMS$
+\bdescrip.mms$
+
+# Avoid Makemaker generated and utility files.
+\bMANIFEST\.bak
+\bMakefile$
+\bblib/
+\bMakeMaker-\d
+\bpm_to_blib\.ts$
+\bpm_to_blib$
+\bblibdirs\.ts$ # 6.18 through 6.25 generated this
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\b_build/
+\bBuild.bat$
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+
+# Avoid temp and backup files.
+~$
+\.old$
+\#$
+\b\.#
+\.bak$
+\.tmp$
+\.#
+\.rej$
+
+# Avoid OS-specific files/dirs
+# Mac OSX metadata
+\B\.DS_Store
+# Mac OSX SMB mount metadata files
+\B\._
+
+# Avoid Devel::Cover and Devel::CoverX::Covered files.
+\bcover_db\b
+\bcovered\b
+
+# Avoid MYMETA files
+^MYMETA\.
+#!end included /Users/ruz/perl5/perlbrew/perls/perl-5.16.1/lib/site_perl/5.16.1/ExtUtils/MANIFEST.SKIP
+
+# Avoid configuration metadata file
+^MYMETA\.
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\bBuild.bat$
+\b_build
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+^MANIFEST\.SKIP
+
+.*\.c$
+.*\.o$
commit cb35f8e27976eb3b99fd0311c3b6baaf0995d26f
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Mon Sep 30 18:35:03 2013 +0400
initial implementation
diff --git a/lib/HTML/Gumbo.pm b/lib/HTML/Gumbo.pm
new file mode 100644
index 0000000..7bfd539
--- /dev/null
+++ b/lib/HTML/Gumbo.pm
@@ -0,0 +1,276 @@
+use v5.10;
+use strict;
+use warnings;
+
+package HTML::Gumbo;
+
+use Alien::LibGumbo;
+our $VERSION = '0.1';
+
+require XSLoader;
+XSLoader::load('HTML::Gumbo', $VERSION);
+
+=head1 NAME
+
+HTML::Gumbo - HTML5 parser based on gumbo C library
+
+=head1 DESCRIPTION
+
+L<Gumbo|https://github.com/google/gumbo-parser> is an implementation
+of L<the HTML5 parsing algorithm|http://www.w3.org/TR/html5/syntax.html>
+implemented as a pure C99 library with no outside dependencies.
+
+Goals and features of the C library:
+
+=over 4
+
+=item * Fully conformant with the HTML5 spec.
+
+=item * Robust and resilient to bad input.
+
+=item * Simple API that can be easily wrapped by other languages. (This is one of such wrappers.)
+
+=item * Support for source locations and pointers back to the original text.
+(Not exposed by this implementation at the moment.)
+
+=item * Relatively lightweight, with no outside dependencies.
+
+=item * Passes all html5lib-0.95 tests.
+
+=item * Tested on over 2.5 billion pages from Google's index.
+
+=back
+
+=head1 SUPPORTED OUTPUT FORMATS
+
+=head2 string
+
+Beta readiness.
+
+HTML is parsed and re-built from the tree, so tags are balanced
+(except void elements). Since fragments parsing is not supported
+at the moment the result always gets html, head and body elements.
+
+No additional arguments for this format.
+
+ $html = HTML::Gumbo->new->parse( $html );
+
+=head2 callback
+
+Beta readiness.
+
+L<HTML::Parser> like interface. Pass a sub as C<callback> argument to
+L</parse> method and it will be called for every node in the document:
+
+ HTML::Gumbo->new->parse( $html, format => 'callback', callback => sub {
+ my ($event) = shift;
+ if ( $event eq 'document start' ) {
+ my ($doctype) = @_;
+ }
+ elsif ( $event eq 'document end' ) {
+ }
+ elsif ( $event eq 'start' ) {
+ my ($tag, $attrs) = @_;
+ }
+ elsif ( $event eq 'end' ) {
+ my ($tag) = @_;
+ }
+ elsif ( $event =~ /^(text|space|cdata|comment)$/ ) {
+ my ($text) = @_;
+ }
+ else {
+ die "Unknown event";
+ }
+ } );
+
+Note that 'end' events are not generated for
+L<void elements|http://www.w3.org/TR/html5/syntax.html#void-elements>,
+for example C<hr>, C<br> and C<img>.
+
+No additional arguments except mentioned C<callback>.
+
+=head2 tree
+
+Alpha stage.
+
+Produces tree based on L<HTML::Element>s, like L<HTML::TreeBuilder>.
+
+There is a major difference from HTML::TreeBuilder: this method produces
+top level element with tag name 'document' which may have doctype, comments
+and html tags.
+
+Yes, it's not ready to use as a drop-in replacement for tree builder. Patches
+are welcome. I don't use this formatter at the moment.
+
+=head1 CHARACTER ENCODING OF THE INPUT
+
+The C parser works only with UTF-8, so you have several options to make
+sure input is UTF-8. First of all define C<input_is>:
+
+=over 4
+
+=item string
+
+Input is Perl string, for example obtained from L<HTTP::Response/decoded_content>.
+Default value.
+
+=item octets
+
+Input is octets. A partial implementation of
+L<encoding sniffing algorithm|http://www.w3.org/TR/html5/syntax.html#encoding-sniffing-algorithm>
+is used:
+
+=over 4
+
+=item C<encoding> argument
+
+Use it to hardcode a specific encoding.
+
+=item BOM
+
+UTF-8/UTF-16 BOMs are checked.
+
+=item C<encoding_content_type>
+
+Encoding from transport layer, charset in content-type header.
+
+=item Prescan
+
+Not implemented, follow L<issue 58|https://github.com/google/gumbo-parser/issues/58>.
+
+HTML5 defines L<prescan algorithm|http://www.w3.org/TR/html5/syntax.html#prescan-a-byte-stream-to-determine-its-encoding>
+that extracts encoding from meta tags in the head.
+
+It would be cool to get it in the C library, but I will accept a patch that implements it in pure perl.
+
+=item C<encoding_tentative> argument
+
+The likely encoding for this page, e.g. based on the encoding of the
+page when it was last visited.
+
+=item nested browsing context
+
+Not implemented. Fragment parsing with or without context is not implemented. Parser
+also has no origin information, so it wouldn't be implemented.
+
+=item autodetection
+
+Not implemented.
+
+Can be implemented using L<Encode::Detect::Detector>. Patches are welcome.
+
+=item otherwise
+
+It B<dies>.
+
+=back
+
+=item C<utf8>
+
+Use utf8 as input_is when you're sure input is UTF-8, but octets.
+No pre-processing at all. Should only be used on trusted input or
+when it's preprocessed already.
+
+=back
+
+=head1 METHODS
+
+=head2 new
+
+ my $parser = HTML::Gumbo->new;
+
+No options at the moment.
+
+=head2 parse
+
+ my $res = $parser->parse(
+ "<h1>hello world!</h1>",
+ format => 'tree',
+ input_is => 'string',
+ );
+
+Takes html string and pairs of named arguments:
+
+=over 4
+
+=item format
+
+Output format, default is string. See L</SUPPORTED OUTPUT FORMATS>.
+
+=item input_is
+
+Whether html is perl 'string', 'octets' or 'utf8' (octets known to
+be utf8). See L</CHARACTER ENCODING OF THE INPUT>.
+
+=item encoding, encoding_content_type, encoding_tentative
+
+See L</CHARACTER ENCODING OF THE INPUT>.
+
+=item ...
+
+Some formatters may have additional arguments.
+
+=back
+
+Return value depends on the picked format.
+
+=cut
+
+sub new {
+ my ($proto, @fields) = @_; # invocant may be a class name or an existing object
+ return bless {@fields}, ref($proto) || $proto; # remaining arguments become the object's hash
+}
+
+sub parse {
+ my $self = shift;
+ my $what = shift;
+ my %args = @_;
+
+ my $format = $args{'format'} || 'string';
+ my $method = 'parse_to_'. $format;
+ die "'$format' format is not supported"
+ unless $self->can($method);
+
+ my $input_is = $args{'input_is'} || 'string';
+ if ( $input_is eq 'string' ) {
+ utf8::encode($what); # characters -> UTF-8 octets for the C parser
+ }
+ elsif ( $input_is eq 'utf8' ) {
+ } # caller vouches for UTF-8 octets; no pre-processing
+ elsif ( $input_is eq 'octets' ) {
+ my $enc = $args{'encoding'};
+ unless ( $enc ) {
+ # Sniff the BOM in the input itself (not in the input_is option).
+ if ( $what =~ /^(?: (\x{FE}\x{FF}) | (\x{FF}\x{FE}) | \x{EF}\x{BB}\x{BF} )/x ) {
+ $enc = $1 ? 'UTF-16BE' : $2 ? 'UTF-16LE' : 'UTF-8';
+ }
+ elsif ( $enc = $args{'encoding_content_type'} ) { }
+ elsif ( $enc = $args{'encoding_tentative'} ) { }
+ else {
+ die "Encoding detection is not implemented";
+ }
+ }
+ # Transcode for every octets input, including an explicit 'encoding'.
+ require Encode;
+ Encode::from_to($what, $enc, 'UTF-8');
+ }
+ return $self->$method( \$what, %args );
+}
+
+sub parse_to_callback {
+ my ($self, $buffer_ref, %opts) = @_; # parse() hands over a reference to the prepared buffer
+ my $callback = $opts{'callback'} or die "No callback provided";
+ return $self->_parse_to_callback( $buffer_ref, $callback ); # XS side calls $callback per event
+}
+
+=head1 AUTHOR
+
+Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>
+
+=head1 LICENSE
+
+Under the same terms as perl itself.
+
+=cut
+
+1;
\ No newline at end of file
diff --git a/lib/HTML/Gumbo.xs b/lib/HTML/Gumbo.xs
new file mode 100644
index 0000000..88a8b5c
--- /dev/null
+++ b/lib/HTML/Gumbo.xs
@@ -0,0 +1,524 @@
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include "gumbo.h"
+
+#define PHG_IS_VOID_ELEMENT(tag) \
+ ( tag == GUMBO_TAG_AREA \
+ || tag == GUMBO_TAG_BASE \
+ || tag == GUMBO_TAG_BR \
+ || tag == GUMBO_TAG_COL \
+ || tag == GUMBO_TAG_EMBED \
+ || tag == GUMBO_TAG_HR \
+ || tag == GUMBO_TAG_IMG \
+ || tag == GUMBO_TAG_INPUT \
+ || tag == GUMBO_TAG_KEYGEN \
+ || tag == GUMBO_TAG_LINK \
+ || tag == GUMBO_TAG_META \
+ || tag == GUMBO_TAG_PARAM \
+ || tag == GUMBO_TAG_SOURCE \
+ || tag == GUMBO_TAG_TRACK \
+ || tag == GUMBO_TAG_WBR )
+
+#define newSVpvz8(str) \
+ newSVpvn_utf8((str), strlen(str), 1)
+
+#define newSVpvn8(str, len) \
+ newSVpvn_utf8((str), (len), 1)
+
+typedef enum {
+ PHG_ELEMENT_START,
+ PHG_ELEMENT_END,
+ PHG_TEXT
+} PerlHtmlGumboType;
+
+STATIC
+void
+walk_tree(pTHX_ GumboNode* node, void (*cb)(pTHX_ PerlHtmlGumboType, GumboNode*, void*), void* ctx ) {
+ if ( node->type == GUMBO_NODE_DOCUMENT || node->type == GUMBO_NODE_ELEMENT ) {
+ GumboVector* children;
+ (*cb)(aTHX_ PHG_ELEMENT_START, node, ctx);
+ if ( node->type == GUMBO_NODE_DOCUMENT ) {
+ children = &node->v.document.children;
+ } else {
+ children = &node->v.element.children;
+ }
+ if (children) {
+ for (int i = 0; i < children->length; ++i) {
+ walk_tree(aTHX_ children->data[i], cb, ctx);
+ }
+ }
+ (*cb)(aTHX_ PHG_ELEMENT_END, node, ctx);
+ } else {
+ (*cb)(aTHX_ PHG_TEXT, node, ctx);
+ }
+}
+
+STATIC
+GumboStringPiece
+get_tag_name(GumboElement* e) {
+ GumboStringPiece res;
+ if ( e->tag == GUMBO_TAG_UNKNOWN ) {
+ res = e->original_tag;
+ gumbo_tag_from_original_text(&res);
+ } else {
+ res.data = gumbo_normalized_tagname(e->tag);
+ res.length = strlen(res.data);
+ }
+ return res;
+}
+
+STATIC void
+out_attr_value(SV* out, const char* v) {
+ STRLEN prev = 0;
+ STRLEN len = strlen(v);
+ for ( STRLEN i = 0; i < len; i++ ) {
+ if (v[i] != '"' && v[i] != '&' )
+ continue;
+ if (i != prev)
+ sv_catpvn(out, v+prev, i-prev); /* flush the untouched run before this character */
+ sv_catpv(out, v[i] == '&'? "&amp;": "&quot;"); /* escape for a double-quoted attribute value */
+ prev = i + 1; /* the for-loop does the i++; "++i" here skipped the next character unchecked */
+ }
+ if (prev < len)
+ sv_catpvn(out, v+prev, len-prev); /* trailing run after the last escaped character */
+}
+
+STATIC void
+out_text(SV* out, const char* v) {
+ STRLEN prev = 0;
+ STRLEN len = strlen(v);
+ for ( STRLEN i = 0; i < len; i++ ) {
+ if (v[i] != '<' && v[i] != '>' && v[i] != '&' )
+ continue;
+ if (i != prev)
+ sv_catpvn(out, v+prev, i-prev); /* flush the untouched run before this character */
+ sv_catpv(out, v[i] == '&'? "&amp;": (v[i] == '<'? "&lt;" : "&gt;")); /* escape markup-significant characters in text */
+ prev = i + 1; /* the for-loop does the i++; "++i" here skipped the next character unchecked */
+ }
+ if (prev < len)
+ sv_catpvn(out, v+prev, len-prev); /* trailing run after the last escaped character */
+}
+
+STATIC void
+out_tag_start_line(SV* out, GumboElement* e) {
+ GumboStringPiece piece = get_tag_name(e);
+
+ sv_catpvs(out, "<");
+ sv_catpvn(out, piece.data, piece.length);
+ for (int i = 0; i < e->attributes.length; i++) {
+ GumboAttribute* attr = (GumboAttribute*) e->attributes.data[i];
+ sv_catpvs(out, " ");
+ sv_catpv(out, attr->name);
+ if (strlen(attr->value)) {
+ sv_catpvs(out, "=\"");
+ out_attr_value(out, attr->value);
+ sv_catpvs(out, "\"");
+ }
+ }
+ sv_catpvs(out, ">");
+
+ return;
+}
+
+STATIC void
+out_tag_end_line(SV* out, GumboElement* e) {
+ GumboStringPiece piece;
+ if ( PHG_IS_VOID_ELEMENT(e->tag))
+ return;
+
+ sv_catpvs(out, "</");
+ piece = get_tag_name(e);
+ sv_catpvn(out, piece.data, piece.length);
+ sv_catpvs(out, ">");
+
+ return;
+}
+
+STATIC void
+out_doctype_text( SV* out, GumboDocument* doc ) {
+ sv_catpvs(out, "DOCTYPE");
+ if (strlen(doc->name)>0) {
+ sv_catpvs(out, " ");
+ sv_catpv(out, doc->name);
+ }
+ if (strlen(doc->public_identifier)>0) {
+ sv_catpvs(out, " PUBLIC \"");
+ sv_catpv(out, doc->public_identifier);
+ sv_catpvs(out, "\"");
+ }
+ if (strlen(doc->system_identifier)>0) {
+ sv_catpvs(out, " \"");
+ sv_catpv(out, doc->system_identifier);
+ sv_catpvs(out, "\"");
+ }
+}
+
+STATIC void
+out_doctype( SV* out, GumboDocument* doc ) {
+ sv_catpvs(out, "<!");
+ out_doctype_text(out, doc);
+ sv_catpvs(out, ">\n");
+}
+
+STATIC void
+tree_to_string(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
+ SV* out = (SV*) ctx;
+ if ( type == PHG_TEXT ) {
+ if ( node->type == GUMBO_NODE_COMMENT ) {
+ sv_catpvs(out, "<!--");
+ }
+ else if ( node->type == GUMBO_NODE_CDATA ) {
+ sv_catpvs(out, "<![CDATA[");
+ }
+ if ( node->type == GUMBO_NODE_TEXT ) {
+ out_text(out, node->v.text.text);
+ } else {
+ sv_catpv(out, node->v.text.text);
+ }
+ if ( node->type == GUMBO_NODE_COMMENT ) {
+ sv_catpvs(out, "-->");
+ }
+ else if ( node->type == GUMBO_NODE_CDATA ) {
+ sv_catpvs(out, "]]>");
+ }
+ }
+ else if ( type == PHG_ELEMENT_START && node->type == GUMBO_NODE_DOCUMENT ) {
+ GumboDocument* doc = &node->v.document;
+ if ( doc->has_doctype )
+ out_doctype(out, doc);
+ }
+ else if ( type == PHG_ELEMENT_END && node->type == GUMBO_NODE_DOCUMENT ) {
+ sv_catpvs(out, "\n");
+ }
+ else if ( type == PHG_ELEMENT_START ) {
+ GumboElement* e = &node->v.element;
+ out_tag_start_line(out, e);
+ if ( e->tag == GUMBO_TAG_PRE || e->tag == GUMBO_TAG_TEXTAREA ) {
+ sv_catpvs(out, "\n");
+ }
+ }
+ else if ( type == PHG_ELEMENT_END ) {
+ GumboElement* e = &node->v.element;
+ out_tag_end_line(out, e);
+ }
+ return;
+}
+
+STATIC SV*
+new_html_element(pTHX_ GumboNode* node) {
+ dSP;
+ SV* res;
+ int rcount;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ mXPUSHs(newSVpvs("HTML::Element"));
+ if ( node->type == GUMBO_NODE_DOCUMENT ) {
+ mXPUSHs(newSVpvs("document"));
+ }
+ else if ( node->type == GUMBO_NODE_ELEMENT ) {
+ GumboVector* attrs = &node->v.element.attributes;
+ GumboStringPiece tag = get_tag_name(&node->v.element);
+ mXPUSHs(newSVpvn8( tag.data, tag.length ));
+ for (int i = 0; i < attrs->length; i++) {
+ GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
+ mXPUSHs(newSVpvz8( attr->name ));
+ mXPUSHs(newSVpvz8( attr->value ));
+ }
+ }
+ else if ( node->type == GUMBO_NODE_COMMENT ) {
+ mXPUSHs(newSVpvs("~comment"));
+ mXPUSHs(newSVpvs("text"));
+ mXPUSHs(newSVpvz8( node->v.text.text ));
+ }
+ else {
+ croak("Unknown node type");
+ }
+ PUTBACK;
+
+ rcount = call_method("new", G_SCALAR);
+
+ SPAGAIN;
+
+ if (rcount != 1) croak("Big trouble\n");
+
+ res = SvREFCNT_inc_NN(POPs);
+ PUTBACK;
+
+ FREETMPS;
+ LEAVE;
+
+ return res;
+}
+
+STATIC SV*
+new_html_element_doctype(pTHX_ GumboDocument* doc) {
+ dSP;
+ SV* res;
+ SV* doctype;
+ int rcount;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ mXPUSHs(newSVpvs("HTML::Element"));
+ mXPUSHs(newSVpvs("~declaration"));
+ mXPUSHs(newSVpvs("text"));
+ doctype = newSVpvn8( "",0 );
+ out_doctype_text(doctype, doc);
+ mXPUSHs(doctype);
+ PUTBACK;
+
+ rcount = call_method("new", G_SCALAR);
+
+ SPAGAIN;
+
+ if (rcount != 1) croak("Big trouble\n");
+
+ res = SvREFCNT_inc_NN(POPs);
+ PUTBACK;
+
+ FREETMPS;
+ LEAVE;
+
+ return res;
+}
+
+
+STATIC void
+push_element(pTHX_ SV* parent, SV* element) {
+ dSP;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ XPUSHs(parent);
+ XPUSHs(element);
+ PUTBACK;
+
+ call_method("push_content", G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+}
+
+STATIC void
+push_text_element(pTHX_ SV* parent, const char *const s, const STRLEN len) {
+ dSP;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ XPUSHs(parent);
+ mXPUSHs(newSVpv(s, len));
+ PUTBACK;
+
+ call_method("push_content", G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+}
+
+STATIC SV*
+get_element_parent(pTHX_ SV* element) {
+ dSP;
+ SV* res;
+ int rcount;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ XPUSHs(element);
+ PUTBACK;
+
+ rcount = call_method("parent", G_SCALAR);
+
+ SPAGAIN;
+
+ if (rcount != 1) croak("Big trouble\n");
+
+ res = SvREFCNT_inc_NN(POPs);
+ PUTBACK;
+
+ FREETMPS;
+ LEAVE;
+
+ return res;
+}
+
+STATIC void
+tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
+ SV** out = (SV**) ctx;
+ if ( type == PHG_TEXT ) {
+ if ( node->type == GUMBO_NODE_COMMENT ) {
+ SV* element = new_html_element(aTHX_ node);
+ push_element(*out, element);
+ SvREFCNT_dec(element);
+ } else {
+ push_text_element(*out, node->v.text.text, 0);
+ }
+ }
+ else if ( type == PHG_ELEMENT_START && node->type == GUMBO_NODE_DOCUMENT ) {
+ GumboDocument* doc = &node->v.document;
+ *out = new_html_element(aTHX_ node);
+ sv_2mortal(*out);
+ if ( doc->has_doctype ) {
+ SV* element = new_html_element_doctype(aTHX_ doc);
+ push_element(*out, element);
+ SvREFCNT_dec(element);
+ }
+ }
+ else if ( type == PHG_ELEMENT_END && node->type == GUMBO_NODE_DOCUMENT ) {
+ }
+ else if ( type == PHG_ELEMENT_START ) {
+ SV* element = new_html_element(aTHX_ node);
+ push_element(*out, element);
+ *out = element;
+ }
+ else if ( type == PHG_ELEMENT_END ) {
+ SV* parent = get_element_parent(aTHX_ *out);
+ SvREFCNT_dec(*out);
+ *out = parent;
+ }
+ return;
+}
+
+STATIC void
+tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
+ dSP;
+ SV* cb = (SV*) ctx;
+
+ if ( type == PHG_ELEMENT_END && PHG_IS_VOID_ELEMENT(node->v.element.tag) )
+ return;
+
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(SP);
+ if ( type == PHG_TEXT ) {
+ switch ( node->type ) {
+ case GUMBO_NODE_TEXT:
+ mXPUSHs(newSVpvs("text"));break;
+ case GUMBO_NODE_WHITESPACE:
+ mXPUSHs(newSVpvs("space"));break;
+ case GUMBO_NODE_CDATA:
+ mXPUSHs(newSVpvs("cdata"));break;
+ case GUMBO_NODE_COMMENT:
+ mXPUSHs(newSVpvs("comment"));break;
+ default:
+ croak("Unknown node type");
+ }
+ mXPUSHs(newSVpvz8( node->v.text.text ));
+ }
+ else if ( type == PHG_ELEMENT_START && node->type == GUMBO_NODE_DOCUMENT ) {
+ GumboDocument* doc = &node->v.document;
+ mXPUSHs(newSVpvs("document start"));
+ if ( doc->has_doctype ) {
+ HV* h = newHV();
+ mXPUSHs(newRV_noinc(MUTABLE_SV(h)));
+ (void)hv_stores(h, "name", newSVpvz8( doc->name ));
+ (void)hv_stores(h, "public", newSVpvz8( doc->public_identifier ));
+ (void)hv_stores(h, "system", newSVpvz8( doc->system_identifier ));
+ } else {
+ mXPUSHs(&PL_sv_undef);
+ }
+ }
+ else if ( type == PHG_ELEMENT_END && node->type == GUMBO_NODE_DOCUMENT ) {
+ mXPUSHs(newSVpvs("document end"));
+ }
+ else if ( type == PHG_ELEMENT_START ) {
+ GumboVector* attrs = &node->v.element.attributes;
+ GumboStringPiece tag = get_tag_name(&node->v.element);
+ AV* for_attrs = newAV();
+
+ mXPUSHs(newSVpvs("start"));
+ mXPUSHs(newSVpvn8( tag.data, tag.length ));
+ mXPUSHs(newRV_noinc(MUTABLE_SV(for_attrs)));
+ for (int i = 0; i < attrs->length; i++) {
+ GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
+ av_push(for_attrs, newSVpvz8( attr->name ));
+ av_push(for_attrs, newSVpvz8( attr->value ));
+ }
+ }
+ else if ( type == PHG_ELEMENT_END ) {
+ GumboStringPiece tag = get_tag_name(&node->v.element);
+ mXPUSHs(newSVpvs("end"));
+ mXPUSHs(newSVpvn8( tag.data, tag.length ));
+ }
+
+ PUTBACK;
+
+ call_sv(cb, G_DISCARD);
+
+ FREETMPS;
+ LEAVE;
+
+ return;
+}
+
+STATIC
+char* prepare_buffer(SV* buffer) {
+ if(!SvROK(buffer))
+ Perl_croak("First argument is not a reference");
+
+ buffer = SvRV(buffer);
+ return SvPV_nolen(buffer);
+}
+
+MODULE = HTML::Gumbo PACKAGE = HTML::Gumbo
+
+SV*
+parse_to_string(self, buffer, ...)
+ SV *self
+ SV *buffer
+
+ CODE:
+ const char* str = prepare_buffer(buffer);
+
+ RETVAL = newSVpvn8("", 0);
+
+ GumboOutput* output = gumbo_parse(str);
+ walk_tree(aTHX_ output->document, tree_to_string, (void*)RETVAL);
+ gumbo_destroy_output(&kGumboDefaultOptions, output);
+
+ OUTPUT: RETVAL
+
+SV*
+parse_to_tree(self, buffer, ...)
+ SV *self
+ SV *buffer
+
+ CODE:
+ const char* str;
+ load_module(
+ 0,
+ newSVpvs("HTML::TreeBuilder"),
+ newSViv(5), newSVpvs("-weak"), NULL
+ );
+ str = prepare_buffer(buffer);
+
+ SV* res;
+ GumboOutput* output = gumbo_parse(str);
+ walk_tree(aTHX_ output->document, tree_to_tree, (void*)(&res));
+ gumbo_destroy_output(&kGumboDefaultOptions, output);
+ RETVAL = res;
+
+ OUTPUT: RETVAL
+
+void
+_parse_to_callback(self, buffer, cb, ...)
+ SV *self
+ SV *buffer
+ SV *cb
+
+ CODE:
+ const char* str = prepare_buffer(buffer);
+
+ GumboOutput* output = gumbo_parse(str);
+ walk_tree(aTHX_ output->document, tree_to_callback, (void*)cb);
+ gumbo_destroy_output(&kGumboDefaultOptions, output);
+
+ XSRETURN_YES;
diff --git a/t/callback.t b/t/callback.t
new file mode 100644
index 0000000..7e786d1
--- /dev/null
+++ b/t/callback.t
@@ -0,0 +1,39 @@
+use strict;
+use warnings;
+use Test::More;
+
+use_ok('HTML::Gumbo');
+
+my $parser = HTML::Gumbo->new;
+my $input = <<'END';
+<!DOCTYPE html>
+<!--This is a comment-->
+<h1>hello world!</h1>
+<img disabled boo="foo" />
+END
+my @expected = (
+ ['document start', {name => 'html', public => '', system => ''}],
+ ['comment', 'This is a comment'],
+ ['start', 'html', []],
+ ['start', 'head', []],
+ ['end', 'head'],
+ ['start', 'body', []],
+
+ ['start', 'h1', []],
+ ['text', 'hello world!'],
+ ['end', 'h1'],
+ ['space', "\n"],
+
+ ['start', 'img', [disabled => "", boo => "foo"]],
+ ['space', "\n"],
+
+ ['end', 'body'],
+ ['end', 'html'],
+ ['document end'],
+);
+my @got;
+my $res = $parser->parse($input, format => 'callback', callback => sub {
+ push @got, [@_];
+});
+is_deeply \@got, \@expected, 'callback events match'; # @got was collected but never compared before
+done_testing();
diff --git a/t/string.t b/t/string.t
new file mode 100644
index 0000000..c254072
--- /dev/null
+++ b/t/string.t
@@ -0,0 +1,75 @@
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+
+use_ok('HTML::Gumbo');
+
+my $parser = HTML::Gumbo->new;
+my $input = <<'END';
+<!DOCTYPE html>
+<!--This is a comment-->
+<h1>hello world!</h1>
+<div class="test">
+ <p>first para
+ <p>second
+</div>
+<div>
+ <img />
+ <img alt="©">
+ <img></img>
+</div>
+<some>
+END
+my $expected = <<'END';
+<!DOCTYPE html>
+<!--This is a comment--><html><head></head><body><h1>hello world!</h1>
+<div class="test">
+ <p>first para
+ </p><p>second
+</p></div>
+<div>
+ <img>
+ <img alt="©">
+ <img>
+</div>
+<some>
+</some></body></html>
+END
+my $res = $parser->parse($input);
+is $res, $expected, 'very basic test';
+
+$input = <<'END';
+<div class="&quot;••&amp;"><p></div>
+END
+$expected = <<'END';
+<html><head></head><body><div class="&quot;••&amp;"><p></div>
+</body></html>
+END
+$res = $parser->parse($input);
+is $res, $expected, 'very basic test';
+
+$input = <<'END';
+<pre>foo</pre>
+<pre>
+foo</pre>
+<pre>
+
+foo</pre>
+END
+$expected = <<'END';
+<html><head></head><body><pre>
+foo</pre>
+<pre>
+foo</pre>
+<pre>
+
+foo</pre>
+</body></html>
+END
+$res = $parser->parse($input);
+is $res, $expected, 'very basic test';
+
+
+
+done_testing();
diff --git a/t/tree.t b/t/tree.t
new file mode 100644
index 0000000..ec795fd
--- /dev/null
+++ b/t/tree.t
@@ -0,0 +1,21 @@
+use strict;
+use warnings;
+use Test::More;
+
+use_ok('HTML::Gumbo');
+
+my $parser = HTML::Gumbo->new;
+my $res = $parser->parse(<<'END', format => 'tree');
+<!DOCTYPE html>
+<!--This is a comment-->
+<h1>hello world!</h1>
+END
+
+my $expected = <<'END';
+<document><!DOCTYPE html><!--This is a comment--><html><head></head><body><h1>hello world!</h1>
+</body></html></document>
+END
+chomp $expected;
+is $res->as_HTML, $expected, 'correct value';
+
+done_testing();
commit 911c69b7d1530a438f4a7c0ab33f702982cdb0f6
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Mon Sep 30 18:36:45 2013 +0400
.gitignore
diff --git a/.gitignore b/.gitignore
index fd33f3d..975f66a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ nytprof/
*.tar.gz
*.sw[po]
*.bak
+*.[co]
commit 09a44ddf835b6dcf2349ae8eb4cb33a8bf53474b
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:11:47 2013 +0400
require LibGumbo
diff --git a/Build.PL b/Build.PL
index cba75d6..4634b02 100644
--- a/Build.PL
+++ b/Build.PL
@@ -11,6 +11,9 @@ my $builder = Module::Build->new(
build_requires => {
'ExtUtils::CBuilder' => 0,
},
+ requires => {
+ 'Alien::LibGumbo' => 0,
+ },
extra_compiler_flags => $alien->cflags(),
extra_linker_flags => $alien->libs(),
commit 680f5e928748c534d2f8e139076a9d3f71045413
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:12:11 2013 +0400
update manifest
diff --git a/MANIFEST b/MANIFEST
index 410bd95..1cd61be 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5,3 +5,5 @@ MANIFEST This list of files
t/callback.t
t/string.t
t/tree.t
+META.yml
+META.json
commit 5f13a58c90f72bfe1700eab9e0418fed7fa9302c
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:12:41 2013 +0400
skip a test if HTML::TreeBuilder is not installed
diff --git a/t/tree.t b/t/tree.t
index ec795fd..1d9fa7d 100644
--- a/t/tree.t
+++ b/t/tree.t
@@ -2,6 +2,10 @@ use strict;
use warnings;
use Test::More;
+unless ( do { local $@; eval "use HTML::TreeBuilder 5 -weak; 1" } } ) {
+ plan skip_all => 'No HTML::TreeBuilder 5 -weak';
+}
+
use_ok('HTML::Gumbo');
my $parser = HTML::Gumbo->new;
commit c323f0a147cfab3222107ec28798fc5700c05f58
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:13:42 2013 +0400
add metafiles
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..85b0296
--- /dev/null
+++ b/META.json
@@ -0,0 +1,42 @@
+{
+ "abstract" : "HTML5 parser based on gumbo C library",
+ "author" : [
+ "Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>"
+ ],
+ "dynamic_config" : 1,
+ "generated_by" : "Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560",
+ "license" : [
+ "unknown"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "HTML-Gumbo",
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "ExtUtils::CBuilder" : "0"
+ }
+ },
+ "configure" : {
+ "requires" : {
+ "Alien::LibGumbo" : "0",
+ "Module::Build" : "0.40"
+ }
+ },
+ "runtime" : {
+ "requires" : {
+ "Alien::LibGumbo" : "0"
+ }
+ }
+ },
+ "provides" : {
+ "HTML::Gumbo" : {
+ "file" : "lib/HTML/Gumbo.pm",
+ "version" : "0.1"
+ }
+ },
+ "release_status" : "stable",
+ "version" : "0.1"
+}
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..3905a45
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,23 @@
+---
+abstract: 'HTML5 parser based on gumbo C library'
+author:
+ - 'Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>'
+build_requires:
+ ExtUtils::CBuilder: 0
+configure_requires:
+ Alien::LibGumbo: 0
+ Module::Build: 0.40
+dynamic_config: 1
+generated_by: 'Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560'
+license: unknown
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: HTML-Gumbo
+provides:
+ HTML::Gumbo:
+ file: lib/HTML/Gumbo.pm
+ version: 0.1
+requires:
+ Alien::LibGumbo: 0
+version: 0.1
commit 156464438fe213e3ea3de3dc6f52c7fc026624f5
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:12:41 2013 +0400
skip a test if HTML::TreeBuilder is not installed
diff --git a/t/tree.t b/t/tree.t
index ec795fd..be8a409 100644
--- a/t/tree.t
+++ b/t/tree.t
@@ -2,6 +2,10 @@ use strict;
use warnings;
use Test::More;
+unless ( do { local $@; eval "use HTML::TreeBuilder 5 -weak; 1" } ) {
+ plan skip_all => 'No HTML::TreeBuilder 5 -weak';
+}
+
use_ok('HTML::Gumbo');
my $parser = HTML::Gumbo->new;
commit fe964496fd59a569d8fd6c4cb2b08023aba2c555
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:13:42 2013 +0400
add metafiles
diff --git a/META.json b/META.json
new file mode 100644
index 0000000..85b0296
--- /dev/null
+++ b/META.json
@@ -0,0 +1,42 @@
+{
+ "abstract" : "HTML5 parser based on gumbo C library",
+ "author" : [
+ "Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>"
+ ],
+ "dynamic_config" : 1,
+ "generated_by" : "Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560",
+ "license" : [
+ "unknown"
+ ],
+ "meta-spec" : {
+ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+ "version" : "2"
+ },
+ "name" : "HTML-Gumbo",
+ "prereqs" : {
+ "build" : {
+ "requires" : {
+ "ExtUtils::CBuilder" : "0"
+ }
+ },
+ "configure" : {
+ "requires" : {
+ "Alien::LibGumbo" : "0",
+ "Module::Build" : "0.40"
+ }
+ },
+ "runtime" : {
+ "requires" : {
+ "Alien::LibGumbo" : "0"
+ }
+ }
+ },
+ "provides" : {
+ "HTML::Gumbo" : {
+ "file" : "lib/HTML/Gumbo.pm",
+ "version" : "0.1"
+ }
+ },
+ "release_status" : "stable",
+ "version" : "0.1"
+}
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..3905a45
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,23 @@
+---
+abstract: 'HTML5 parser based on gumbo C library'
+author:
+ - 'Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>'
+build_requires:
+ ExtUtils::CBuilder: 0
+configure_requires:
+ Alien::LibGumbo: 0
+ Module::Build: 0.40
+dynamic_config: 1
+generated_by: 'Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560'
+license: unknown
+meta-spec:
+ url: http://module-build.sourceforge.net/META-spec-v1.4.html
+ version: 1.4
+name: HTML-Gumbo
+provides:
+ HTML::Gumbo:
+ file: lib/HTML/Gumbo.pm
+ version: 0.1
+requires:
+ Alien::LibGumbo: 0
+version: 0.1
commit ebe02f444de8827ba670dbca133df57e4c5835a5
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:47:01 2013 +0400
compilation problems
* forgot aTHX_ in a few places
* for(int i = 0; ...) -> int i; for(i=0;...)
diff --git a/lib/HTML/Gumbo.xs b/lib/HTML/Gumbo.xs
index 88a8b5c..dca4411 100644
--- a/lib/HTML/Gumbo.xs
+++ b/lib/HTML/Gumbo.xs
@@ -45,7 +45,8 @@ walk_tree(pTHX_ GumboNode* node, void (*cb)(pTHX_ PerlHtmlGumboType, GumboNode*,
children = &node->v.element.children;
}
if (children) {
- for (int i = 0; i < children->length; ++i) {
+ int i = 0;
+ for (i = 0; i < children->length; ++i) {
walk_tree(aTHX_ children->data[i], cb, ctx);
}
}
@@ -71,9 +72,10 @@ get_tag_name(GumboElement* e) {
STATIC void
out_attr_value(SV* out, const char* v) {
+ STRLEN i;
STRLEN prev = 0;
STRLEN len = strlen(v);
- for ( STRLEN i = 0; i < len; i++ ) {
+ for ( i = 0; i < len; i++ ) {
if (v[i] != '"' && v[i] != '&' )
continue;
if (i != prev)
@@ -87,9 +89,10 @@ out_attr_value(SV* out, const char* v) {
STATIC void
out_text(SV* out, const char* v) {
+ STRLEN i;
STRLEN prev = 0;
STRLEN len = strlen(v);
- for ( STRLEN i = 0; i < len; i++ ) {
+ for ( i = 0; i < len; i++ ) {
if (v[i] != '<' && v[i] != '>' && v[i] != '&' )
continue;
if (i != prev)
@@ -103,11 +106,12 @@ out_text(SV* out, const char* v) {
STATIC void
out_tag_start_line(SV* out, GumboElement* e) {
+ int i;
GumboStringPiece piece = get_tag_name(e);
sv_catpvs(out, "<");
sv_catpvn(out, piece.data, piece.length);
- for (int i = 0; i < e->attributes.length; i++) {
+ for (i = 0; i < e->attributes.length; i++) {
GumboAttribute* attr = (GumboAttribute*) e->attributes.data[i];
sv_catpvs(out, " ");
sv_catpv(out, attr->name);
@@ -220,10 +224,11 @@ new_html_element(pTHX_ GumboNode* node) {
mXPUSHs(newSVpvs("document"));
}
else if ( node->type == GUMBO_NODE_ELEMENT ) {
+ int i;
GumboVector* attrs = &node->v.element.attributes;
GumboStringPiece tag = get_tag_name(&node->v.element);
mXPUSHs(newSVpvn8( tag.data, tag.length ));
- for (int i = 0; i < attrs->length; i++) {
+ for (i = 0; i < attrs->length; i++) {
GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
mXPUSHs(newSVpvz8( attr->name ));
mXPUSHs(newSVpvz8( attr->value ));
@@ -355,7 +360,7 @@ tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
if ( type == PHG_TEXT ) {
if ( node->type == GUMBO_NODE_COMMENT ) {
SV* element = new_html_element(aTHX_ node);
- push_element(*out, element);
+ push_element(aTHX_ *out, element);
SvREFCNT_dec(element);
} else {
push_text_element(*out, node->v.text.text, 0);
@@ -367,7 +372,7 @@ tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
sv_2mortal(*out);
if ( doc->has_doctype ) {
SV* element = new_html_element_doctype(aTHX_ doc);
- push_element(*out, element);
+ push_element(aTHX_ *out, element);
SvREFCNT_dec(element);
}
}
@@ -375,7 +380,7 @@ tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
}
else if ( type == PHG_ELEMENT_START ) {
SV* element = new_html_element(aTHX_ node);
- push_element(*out, element);
+ push_element(aTHX_ *out, element);
*out = element;
}
else if ( type == PHG_ELEMENT_END ) {
@@ -430,6 +435,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
mXPUSHs(newSVpvs("document end"));
}
else if ( type == PHG_ELEMENT_START ) {
+ int i;
GumboVector* attrs = &node->v.element.attributes;
GumboStringPiece tag = get_tag_name(&node->v.element);
AV* for_attrs = newAV();
@@ -437,7 +443,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
mXPUSHs(newSVpvs("start"));
mXPUSHs(newSVpvn8( tag.data, tag.length ));
mXPUSHs(newRV_noinc(MUTABLE_SV(for_attrs)));
- for (int i = 0; i < attrs->length; i++) {
+ for (i = 0; i < attrs->length; i++) {
GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
av_push(for_attrs, newSVpvz8( attr->name ));
av_push(for_attrs, newSVpvz8( attr->value ));
@@ -460,9 +466,9 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
}
STATIC
-char* prepare_buffer(SV* buffer) {
+char* prepare_buffer(pTHX_ SV* buffer) {
if(!SvROK(buffer))
- Perl_croak("First argument is not a reference");
+ croak("First argument is not a reference");
buffer = SvRV(buffer);
return SvPV_nolen(buffer);
@@ -476,7 +482,7 @@ parse_to_string(self, buffer, ...)
SV *buffer
CODE:
- const char* str = prepare_buffer(buffer);
+ const char* str = prepare_buffer(aTHX_ buffer);
RETVAL = newSVpvn8("", 0);
@@ -498,7 +504,7 @@ parse_to_tree(self, buffer, ...)
newSVpvs("HTML::TreeBuilder"),
newSViv(5), newSVpvs("-weak"), NULL
);
- str = prepare_buffer(buffer);
+ str = prepare_buffer(aTHX_ buffer);
SV* res;
GumboOutput* output = gumbo_parse(str);
@@ -515,7 +521,7 @@ _parse_to_callback(self, buffer, cb, ...)
SV *cb
CODE:
- const char* str = prepare_buffer(buffer);
+ const char* str = prepare_buffer(aTHX_ buffer);
GumboOutput* output = gumbo_parse(str);
walk_tree(aTHX_ output->document, tree_to_callback, (void*)cb);
commit 4a11e5d268804bce8a137b67ea9f3d8a8927ad9a
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 11:51:44 2013 +0400
bump version, 0.11
diff --git a/META.json b/META.json
index 85b0296..155104a 100644
--- a/META.json
+++ b/META.json
@@ -34,9 +34,9 @@
"provides" : {
"HTML::Gumbo" : {
"file" : "lib/HTML/Gumbo.pm",
- "version" : "0.1"
+ "version" : "0.11"
}
},
"release_status" : "stable",
- "version" : "0.1"
+ "version" : "0.11"
}
diff --git a/META.yml b/META.yml
index 3905a45..5f8fa14 100644
--- a/META.yml
+++ b/META.yml
@@ -17,7 +17,7 @@ name: HTML-Gumbo
provides:
HTML::Gumbo:
file: lib/HTML/Gumbo.pm
- version: 0.1
+ version: 0.11
requires:
Alien::LibGumbo: 0
-version: 0.1
+version: 0.11
diff --git a/lib/HTML/Gumbo.pm b/lib/HTML/Gumbo.pm
index 7bfd539..a0ceb07 100644
--- a/lib/HTML/Gumbo.pm
+++ b/lib/HTML/Gumbo.pm
@@ -5,7 +5,7 @@ use warnings;
package HTML::Gumbo;
use Alien::LibGumbo;
-our $VERSION = '0.1';
+our $VERSION = '0.11';
require XSLoader;
XSLoader::load('HTML::Gumbo', $VERSION);
commit e1efca454a5e593f0cadcccfd9107cb0af2d1de7
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 22:52:28 2013 +0400
replace MUTABLE_*V(x) with explicit casting
This set of macros was introduced in perl 5.11
and at this moment is not supported by Devel::PPPort[1].
Not going to shave this yak for two uses of the macro.
[1] https://rt.cpan.org/Public/Bug/Display.html?id=80476
diff --git a/lib/HTML/Gumbo.xs b/lib/HTML/Gumbo.xs
index dca4411..f450969 100644
--- a/lib/HTML/Gumbo.xs
+++ b/lib/HTML/Gumbo.xs
@@ -423,7 +423,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
mXPUSHs(newSVpvs("document start"));
if ( doc->has_doctype ) {
HV* h = newHV();
- mXPUSHs(newRV_noinc(MUTABLE_SV(h)));
+ mXPUSHs(newRV_noinc((SV*)h));
(void)hv_stores(h, "name", newSVpvz8( doc->name ));
(void)hv_stores(h, "public", newSVpvz8( doc->public_identifier ));
(void)hv_stores(h, "system", newSVpvz8( doc->system_identifier ));
@@ -442,7 +442,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
mXPUSHs(newSVpvs("start"));
mXPUSHs(newSVpvn8( tag.data, tag.length ));
- mXPUSHs(newRV_noinc(MUTABLE_SV(for_attrs)));
+ mXPUSHs(newRV_noinc((SV*)for_attrs));
for (i = 0; i < attrs->length; i++) {
GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
av_push(for_attrs, newSVpvz8( attr->name ));
commit a9c39575193d6731630e49a61ba297a297b866b6
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 22:55:52 2013 +0400
bump version, 0.12
diff --git a/META.json b/META.json
index 155104a..1917b5c 100644
--- a/META.json
+++ b/META.json
@@ -34,7 +34,7 @@
"provides" : {
"HTML::Gumbo" : {
"file" : "lib/HTML/Gumbo.pm",
- "version" : "0.11"
+ "version" : "0.12"
}
},
"release_status" : "stable",
diff --git a/META.yml b/META.yml
index 5f8fa14..5868cf5 100644
--- a/META.yml
+++ b/META.yml
@@ -17,7 +17,7 @@ name: HTML-Gumbo
provides:
HTML::Gumbo:
file: lib/HTML/Gumbo.pm
- version: 0.11
+ version: 0.12
requires:
Alien::LibGumbo: 0
version: 0.11
diff --git a/lib/HTML/Gumbo.pm b/lib/HTML/Gumbo.pm
index a0ceb07..43b1c37 100644
--- a/lib/HTML/Gumbo.pm
+++ b/lib/HTML/Gumbo.pm
@@ -5,7 +5,7 @@ use warnings;
package HTML::Gumbo;
use Alien::LibGumbo;
-our $VERSION = '0.11';
+our $VERSION = '0.12';
require XSLoader;
XSLoader::load('HTML::Gumbo', $VERSION);
commit 92d5c908de18d20358348087c6a9e17a2e31d5f4
Merge: a9c3957 c323f0a
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 23:02:28 2013 +0400
Merge remote-tracking branch 'origin/master'
Conflicts:
META.json
META.yml
t/tree.t
commit d349f7c9cea42f57347fd14b7d856ec670028539
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Wed Oct 9 23:03:19 2013 +0400
update meta
diff --git a/META.json b/META.json
index 1917b5c..535af14 100644
--- a/META.json
+++ b/META.json
@@ -38,5 +38,5 @@
}
},
"release_status" : "stable",
- "version" : "0.11"
+ "version" : "0.12"
}
diff --git a/META.yml b/META.yml
index 5868cf5..c1c8e2e 100644
--- a/META.yml
+++ b/META.yml
@@ -20,4 +20,4 @@ provides:
version: 0.12
requires:
Alien::LibGumbo: 0
-version: 0.11
+version: 0.12
commit b9472f65048425462b14121da6cae3c131caed1e
Author: Neil Bowers <neil at bowers.com>
Date: Wed Oct 9 20:57:16 2013 +0100
Added link to github repo
diff --git a/Build.PL b/Build.PL
index 4634b02..17388e4 100644
--- a/Build.PL
+++ b/Build.PL
@@ -15,7 +15,13 @@ my $builder = Module::Build->new(
'Alien::LibGumbo' => 0,
},
+ meta_merge => {
+ resources => {
+ repository => 'https://github.com/ruz/HTML-Gumbo'
+ }
+ },
+
extra_compiler_flags => $alien->cflags(),
extra_linker_flags => $alien->libs(),
);
-$builder->create_build_script;
\ No newline at end of file
+$builder->create_build_script;
commit 3364cd4e1f47b19dce0981fc03ab9519f66c0d8e
Author: Neil Bowers <neil at bowers.com>
Date: Wed Oct 9 21:00:07 2013 +0100
Initial skeleton Changes file
diff --git a/Changes b/Changes
new file mode 100644
index 0000000..2b77299
--- /dev/null
+++ b/Changes
@@ -0,0 +1,10 @@
+Revision history for Perl module HTML::Gumbo
+
+0.12 2013-10-09
+
+0.11 2013-10-09
+
+0.1 2013-09-30
+
+ - first release to CPAN
+
commit 296240499d9c4518b65dcf94bdf25dc3e7fec565
Merge: d349f7c 3364cd4
Author: Ruslan Zakirov <Ruslan.Zakirov at gmail.com>
Date: Wed Oct 9 22:29:00 2013 -0700
Merge pull request #1 from neilbowers/master
Added Changes file and ensure repo link in metadata
commit 9207c21cf501300b224d5635abd2b66a3be9af6c
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date: Thu Oct 10 09:33:40 2013 +0400
update changelog
diff --git a/Changes b/Changes
index 2b77299..0ab4c56 100644
--- a/Changes
+++ b/Changes
@@ -2,8 +2,14 @@ Revision history for Perl module HTML::Gumbo
0.12 2013-10-09
+ - perl 5.10 compatibility
+
0.11 2013-10-09
+ - use c89 scoping of iterator in for loops
+ - fixes for threaded perls
+ - skip test if HTML::TreeBuilder is not installed
+
0.1 2013-09-30
- first release to CPAN
-----------------------------------------------------------------------
More information about the Bps-public-commit
mailing list