[Bps-public-commit] html-gumbo branch, master, created. 9207c21cf501300b224d5635abd2b66a3be9af6c

Alex Vandiver alexmv at bestpractical.com
Thu Jul 17 13:03:16 EDT 2014


The branch, master has been created
        at  9207c21cf501300b224d5635abd2b66a3be9af6c (commit)

- Log -----------------------------------------------------------------
commit 6ee0b91b1efe685c70c0ca0053a7da8191929d55
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Tue Sep 24 15:31:06 2013 +0400

    .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd33f3d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+Build
+_build/
+MYMETA.*
+pm_to_blib
+blib/
+MANIFEST.bak
+MANIFEST.new
+MANIFEST.old
+cover_db/
+nytprof/
+*.tar.gz
+*.sw[po]
+*.bak

commit 91186b96f3b6bbaa34f13939c13d550f4bff2332
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Mon Sep 30 18:33:47 2013 +0400

    releng files

diff --git a/Build.PL b/Build.PL
new file mode 100644
index 0000000..cba75d6
--- /dev/null
+++ b/Build.PL
@@ -0,0 +1,18 @@
+use Module::Build;
+use Alien::LibGumbo;
+
+my $alien = Alien::LibGumbo->new;
+my $builder = Module::Build->new(
+    module_name => 'HTML::Gumbo',
+
+    configure_requires => {
+        'Alien::LibGumbo' => 0,
+    },
+    build_requires => {
+        'ExtUtils::CBuilder' => 0,
+    },
+
+    extra_compiler_flags => $alien->cflags(),
+    extra_linker_flags   => $alien->libs(),
+);
+$builder->create_build_script;
\ No newline at end of file
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 0000000..410bd95
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,7 @@
+Build.PL
+lib/HTML/Gumbo.pm
+lib/HTML/Gumbo.xs
+MANIFEST			This list of files
+t/callback.t
+t/string.t
+t/tree.t
diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP
new file mode 100644
index 0000000..3801b04
--- /dev/null
+++ b/MANIFEST.SKIP
@@ -0,0 +1,73 @@
+
+#!start included /Users/ruz/perl5/perlbrew/perls/perl-5.16.1/lib/site_perl/5.16.1/ExtUtils/MANIFEST.SKIP
+# Avoid version control files.
+\bRCS\b
+\bCVS\b
+\bSCCS\b
+,v$
+\B\.svn\b
+\B\.git\b
+\B\.gitignore\b
+\b_darcs\b
+\B\.cvsignore$
+
+# Avoid VMS specific MakeMaker generated files
+\bDescrip.MMS$
+\bDESCRIP.MMS$
+\bdescrip.mms$
+
+# Avoid Makemaker generated and utility files.
+\bMANIFEST\.bak
+\bMakefile$
+\bblib/
+\bMakeMaker-\d
+\bpm_to_blib\.ts$
+\bpm_to_blib$
+\bblibdirs\.ts$         # 6.18 through 6.25 generated this
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\b_build/
+\bBuild.bat$
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+
+# Avoid temp and backup files.
+~$
+\.old$
+\#$
+\b\.#
+\.bak$
+\.tmp$
+\.#
+\.rej$
+
+# Avoid OS-specific files/dirs
+# Mac OSX metadata
+\B\.DS_Store
+# Mac OSX SMB mount metadata files
+\B\._
+
+# Avoid Devel::Cover and Devel::CoverX::Covered files.
+\bcover_db\b
+\bcovered\b
+ 
+# Avoid MYMETA files
+^MYMETA\.
+#!end included /Users/ruz/perl5/perlbrew/perls/perl-5.16.1/lib/site_perl/5.16.1/ExtUtils/MANIFEST.SKIP
+
+# Avoid configuration metadata file
+^MYMETA\.
+
+# Avoid Module::Build generated and utility files.
+\bBuild$
+\bBuild.bat$
+\b_build
+\bBuild.COM$
+\bBUILD.COM$
+\bbuild.com$
+^MANIFEST\.SKIP
+
+.*\.c$
+.*\.o$

commit cb35f8e27976eb3b99fd0311c3b6baaf0995d26f
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Mon Sep 30 18:35:03 2013 +0400

    initial implementation

diff --git a/lib/HTML/Gumbo.pm b/lib/HTML/Gumbo.pm
new file mode 100644
index 0000000..7bfd539
--- /dev/null
+++ b/lib/HTML/Gumbo.pm
@@ -0,0 +1,276 @@
+use v5.10;
+use strict;
+use warnings;
+
+package HTML::Gumbo;
+
+use Alien::LibGumbo;
+our $VERSION = '0.1';
+
+require XSLoader;
+XSLoader::load('HTML::Gumbo', $VERSION);
+
+=head1 NAME
+
+HTML::Gumbo - HTML5 parser based on gumbo C library
+
+=head1 DESCRIPTION
+
+L<Gumbo|https://github.com/google/gumbo-parser> is an implementation
+of L<the HTML5 parsing algorithm|http://www.w3.org/TR/html5/syntax.html>
+implemented as a pure C99 library with no outside dependencies.
+
+Goals and features of the C library:
+
+=over 4
+
+=item * Fully conformant with the HTML5 spec.
+
+=item * Robust and resilient to bad input.
+
+=item * Simple API that can be easily wrapped by other languages. (This is one of such wrappers.)
+
+=item * Support for source locations and pointers back to the original text.
+(Not exposed by this implementation at the moment.)
+
+=item * Relatively lightweight, with no outside dependencies.
+
+=item * Passes all html5lib-0.95 tests.
+
+=item * Tested on over 2.5 billion pages from Google's index.
+
+=back
+
+=head1 SUPPORTED OUTPUT FORMATS
+
+=head2 string
+
+Beta readiness.
+
+HTML is parsed and re-built from the tree, so tags are balanced
+(except void elements). Since fragments parsing is not supported
+at the moment the result always gets html, head and body elements.
+
+No additional arguments for this format.
+
+    $html = HTML::Gumbo->new->parse( $html );
+
+=head2 callback
+
+Beta readiness.
+
+L<HTML::Parser> like interface. Pass a sub as C<callback> argument to
+L</parse> method and it will be called for every node in the document:
+
+    HTML::Gumbo->new->parse( $html, format => 'callback', callback => sub {
+        my ($event) = shift;
+        if ( $event eq 'document start' ) {
+            my ($doctype) = @_;
+        }
+        elsif ( $event eq 'document end' ) {
+        }
+        elsif ( $event eq 'start' ) {
+            my ($tag, $attrs) = @_;
+        }
+        elsif ( $event eq 'end' ) {
+            my ($tag) = @_;
+        }
+        elsif ( $event =~ /^(text|space|cdata|comment)$/ ) {
+            my ($text) = @_;
+        }
+        else {
+            die "Unknown event";
+        }
+    } );
+
+Note that 'end' events are not generated for
+L<void elements|http://www.w3.org/TR/html5/syntax.html#void-elements>,
+for example C<hr>, C<br> and C<img>.
+
+No additional arguments except mentioned C<callback>.
+
+=head2 tree
+
+Alpha stage.
+
+Produces tree based on L<HTML::Element>s, like L<HTML::TreeBuilder>.
+
+There is a major difference from HTML::TreeBuilder: this method produces
+a top level element with tag name 'document' which may have doctype, comments
+and html tags.
+
+Yes, it's not ready to use as a drop-in replacement for tree builder. Patches
+are welcome. I don't use this formatter at the moment.
+
+=head1 CHARACTER ENCODING OF THE INPUT
+
+The C parser works only with UTF-8, so you have several options to make
+sure input is UTF-8. First of all define C<input_is>:
+
+=over 4
+
+=item string
+
+Input is Perl string, for example obtained from L<HTTP::Response/decoded_content>.
+Default value.
+
+=item octets
+
+Input is a string of octets. A partial implementation of the
+L<encoding sniffing algorithm|http://www.w3.org/TR/html5/syntax.html#encoding-sniffing-algorithm>
+is used:
+
+=over 4
+
+=item C<encoding> argument
+
+Use it to hardcode a specific encoding.
+
+=item BOM
+
+UTF-8/UTF-16 BOMs are checked.
+
+=item C<encoding_content_type>
+
+Encoding from transport layer, charset in content-type header.
+
+=item Prescan
+
+Not implemented, follow L<issue 58|https://github.com/google/gumbo-parser/issues/58>.
+
+HTML5 defines L<prescan algorithm|http://www.w3.org/TR/html5/syntax.html#prescan-a-byte-stream-to-determine-its-encoding>
+that extracts encoding from meta tags in the head.
+
+It would be cool to get it in the C library, but I will accept a patch that implements it in pure perl.
+
+=item C<encoding_tentative> argument
+
+The likely encoding for this page, e.g. based on the encoding of the
+page when it was last visited.
+
+=item nested browsing context
+
+Not implemented. Fragment parsing with or without context is not implemented. Parser
+also has no origin information, so it wouldn't be implemented.
+
+=item autodetection
+
+Not implemented.
+
+Can be implemented using L<Encode::Detect::Detector>. Patches are welcome.
+
+=item otherwise
+
+It B<dies>.
+
+=back
+
+=item C<utf8>
+
+Use utf8 as input_is when you're sure the input is UTF-8 encoded octets.
+No pre-processing at all. Should only be used on trusted input or
+when it's preprocessed already.
+
+=back
+
+=head1 METHODS
+
+=head2 new
+
+    my $parser = HTML::Gumbo->new;
+
+No options at the moment.
+
+=head2 parse
+
+    my $res = $parser->parse(
+        "<h1>hello world!</h1>",
+        format => 'tree',
+        input_is => 'string',
+    );
+
+Takes html string and pairs of named arguments:
+
+=over 4
+
+=item format
+
+Output format, default is string. See L</SUPPORTED OUTPUT FORMATS>.
+
+=item input_is
+
+Whether html is perl 'string', 'octets' or 'utf8' (octets known to
+be utf8). See L</CHARACTER ENCODING OF THE INPUT>.
+
+=item encoding, encoding_content_type, encoding_tentative
+
+See L</CHARACTER ENCODING OF THE INPUT>.
+
+=item ...
+
+Some formatters may have additional arguments.
+
+=back
+
+Return value depends on the picked format.
+
+=cut
+
+sub new {
+    my $proto = shift;
+    return bless {@_}, ref($proto) || $proto;
+}
+
+# Parse an HTML document and return it in the requested output format.
+#
+# Arguments: the HTML input followed by named options:
+#   format    - output format name; dispatched to the parse_to_<format>
+#               method (default 'string'). Dies if no such method exists.
+#   input_is  - 'string' (Perl characters, default), 'utf8' (octets already
+#               in UTF-8) or 'octets' (octets in some other encoding).
+#   encoding, encoding_content_type, encoding_tentative
+#             - encoding hints, consulted in that order (with a BOM check
+#               after 'encoding') when input_is is 'octets'.
+# Remaining options are passed through to the format method.
+#
+# Returns whatever the format method returns.
+sub parse {
+    my $self = shift;
+    my $what = shift;
+    my %args = @_;
+
+    my $format = $args{'format'} || 'string';
+    my $method = 'parse_to_'. $format;
+    die "'$format' format is not supported"
+        unless $self->can($method);
+
+    my $input_is = $args{'input_is'} || 'string';
+    if ( $input_is eq 'string' ) {
+        # the C parser wants UTF-8 octets, not Perl characters
+        utf8::encode($what);
+    }
+    elsif ( $input_is eq 'utf8' ) {
+        # already UTF-8 octets, pass through untouched
+    }
+    elsif ( $input_is eq 'octets' ) {
+        my $enc = $args{'encoding'};
+        unless ( $enc ) {
+            # sniff the BOM in the input itself (not in the option value)
+            if ( $what =~ /^(?: (\x{FE}\x{FF}) | (\x{FF}\x{FE}) | \x{EF}\x{BB}\x{BF} )/x ) {
+                $enc = $1 ? 'UTF-16BE' : $2 ? 'UTF-16LE' : 'UTF-8';
+            }
+            elsif ( $enc = $args{'encoding_content_type'} ) {
+            }
+            elsif ( $enc = $args{'encoding_tentative'} ) {
+            }
+            else {
+                die "Encoding detection is not implemented";
+            }
+        }
+
+        # Convert regardless of how the encoding was determined, so that an
+        # explicit 'encoding' argument is honoured too.
+        require Encode;
+        Encode::from_to($what, $enc, 'UTF-8');
+    }
+    return $self->$method( \$what, %args );
+}
+
+sub parse_to_callback {
+    my ($self, $buf, %rest) = @_;
+    die "No callback provided" unless $rest{'callback'};
+    return $self->_parse_to_callback( $buf, $rest{'callback'} );
+}
+
+=head1 AUTHOR
+
+Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>
+
+=head1 LICENSE
+
+Under the same terms as perl itself.
+
+=cut
+
+1;
\ No newline at end of file
diff --git a/lib/HTML/Gumbo.xs b/lib/HTML/Gumbo.xs
new file mode 100644
index 0000000..88a8b5c
--- /dev/null
+++ b/lib/HTML/Gumbo.xs
@@ -0,0 +1,524 @@
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+
+#include "gumbo.h"
+
+#define PHG_IS_VOID_ELEMENT(tag) \
+    (  tag == GUMBO_TAG_AREA \
+    || tag == GUMBO_TAG_BASE \
+    || tag == GUMBO_TAG_BR \
+    || tag == GUMBO_TAG_COL \
+    || tag == GUMBO_TAG_EMBED \
+    || tag == GUMBO_TAG_HR \
+    || tag == GUMBO_TAG_IMG \
+    || tag == GUMBO_TAG_INPUT \
+    || tag == GUMBO_TAG_KEYGEN \
+    || tag == GUMBO_TAG_LINK \
+    || tag == GUMBO_TAG_META \
+    || tag == GUMBO_TAG_PARAM \
+    || tag == GUMBO_TAG_SOURCE \
+    || tag == GUMBO_TAG_TRACK \
+    || tag == GUMBO_TAG_WBR )
+
+#define newSVpvz8(str) \
+    newSVpvn_utf8((str), strlen(str), 1)
+
+#define newSVpvn8(str, len) \
+    newSVpvn_utf8((str), (len), 1)
+
+typedef enum {
+    PHG_ELEMENT_START,
+    PHG_ELEMENT_END,
+    PHG_TEXT
+} PerlHtmlGumboType;
+
+/*
+ * Depth-first traversal of a Gumbo parse tree.
+ *
+ * Document and element nodes are reported to cb as PHG_ELEMENT_START before
+ * their children are visited and as PHG_ELEMENT_END afterwards; every other
+ * node type is reported once as PHG_TEXT. ctx is handed to cb unchanged.
+ */
+STATIC
+void
+walk_tree(pTHX_ GumboNode* node, void (*cb)(pTHX_ PerlHtmlGumboType, GumboNode*, void*), void* ctx ) {
+    if ( node->type == GUMBO_NODE_DOCUMENT || node->type == GUMBO_NODE_ELEMENT ) {
+        GumboVector* children;
+        unsigned int i; /* declared up front: not every compiler accepts C99 loop declarations */
+
+        (*cb)(aTHX_ PHG_ELEMENT_START, node, ctx);
+        if ( node->type == GUMBO_NODE_DOCUMENT ) {
+            children = &node->v.document.children;
+        } else {
+            children = &node->v.element.children;
+        }
+        /* children is the address of an embedded struct, so it is never NULL */
+        for (i = 0; i < children->length; ++i) {
+            walk_tree(aTHX_ children->data[i], cb, ctx);
+        }
+        (*cb)(aTHX_ PHG_ELEMENT_END, node, ctx);
+    } else {
+        (*cb)(aTHX_ PHG_TEXT, node, ctx);
+    }
+}
+
+STATIC
+GumboStringPiece
+get_tag_name(GumboElement* e) {
+    GumboStringPiece res;
+    if ( e->tag == GUMBO_TAG_UNKNOWN ) {
+        res = e->original_tag;
+        gumbo_tag_from_original_text(&res);
+    } else {
+        res.data = gumbo_normalized_tagname(e->tag);
+        res.length = strlen(res.data);
+    }
+    return res;
+}
+
+/*
+ * Append the NUL-terminated attribute value v to out, replacing '&' with
+ * "&amp;" and '"' with "&quot;" so the result is safe inside a
+ * double-quoted attribute.
+ */
+STATIC void
+out_attr_value(SV* out, const char* v) {
+    STRLEN prev = 0;            /* start of the not-yet-copied literal run */
+    STRLEN len = strlen(v);
+    STRLEN i;
+    for ( i = 0; i < len; i++ ) {
+        if (v[i] != '"' && v[i] != '&' )
+            continue;
+        if (i != prev)
+            sv_catpvn(out, v+prev, i-prev);
+        sv_catpv(out, v[i] == '&'? "&amp;": "&quot;");
+        /* Do not advance i here: the loop increment does that, and the
+         * character right after an escaped one must be examined as well. */
+        prev = i + 1;
+    }
+    if (prev < len)
+        sv_catpvn(out, v+prev, len-prev);
+}
+
+/*
+ * Append the NUL-terminated text v to out, replacing '&', '<' and '>' with
+ * "&amp;", "&lt;" and "&gt;" respectively.
+ */
+STATIC void
+out_text(SV* out, const char* v) {
+    STRLEN prev = 0;            /* start of the not-yet-copied literal run */
+    STRLEN len = strlen(v);
+    STRLEN i;
+    for ( i = 0; i < len; i++ ) {
+        if (v[i] != '<' && v[i] != '>' && v[i] != '&' )
+            continue;
+        if (i != prev)
+            sv_catpvn(out, v+prev, i-prev);
+        sv_catpv(out, v[i] == '&'? "&amp;": (v[i] == '<'? "&lt;" : "&gt;"));
+        /* Do not advance i here: the loop increment does that, and the
+         * character right after an escaped one must be examined as well. */
+        prev = i + 1;
+    }
+    if (prev < len)
+        sv_catpvn(out, v+prev, len-prev);
+}
+
+/*
+ * Append the opening tag of element e to out: "<name", then each attribute
+ * as ' name' or ' name="escaped value"' (the value part is omitted when the
+ * value is empty), then ">".
+ */
+STATIC void
+out_tag_start_line(SV* out, GumboElement* e) {
+    GumboStringPiece piece = get_tag_name(e);
+    unsigned int i; /* declared up front: not every compiler accepts C99 loop declarations */
+
+    sv_catpvs(out, "<");
+    sv_catpvn(out, piece.data, piece.length);
+    for (i = 0; i < e->attributes.length; i++) {
+        GumboAttribute* attr = (GumboAttribute*) e->attributes.data[i];
+        sv_catpvs(out, " ");
+        sv_catpv(out, attr->name);
+        if (strlen(attr->value)) {
+            sv_catpvs(out, "=\"");
+            out_attr_value(out, attr->value);
+            sv_catpvs(out, "\"");
+        }
+    }
+    sv_catpvs(out, ">");
+
+    return;
+}
+
+STATIC void
+out_tag_end_line(SV* out, GumboElement* e) {
+    GumboStringPiece piece;
+    if ( PHG_IS_VOID_ELEMENT(e->tag))
+        return;
+
+    sv_catpvs(out, "</");
+    piece = get_tag_name(e);
+    sv_catpvn(out, piece.data, piece.length);
+    sv_catpvs(out, ">");
+
+    return;
+}
+
+STATIC void
+out_doctype_text( SV* out, GumboDocument* doc ) {
+    sv_catpvs(out, "DOCTYPE");
+    if (strlen(doc->name)>0) {
+        sv_catpvs(out, " ");
+        sv_catpv(out, doc->name);
+    }
+    if (strlen(doc->public_identifier)>0) {
+        sv_catpvs(out, " PUBLIC \"");
+        sv_catpv(out, doc->public_identifier);
+        sv_catpvs(out, "\"");
+    }
+    if (strlen(doc->system_identifier)>0) {
+        sv_catpvs(out, " \"");
+        sv_catpv(out, doc->system_identifier);
+        sv_catpvs(out, "\"");
+    }
+}
+
+STATIC void
+out_doctype( SV* out, GumboDocument* doc ) {
+    sv_catpvs(out, "<!");
+    out_doctype_text(out, doc);
+    sv_catpvs(out, ">\n");
+}
+
+STATIC void
+tree_to_string(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
+    SV* out = (SV*) ctx;
+    if ( type == PHG_TEXT ) {
+        if ( node->type == GUMBO_NODE_COMMENT ) {
+            sv_catpvs(out, "<!--");
+        }
+        else if ( node->type == GUMBO_NODE_CDATA ) {
+            sv_catpvs(out, "<![CDATA[");
+        }
+        if ( node->type == GUMBO_NODE_TEXT ) {
+            out_text(out, node->v.text.text);
+        } else {
+            sv_catpv(out, node->v.text.text);
+        }
+        if ( node->type == GUMBO_NODE_COMMENT ) {
+            sv_catpvs(out, "-->");
+        }
+        else if ( node->type == GUMBO_NODE_CDATA ) {
+            sv_catpvs(out, "]]>");
+        }
+    }
+    else if ( type == PHG_ELEMENT_START && node->type == GUMBO_NODE_DOCUMENT ) {
+        GumboDocument* doc = &node->v.document;
+        if ( doc->has_doctype )
+            out_doctype(out, doc);
+    }
+    else if ( type == PHG_ELEMENT_END && node->type == GUMBO_NODE_DOCUMENT ) {
+        sv_catpvs(out, "\n");
+    }
+    else if ( type == PHG_ELEMENT_START ) {
+        GumboElement* e = &node->v.element;
+        out_tag_start_line(out, e);
+        if ( e->tag == GUMBO_TAG_PRE || e->tag == GUMBO_TAG_TEXTAREA ) {
+            sv_catpvs(out, "\n");
+        }
+    }
+    else if ( type == PHG_ELEMENT_END ) {
+        GumboElement* e = &node->v.element;
+        out_tag_end_line(out, e);
+    }
+    return;
+}
+
+/*
+ * Build an HTML::Element object for a Gumbo node by calling
+ * HTML::Element->new(...) from C.
+ *
+ *   document node -> new('document')
+ *   element node  -> new(tag, attr_name => attr_value, ...)
+ *   comment node  -> new('~comment', text => comment_text)
+ *
+ * Croaks for any other node type. Returns the scalar produced by the
+ * constructor with its reference count incremented, so it survives the
+ * FREETMPS below; the caller owns that reference.
+ */
+STATIC SV*
+new_html_element(pTHX_ GumboNode* node) {
+    dSP;
+    SV* res;
+    int rcount;
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(SP);
+    mXPUSHs(newSVpvs("HTML::Element"));
+    if ( node->type == GUMBO_NODE_DOCUMENT ) {
+        mXPUSHs(newSVpvs("document"));
+    }
+    else if ( node->type == GUMBO_NODE_ELEMENT ) {
+        GumboVector* attrs = &node->v.element.attributes;
+        GumboStringPiece tag = get_tag_name(&node->v.element);
+        unsigned int i; /* declared up front: not every compiler accepts C99 loop declarations */
+
+        mXPUSHs(newSVpvn8( tag.data, tag.length ));
+        for (i = 0; i < attrs->length; i++) {
+            GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
+            mXPUSHs(newSVpvz8( attr->name ));
+            mXPUSHs(newSVpvz8( attr->value ));
+        }
+    }
+    else if ( node->type == GUMBO_NODE_COMMENT ) {
+        mXPUSHs(newSVpvs("~comment"));
+        mXPUSHs(newSVpvs("text"));
+        mXPUSHs(newSVpvz8( node->v.text.text ));
+    }
+    else {
+        croak("Unknown node type");
+    }
+    PUTBACK;
+
+    rcount = call_method("new", G_SCALAR);
+
+    SPAGAIN;
+
+    if (rcount != 1) croak("Big trouble\n");
+
+    /* keep the returned scalar alive past FREETMPS */
+    res = SvREFCNT_inc_NN(POPs);
+    PUTBACK;
+
+    FREETMPS;
+    LEAVE;
+
+    return res;
+}
+
+STATIC SV*
+new_html_element_doctype(pTHX_ GumboDocument* doc) {
+    dSP;
+    SV* res;
+    SV* doctype;
+    int rcount;
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(SP);
+    mXPUSHs(newSVpvs("HTML::Element"));
+    mXPUSHs(newSVpvs("~declaration"));
+    mXPUSHs(newSVpvs("text"));
+    doctype = newSVpvn8( "",0 );
+    out_doctype_text(doctype, doc);
+    mXPUSHs(doctype);
+    PUTBACK;
+
+    rcount = call_method("new", G_SCALAR);
+
+    SPAGAIN;
+
+    if (rcount != 1) croak("Big trouble\n");
+
+    res = SvREFCNT_inc_NN(POPs);
+    PUTBACK;
+
+    FREETMPS;
+    LEAVE;
+
+    return res;
+}
+
+
+STATIC void
+push_element(pTHX_ SV* parent, SV* element) {
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(SP);
+    XPUSHs(parent);
+    XPUSHs(element);
+    PUTBACK;
+
+    call_method("push_content", G_DISCARD);
+
+    FREETMPS;
+    LEAVE;
+}
+
+/*
+ * Call parent->push_content(text) with the text s.
+ *
+ * len is the length of s in bytes; 0 means "s is NUL-terminated, measure
+ * it". The parser produces UTF-8, so the string is handed to Perl flagged
+ * as UTF-8, the same way tag names, attributes and comments are in this
+ * file.
+ */
+STATIC void
+push_text_element(pTHX_ SV* parent, const char *const s, const STRLEN len) {
+    dSP;
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(SP);
+    XPUSHs(parent);
+    mXPUSHs(newSVpvn8(s, len ? len : strlen(s)));
+    PUTBACK;
+
+    call_method("push_content", G_DISCARD);
+
+    FREETMPS;
+    LEAVE;
+}
+
+STATIC SV*
+get_element_parent(pTHX_ SV* element) {
+    dSP;
+    SV* res;
+    int rcount;
+
+    ENTER;
+    SAVETMPS;
+    PUSHMARK(SP);
+    XPUSHs(element);
+    PUTBACK;
+
+    rcount = call_method("parent", G_SCALAR);
+
+    SPAGAIN;
+
+    if (rcount != 1) croak("Big trouble\n");
+
+    res = SvREFCNT_inc_NN(POPs);
+    PUTBACK;
+
+    FREETMPS;
+    LEAVE;
+
+    return res;
+}
+
+/*
+ * walk_tree() callback for the 'tree' format.
+ *
+ * ctx points to an SV* that always holds the element currently being
+ * filled: it is set to the new 'document' element at document start,
+ * replaced by each newly opened element, and reset to that element's
+ * parent when the element ends. Comments become '~comment' elements, the
+ * doctype a '~declaration' element, all other leaf nodes plain text.
+ */
+STATIC void
+tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
+    SV** out = (SV**) ctx;
+    if ( type == PHG_TEXT ) {
+        if ( node->type == GUMBO_NODE_COMMENT ) {
+            SV* element = new_html_element(aTHX_ node);
+            push_element(aTHX_ *out, element);
+            SvREFCNT_dec(element);
+        } else {
+            /* length 0: let the callee measure the NUL-terminated text */
+            push_text_element(aTHX_ *out, node->v.text.text, 0);
+        }
+    }
+    else if ( type == PHG_ELEMENT_START && node->type == GUMBO_NODE_DOCUMENT ) {
+        GumboDocument* doc = &node->v.document;
+        *out = new_html_element(aTHX_ node);
+        sv_2mortal(*out);
+        if ( doc->has_doctype ) {
+            SV* element = new_html_element_doctype(aTHX_ doc);
+            push_element(aTHX_ *out, element);
+            SvREFCNT_dec(element);
+        }
+    }
+    else if ( type == PHG_ELEMENT_END && node->type == GUMBO_NODE_DOCUMENT ) {
+        /* nothing to do: *out is the document element again at this point */
+    }
+    else if ( type == PHG_ELEMENT_START ) {
+        SV* element = new_html_element(aTHX_ node);
+        push_element(aTHX_ *out, element);
+        /* descend: subsequent content goes into the new element */
+        *out = element;
+    }
+    else if ( type == PHG_ELEMENT_END ) {
+        /* ascend: drop our reference to the finished element */
+        SV* parent = get_element_parent(aTHX_ *out);
+        SvREFCNT_dec(*out);
+        *out = parent;
+    }
+    return;
+}
+
+/*
+ * walk_tree() callback for the 'callback' format.
+ *
+ * ctx is the Perl callback (SV*). It is invoked once per event with the
+ * event name followed by event-specific arguments:
+ *
+ *   'document start', { name, public, system } or undef without doctype
+ *   'document end'
+ *   'start', tag, [ attr_name, attr_value, ... ]
+ *   'end', tag                    (not emitted for void elements)
+ *   'text' | 'space' | 'cdata' | 'comment', text
+ */
+STATIC void
+tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
+    dSP;
+    SV* cb = (SV*) ctx;
+
+    /* Void elements get no 'end' event. Only real element nodes may be
+     * inspected through v.element; for the document node that union member
+     * is not the active one. */
+    if ( type == PHG_ELEMENT_END
+        && node->type == GUMBO_NODE_ELEMENT
+        && PHG_IS_VOID_ELEMENT(node->v.element.tag) )
+        return;
+
+    ENTER;
+    SAVETMPS;
+
+    PUSHMARK(SP);
+    if ( type == PHG_TEXT ) {
+        switch ( node->type ) {
+            case GUMBO_NODE_TEXT:
+                mXPUSHs(newSVpvs("text"));break;
+            case GUMBO_NODE_WHITESPACE:
+                mXPUSHs(newSVpvs("space"));break;
+            case GUMBO_NODE_CDATA:
+                mXPUSHs(newSVpvs("cdata"));break;
+            case GUMBO_NODE_COMMENT:
+                mXPUSHs(newSVpvs("comment"));break;
+            default:
+                croak("Unknown node type");
+        }
+        mXPUSHs(newSVpvz8( node->v.text.text ));
+    }
+    else if ( type == PHG_ELEMENT_START && node->type == GUMBO_NODE_DOCUMENT ) {
+        GumboDocument* doc = &node->v.document;
+        mXPUSHs(newSVpvs("document start"));
+        if ( doc->has_doctype ) {
+            HV* h = newHV();
+            mXPUSHs(newRV_noinc(MUTABLE_SV(h)));
+            (void)hv_stores(h, "name", newSVpvz8( doc->name ));
+            (void)hv_stores(h, "public", newSVpvz8( doc->public_identifier ));
+            (void)hv_stores(h, "system", newSVpvz8( doc->system_identifier ));
+        } else {
+            /* PL_sv_undef is immortal, so it must not be mortalized */
+            XPUSHs(&PL_sv_undef);
+        }
+    }
+    else if ( type == PHG_ELEMENT_END && node->type == GUMBO_NODE_DOCUMENT ) {
+        mXPUSHs(newSVpvs("document end"));
+    }
+    else if ( type == PHG_ELEMENT_START ) {
+        GumboVector* attrs = &node->v.element.attributes;
+        GumboStringPiece tag = get_tag_name(&node->v.element);
+        AV* for_attrs = newAV();
+        unsigned int i; /* declared up front: not every compiler accepts C99 loop declarations */
+
+        mXPUSHs(newSVpvs("start"));
+        mXPUSHs(newSVpvn8( tag.data, tag.length ));
+        /* the mortal reference owns the array; fill it after pushing */
+        mXPUSHs(newRV_noinc(MUTABLE_SV(for_attrs)));
+        for (i = 0; i < attrs->length; i++) {
+            GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
+            av_push(for_attrs, newSVpvz8( attr->name ));
+            av_push(for_attrs, newSVpvz8( attr->value ));
+        }
+    }
+    else if ( type == PHG_ELEMENT_END ) {
+        GumboStringPiece tag = get_tag_name(&node->v.element);
+        mXPUSHs(newSVpvs("end"));
+        mXPUSHs(newSVpvn8( tag.data, tag.length ));
+    }
+
+    PUTBACK;
+
+    call_sv(cb, G_DISCARD);
+
+    FREETMPS;
+    LEAVE;
+
+    return;
+}
+
+/*
+ * Return the string held by the scalar that buffer refers to.
+ * Croaks if buffer is not a reference. The returned pointer belongs to
+ * the referenced scalar; the caller must not free it.
+ */
+STATIC
+char* prepare_buffer(SV* buffer) {
+    /* croak(), not Perl_croak(): the latter takes the interpreter context
+     * as its first argument on threaded perls */
+    if(!SvROK(buffer))
+        croak("First argument is not a reference");
+
+    buffer = SvRV(buffer);
+    return SvPV_nolen(buffer);
+}
+
+MODULE = HTML::Gumbo    PACKAGE = HTML::Gumbo
+
+SV*
+parse_to_string(self, buffer, ...)
+    SV *self
+    SV *buffer
+
+    CODE:
+        const char* str = prepare_buffer(buffer);
+
+        RETVAL = newSVpvn8("", 0);
+
+        GumboOutput* output = gumbo_parse(str);
+        walk_tree(aTHX_ output->document, tree_to_string, (void*)RETVAL);
+        gumbo_destroy_output(&kGumboDefaultOptions, output);
+
+    OUTPUT: RETVAL
+
+SV*
+parse_to_tree(self, buffer, ...)
+    SV *self
+    SV *buffer
+
+    CODE:
+        const char* str;
+        load_module(
+            0,
+            newSVpvs("HTML::TreeBuilder"),
+            newSViv(5), newSVpvs("-weak"), NULL
+        );
+        str = prepare_buffer(buffer);
+
+        SV* res;
+        GumboOutput* output = gumbo_parse(str);
+        walk_tree(aTHX_ output->document, tree_to_tree, (void*)(&res));
+        gumbo_destroy_output(&kGumboDefaultOptions, output);
+        RETVAL = res;
+
+    OUTPUT: RETVAL
+
+void
+_parse_to_callback(self, buffer, cb, ...)
+    SV *self
+    SV *buffer
+    SV *cb
+
+    CODE:
+        const char* str = prepare_buffer(buffer);
+
+        GumboOutput* output = gumbo_parse(str);
+        walk_tree(aTHX_ output->document, tree_to_callback, (void*)cb);
+        gumbo_destroy_output(&kGumboDefaultOptions, output);
+
+        XSRETURN_YES;
diff --git a/t/callback.t b/t/callback.t
new file mode 100644
index 0000000..7e786d1
--- /dev/null
+++ b/t/callback.t
@@ -0,0 +1,39 @@
+use strict;
+use warnings;
+use Test::More;
+
+use_ok('HTML::Gumbo');
+
+my $parser = HTML::Gumbo->new;
+my $input = <<'END';
+<!DOCTYPE html>
+<!--This is a comment-->
+<h1>hello world!</h1>
+<img disabled boo="foo" />
+END
+my @expected = (
+    ['document start', {name => 'html', public => '', system => ''}],
+    ['comment', 'This is a comment'],
+    ['start', 'html', []],
+    ['start', 'head', []],
+    ['end', 'head'],
+    ['start', 'body', []],
+
+    ['start', 'h1', []],
+    ['text', 'hello world!'],
+    ['end', 'h1'],
+    ['space', "\n"],
+
+    ['start', 'img', [disabled => "", boo => "foo"]],
+    ['space', "\n"],
+
+    ['end', 'body'],
+    ['end', 'html'],
+    ['document end'],
+);
+# Record every event the parser reports, then compare against @expected;
+# without the comparison this test only proves the module loads.
+my @got;
+my $res = $parser->parse($input, format => 'callback', callback => sub {
+    push @got, [@_];
+});
+is_deeply \@got, \@expected, 'callback events match expected sequence';
+
+done_testing();
diff --git a/t/string.t b/t/string.t
new file mode 100644
index 0000000..c254072
--- /dev/null
+++ b/t/string.t
@@ -0,0 +1,75 @@
+use strict;
+use warnings;
+use utf8;
+use Test::More;
+
+use_ok('HTML::Gumbo');
+
+my $parser = HTML::Gumbo->new;
+my $input = <<'END';
+<!DOCTYPE html>
+<!--This is a comment-->
+<h1>hello world!</h1>
+<div class="test">
+  <p>first para
+  <p>second
+</div>
+<div>
+  <img />
+  <img alt="©">
+  <img></img>
+</div>
+<some>
+END
+my $expected = <<'END';
+<!DOCTYPE html>
+<!--This is a comment--><html><head></head><body><h1>hello world!</h1>
+<div class="test">
+  <p>first para
+  </p><p>second
+</p></div>
+<div>
+  <img>
+  <img alt="©">
+  <img>
+</div>
+<some>
+</some></body></html>
+END
+my $res = $parser->parse($input);
+is $res, $expected, 'very basic test';
+
+$input = <<'END';
+<div class=""•&bull;&"><p></div>
+END
+$expected = <<'END';
+<html><head></head><body><div class=""•&bull;&"><p></div>
+</body></html>
+END
+$res = $parser->parse($input);
+is $res, $expected, 'very basic test';
+
+$input = <<'END';
+<pre>foo</pre>
+<pre>
+foo</pre>
+<pre>
+
+foo</pre>
+END
+$expected = <<'END';
+<html><head></head><body><pre>
+foo</pre>
+<pre>
+foo</pre>
+<pre>
+
+foo</pre>
+</body></html>
+END
+$res = $parser->parse($input);
+is $res, $expected, 'very basic test';
+
+
+
+done_testing();
diff --git a/t/tree.t b/t/tree.t
new file mode 100644
index 0000000..ec795fd
--- /dev/null
+++ b/t/tree.t
@@ -0,0 +1,21 @@
+use strict;
+use warnings;
+use Test::More;
+
+use_ok('HTML::Gumbo');
+
+my $parser = HTML::Gumbo->new;
+my $res = $parser->parse(<<'END', format => 'tree');
+<!DOCTYPE html>
+<!--This is a comment-->
+<h1>hello world!</h1>
+END
+
+my $expected = <<'END';
+<document><!DOCTYPE html><!--This is a comment--><html><head></head><body><h1>hello world!</h1>
+</body></html></document>
+END
+chomp $expected;
+is $res->as_HTML, $expected, 'correct value';
+
+done_testing();

commit 911c69b7d1530a438f4a7c0ab33f702982cdb0f6
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Mon Sep 30 18:36:45 2013 +0400

    .gitignore

diff --git a/.gitignore b/.gitignore
index fd33f3d..975f66a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ nytprof/
 *.tar.gz
 *.sw[po]
 *.bak
+*.[co]

commit 09a44ddf835b6dcf2349ae8eb4cb33a8bf53474b
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:11:47 2013 +0400

    require LibGumbo

diff --git a/Build.PL b/Build.PL
index cba75d6..4634b02 100644
--- a/Build.PL
+++ b/Build.PL
@@ -11,6 +11,9 @@ my $builder = Module::Build->new(
     build_requires => {
         'ExtUtils::CBuilder' => 0,
     },
+    requires => {
+        'Alien::LibGumbo' => 0,
+    },
 
     extra_compiler_flags => $alien->cflags(),
     extra_linker_flags   => $alien->libs(),

commit 680f5e928748c534d2f8e139076a9d3f71045413
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:12:11 2013 +0400

    update manifest

diff --git a/MANIFEST b/MANIFEST
index 410bd95..1cd61be 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5,3 +5,5 @@ MANIFEST			This list of files
 t/callback.t
 t/string.t
 t/tree.t
+META.yml
+META.json

commit 5f13a58c90f72bfe1700eab9e0418fed7fa9302c
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:12:41 2013 +0400

    skip a test if HTML::TreeBuilder is not installed

diff --git a/t/tree.t b/t/tree.t
index ec795fd..1d9fa7d 100644
--- a/t/tree.t
+++ b/t/tree.t
@@ -2,6 +2,10 @@ use strict;
 use warnings;
 use Test::More;
 
+unless ( do { local $@; eval "use HTML::TreeBuilder 5 -weak; 1" } } ) {
+    plan skip_all => 'No HTML::TreeBuilder 5 -weak';
+}
+
 use_ok('HTML::Gumbo');
 
 my $parser = HTML::Gumbo->new;

commit c323f0a147cfab3222107ec28798fc5700c05f58
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:13:42 2013 +0400

    add metafiles

diff --git a/META.json b/META.json
new file mode 100644
index 0000000..85b0296
--- /dev/null
+++ b/META.json
@@ -0,0 +1,42 @@
+{
+   "abstract" : "HTML5 parser based on gumbo C library",
+   "author" : [
+      "Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>"
+   ],
+   "dynamic_config" : 1,
+   "generated_by" : "Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560",
+   "license" : [
+      "unknown"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : "2"
+   },
+   "name" : "HTML-Gumbo",
+   "prereqs" : {
+      "build" : {
+         "requires" : {
+            "ExtUtils::CBuilder" : "0"
+         }
+      },
+      "configure" : {
+         "requires" : {
+            "Alien::LibGumbo" : "0",
+            "Module::Build" : "0.40"
+         }
+      },
+      "runtime" : {
+         "requires" : {
+            "Alien::LibGumbo" : "0"
+         }
+      }
+   },
+   "provides" : {
+      "HTML::Gumbo" : {
+         "file" : "lib/HTML/Gumbo.pm",
+         "version" : "0.1"
+      }
+   },
+   "release_status" : "stable",
+   "version" : "0.1"
+}
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..3905a45
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,23 @@
+---
+abstract: 'HTML5 parser based on gumbo C library'
+author:
+  - 'Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>'
+build_requires:
+  ExtUtils::CBuilder: 0
+configure_requires:
+  Alien::LibGumbo: 0
+  Module::Build: 0.40
+dynamic_config: 1
+generated_by: 'Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560'
+license: unknown
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: HTML-Gumbo
+provides:
+  HTML::Gumbo:
+    file: lib/HTML/Gumbo.pm
+    version: 0.1
+requires:
+  Alien::LibGumbo: 0
+version: 0.1

commit 156464438fe213e3ea3de3dc6f52c7fc026624f5
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:12:41 2013 +0400

    skip a test if HTML::TreeBuilder is not installed

diff --git a/t/tree.t b/t/tree.t
index ec795fd..be8a409 100644
--- a/t/tree.t
+++ b/t/tree.t
@@ -2,6 +2,10 @@ use strict;
 use warnings;
 use Test::More;
 
+unless ( do { local $@; eval "use HTML::TreeBuilder 5 -weak; 1" } ) {
+    plan skip_all => 'No HTML::TreeBuilder 5 -weak';
+}
+
 use_ok('HTML::Gumbo');
 
 my $parser = HTML::Gumbo->new;

commit fe964496fd59a569d8fd6c4cb2b08023aba2c555
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:13:42 2013 +0400

    add metafiles

diff --git a/META.json b/META.json
new file mode 100644
index 0000000..85b0296
--- /dev/null
+++ b/META.json
@@ -0,0 +1,42 @@
+{
+   "abstract" : "HTML5 parser based on gumbo C library",
+   "author" : [
+      "Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>"
+   ],
+   "dynamic_config" : 1,
+   "generated_by" : "Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560",
+   "license" : [
+      "unknown"
+   ],
+   "meta-spec" : {
+      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
+      "version" : "2"
+   },
+   "name" : "HTML-Gumbo",
+   "prereqs" : {
+      "build" : {
+         "requires" : {
+            "ExtUtils::CBuilder" : "0"
+         }
+      },
+      "configure" : {
+         "requires" : {
+            "Alien::LibGumbo" : "0",
+            "Module::Build" : "0.40"
+         }
+      },
+      "runtime" : {
+         "requires" : {
+            "Alien::LibGumbo" : "0"
+         }
+      }
+   },
+   "provides" : {
+      "HTML::Gumbo" : {
+         "file" : "lib/HTML/Gumbo.pm",
+         "version" : "0.1"
+      }
+   },
+   "release_status" : "stable",
+   "version" : "0.1"
+}
diff --git a/META.yml b/META.yml
new file mode 100644
index 0000000..3905a45
--- /dev/null
+++ b/META.yml
@@ -0,0 +1,23 @@
+---
+abstract: 'HTML5 parser based on gumbo C library'
+author:
+  - 'Ruslan Zakirov E<lt>ruz at bestpractical.comE<gt>'
+build_requires:
+  ExtUtils::CBuilder: 0
+configure_requires:
+  Alien::LibGumbo: 0
+  Module::Build: 0.40
+dynamic_config: 1
+generated_by: 'Module::Build version 0.4007, CPAN::Meta::Converter version 2.131560'
+license: unknown
+meta-spec:
+  url: http://module-build.sourceforge.net/META-spec-v1.4.html
+  version: 1.4
+name: HTML-Gumbo
+provides:
+  HTML::Gumbo:
+    file: lib/HTML/Gumbo.pm
+    version: 0.1
+requires:
+  Alien::LibGumbo: 0
+version: 0.1

commit ebe02f444de8827ba670dbca133df57e4c5835a5
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:47:01 2013 +0400

    compilation problems
    
    * pass aTHX_ in a few places where it had been forgotten
    * declare loop counters before the loop (C89): for(int i = 0; ...) -> int i; for(i=0;...)

diff --git a/lib/HTML/Gumbo.xs b/lib/HTML/Gumbo.xs
index 88a8b5c..dca4411 100644
--- a/lib/HTML/Gumbo.xs
+++ b/lib/HTML/Gumbo.xs
@@ -45,7 +45,8 @@ walk_tree(pTHX_ GumboNode* node, void (*cb)(pTHX_ PerlHtmlGumboType, GumboNode*,
             children = &node->v.element.children;
         }
         if (children) {
-            for (int i = 0; i < children->length; ++i) {
+            int i = 0;
+            for (i = 0; i < children->length; ++i) {
                 walk_tree(aTHX_ children->data[i], cb, ctx);
             }
         }
@@ -71,9 +72,10 @@ get_tag_name(GumboElement* e) {
 
 STATIC void
 out_attr_value(SV* out, const char* v) {
+    STRLEN i;
     STRLEN prev = 0;
     STRLEN len = strlen(v);
-    for ( STRLEN i = 0; i < len; i++ ) {
+    for ( i = 0; i < len; i++ ) {
         if (v[i] != '"' && v[i] != '&' )
             continue;
         if (i != prev)
@@ -87,9 +89,10 @@ out_attr_value(SV* out, const char* v) {
 
 STATIC void
 out_text(SV* out, const char* v) {
+    STRLEN i;
     STRLEN prev = 0;
     STRLEN len = strlen(v);
-    for ( STRLEN i = 0; i < len; i++ ) {
+    for ( i = 0; i < len; i++ ) {
         if (v[i] != '<' && v[i] != '>' && v[i] != '&' )
             continue;
         if (i != prev)
@@ -103,11 +106,12 @@ out_text(SV* out, const char* v) {
 
 STATIC void
 out_tag_start_line(SV* out, GumboElement* e) {
+    int i;
     GumboStringPiece piece = get_tag_name(e);
 
     sv_catpvs(out, "<");
     sv_catpvn(out, piece.data, piece.length);
-    for (int i = 0; i < e->attributes.length; i++) {
+    for (i = 0; i < e->attributes.length; i++) {
         GumboAttribute* attr = (GumboAttribute*) e->attributes.data[i];
         sv_catpvs(out, " ");
         sv_catpv(out, attr->name);
@@ -220,10 +224,11 @@ new_html_element(pTHX_ GumboNode* node) {
         mXPUSHs(newSVpvs("document"));
     }
     else if ( node->type == GUMBO_NODE_ELEMENT ) {
+        int i;
         GumboVector* attrs = &node->v.element.attributes;
         GumboStringPiece tag = get_tag_name(&node->v.element);
         mXPUSHs(newSVpvn8( tag.data, tag.length ));
-        for (int i = 0; i < attrs->length; i++) {
+        for (i = 0; i < attrs->length; i++) {
             GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
             mXPUSHs(newSVpvz8( attr->name ));
             mXPUSHs(newSVpvz8( attr->value ));
@@ -355,7 +360,7 @@ tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
     if ( type == PHG_TEXT ) {
         if ( node->type == GUMBO_NODE_COMMENT ) {
             SV* element = new_html_element(aTHX_ node);
-            push_element(*out, element);
+            push_element(aTHX_ *out, element);
             SvREFCNT_dec(element);
         } else {
             push_text_element(*out, node->v.text.text, 0);
@@ -367,7 +372,7 @@ tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
         sv_2mortal(*out);
         if ( doc->has_doctype ) {
             SV* element = new_html_element_doctype(aTHX_ doc);
-            push_element(*out, element);
+            push_element(aTHX_ *out, element);
             SvREFCNT_dec(element);
         }
     }
@@ -375,7 +380,7 @@ tree_to_tree(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
     }
     else if ( type == PHG_ELEMENT_START ) {
         SV* element = new_html_element(aTHX_ node);
-        push_element(*out, element);
+        push_element(aTHX_ *out, element);
         *out = element;
     }
     else if ( type == PHG_ELEMENT_END ) {
@@ -430,6 +435,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
         mXPUSHs(newSVpvs("document end"));
     }
     else if ( type == PHG_ELEMENT_START ) {
+        int i;
         GumboVector* attrs = &node->v.element.attributes;
         GumboStringPiece tag = get_tag_name(&node->v.element);
         AV* for_attrs = newAV();
@@ -437,7 +443,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
         mXPUSHs(newSVpvs("start"));
         mXPUSHs(newSVpvn8( tag.data, tag.length ));
         mXPUSHs(newRV_noinc(MUTABLE_SV(for_attrs)));
-        for (int i = 0; i < attrs->length; i++) {
+        for (i = 0; i < attrs->length; i++) {
             GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
             av_push(for_attrs, newSVpvz8( attr->name ));
             av_push(for_attrs, newSVpvz8( attr->value ));
@@ -460,9 +466,9 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
 }
 
 STATIC
-char* prepare_buffer(SV* buffer) {
+char* prepare_buffer(pTHX_ SV* buffer) {
     if(!SvROK(buffer))
-        Perl_croak("First argument is not a reference");
+        croak("First argument is not a reference");
 
     buffer = SvRV(buffer);
     return SvPV_nolen(buffer);
@@ -476,7 +482,7 @@ parse_to_string(self, buffer, ...)
     SV *buffer
 
     CODE:
-        const char* str = prepare_buffer(buffer);
+        const char* str = prepare_buffer(aTHX_ buffer);
 
         RETVAL = newSVpvn8("", 0);
 
@@ -498,7 +504,7 @@ parse_to_tree(self, buffer, ...)
             newSVpvs("HTML::TreeBuilder"),
             newSViv(5), newSVpvs("-weak"), NULL
         );
-        str = prepare_buffer(buffer);
+        str = prepare_buffer(aTHX_ buffer);
 
         SV* res;
         GumboOutput* output = gumbo_parse(str);
@@ -515,7 +521,7 @@ _parse_to_callback(self, buffer, cb, ...)
     SV *cb
 
     CODE:
-        const char* str = prepare_buffer(buffer);
+        const char* str = prepare_buffer(aTHX_ buffer);
 
         GumboOutput* output = gumbo_parse(str);
         walk_tree(aTHX_ output->document, tree_to_callback, (void*)cb);

commit 4a11e5d268804bce8a137b67ea9f3d8a8927ad9a
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 11:51:44 2013 +0400

    bump version, 0.11

diff --git a/META.json b/META.json
index 85b0296..155104a 100644
--- a/META.json
+++ b/META.json
@@ -34,9 +34,9 @@
    "provides" : {
       "HTML::Gumbo" : {
          "file" : "lib/HTML/Gumbo.pm",
-         "version" : "0.1"
+         "version" : "0.11"
       }
    },
    "release_status" : "stable",
-   "version" : "0.1"
+   "version" : "0.11"
 }
diff --git a/META.yml b/META.yml
index 3905a45..5f8fa14 100644
--- a/META.yml
+++ b/META.yml
@@ -17,7 +17,7 @@ name: HTML-Gumbo
 provides:
   HTML::Gumbo:
     file: lib/HTML/Gumbo.pm
-    version: 0.1
+    version: 0.11
 requires:
   Alien::LibGumbo: 0
-version: 0.1
+version: 0.11
diff --git a/lib/HTML/Gumbo.pm b/lib/HTML/Gumbo.pm
index 7bfd539..a0ceb07 100644
--- a/lib/HTML/Gumbo.pm
+++ b/lib/HTML/Gumbo.pm
@@ -5,7 +5,7 @@ use warnings;
 package HTML::Gumbo;
 
 use Alien::LibGumbo;
-our $VERSION = '0.1';
+our $VERSION = '0.11';
 
 require XSLoader;
 XSLoader::load('HTML::Gumbo', $VERSION);

commit e1efca454a5e593f0cadcccfd9107cb0af2d1de7
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 22:52:28 2013 +0400

    replace MUTABLE_*V(x) with explicit casting
    
    This set of macros was introduced in perl 5.11
    and is not currently supported by Devel::PPPort[1].
    Not going to shave this yak for two uses of the macro.
    
    [1] https://rt.cpan.org/Public/Bug/Display.html?id=80476

diff --git a/lib/HTML/Gumbo.xs b/lib/HTML/Gumbo.xs
index dca4411..f450969 100644
--- a/lib/HTML/Gumbo.xs
+++ b/lib/HTML/Gumbo.xs
@@ -423,7 +423,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
         mXPUSHs(newSVpvs("document start"));
         if ( doc->has_doctype ) {
             HV* h = newHV();
-            mXPUSHs(newRV_noinc(MUTABLE_SV(h)));
+            mXPUSHs(newRV_noinc((SV*)h));
             (void)hv_stores(h, "name", newSVpvz8( doc->name ));
             (void)hv_stores(h, "public", newSVpvz8( doc->public_identifier ));
             (void)hv_stores(h, "system", newSVpvz8( doc->system_identifier ));
@@ -442,7 +442,7 @@ tree_to_callback(pTHX_ PerlHtmlGumboType type, GumboNode* node, void* ctx) {
 
         mXPUSHs(newSVpvs("start"));
         mXPUSHs(newSVpvn8( tag.data, tag.length ));
-        mXPUSHs(newRV_noinc(MUTABLE_SV(for_attrs)));
+        mXPUSHs(newRV_noinc((SV*)for_attrs));
         for (i = 0; i < attrs->length; i++) {
             GumboAttribute* attr = (GumboAttribute*) attrs->data[i];
             av_push(for_attrs, newSVpvz8( attr->name ));

commit a9c39575193d6731630e49a61ba297a297b866b6
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 22:55:52 2013 +0400

    bump version, 0.12

diff --git a/META.json b/META.json
index 155104a..1917b5c 100644
--- a/META.json
+++ b/META.json
@@ -34,7 +34,7 @@
    "provides" : {
       "HTML::Gumbo" : {
          "file" : "lib/HTML/Gumbo.pm",
-         "version" : "0.11"
+         "version" : "0.12"
       }
    },
    "release_status" : "stable",
diff --git a/META.yml b/META.yml
index 5f8fa14..5868cf5 100644
--- a/META.yml
+++ b/META.yml
@@ -17,7 +17,7 @@ name: HTML-Gumbo
 provides:
   HTML::Gumbo:
     file: lib/HTML/Gumbo.pm
-    version: 0.11
+    version: 0.12
 requires:
   Alien::LibGumbo: 0
 version: 0.11
diff --git a/lib/HTML/Gumbo.pm b/lib/HTML/Gumbo.pm
index a0ceb07..43b1c37 100644
--- a/lib/HTML/Gumbo.pm
+++ b/lib/HTML/Gumbo.pm
@@ -5,7 +5,7 @@ use warnings;
 package HTML::Gumbo;
 
 use Alien::LibGumbo;
-our $VERSION = '0.11';
+our $VERSION = '0.12';
 
 require XSLoader;
 XSLoader::load('HTML::Gumbo', $VERSION);

commit 92d5c908de18d20358348087c6a9e17a2e31d5f4
Merge: a9c3957 c323f0a
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 23:02:28 2013 +0400

    Merge remote-tracking branch 'origin/master'
    
    Conflicts:
    	META.json
    	META.yml
    	t/tree.t


commit d349f7c9cea42f57347fd14b7d856ec670028539
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Wed Oct 9 23:03:19 2013 +0400

    update meta

diff --git a/META.json b/META.json
index 1917b5c..535af14 100644
--- a/META.json
+++ b/META.json
@@ -38,5 +38,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : "0.11"
+   "version" : "0.12"
 }
diff --git a/META.yml b/META.yml
index 5868cf5..c1c8e2e 100644
--- a/META.yml
+++ b/META.yml
@@ -20,4 +20,4 @@ provides:
     version: 0.12
 requires:
   Alien::LibGumbo: 0
-version: 0.11
+version: 0.12

commit b9472f65048425462b14121da6cae3c131caed1e
Author: Neil Bowers <neil at bowers.com>
Date:   Wed Oct 9 20:57:16 2013 +0100

    Added link to github repo

diff --git a/Build.PL b/Build.PL
index 4634b02..17388e4 100644
--- a/Build.PL
+++ b/Build.PL
@@ -15,7 +15,13 @@ my $builder = Module::Build->new(
         'Alien::LibGumbo' => 0,
     },
 
+    meta_merge => {
+        resources => {
+            repository => 'https://github.com/ruz/HTML-Gumbo'
+        }
+    },
+
     extra_compiler_flags => $alien->cflags(),
     extra_linker_flags   => $alien->libs(),
 );
-$builder->create_build_script;
\ No newline at end of file
+$builder->create_build_script;

commit 3364cd4e1f47b19dce0981fc03ab9519f66c0d8e
Author: Neil Bowers <neil at bowers.com>
Date:   Wed Oct 9 21:00:07 2013 +0100

    Initial skeleton Changes file

diff --git a/Changes b/Changes
new file mode 100644
index 0000000..2b77299
--- /dev/null
+++ b/Changes
@@ -0,0 +1,10 @@
+Revision history for Perl module HTML::Gumbo
+
+0.12 2013-10-09
+
+0.11 2013-10-09
+
+0.1 2013-09-30
+
+    - first release to CPAN
+

commit 296240499d9c4518b65dcf94bdf25dc3e7fec565
Merge: d349f7c 3364cd4
Author: Ruslan Zakirov <Ruslan.Zakirov at gmail.com>
Date:   Wed Oct 9 22:29:00 2013 -0700

    Merge pull request #1 from neilbowers/master
    
    Added Changes file and ensured repo link in metadata


commit 9207c21cf501300b224d5635abd2b66a3be9af6c
Author: Ruslan Zakirov <ruz at bestpractical.com>
Date:   Thu Oct 10 09:33:40 2013 +0400

    update changelog

diff --git a/Changes b/Changes
index 2b77299..0ab4c56 100644
--- a/Changes
+++ b/Changes
@@ -2,8 +2,14 @@ Revision history for Perl module HTML::Gumbo
 
 0.12 2013-10-09
 
+    - perl 5.10 compatibility
+
 0.11 2013-10-09
 
+    - use c89 scoping of iterator in for loops
+    - fixes for threaded perls
+    - skip test if HTML::TreeBuilder is not installed
+
 0.1 2013-09-30
 
     - first release to CPAN

-----------------------------------------------------------------------


More information about the Bps-public-commit mailing list