[Bps-public-commit] wifty branch, master, updated. 92714a46dbce2c18670d1bb88a1165edf6367056

Thomas Sibley trs at bestpractical.com
Tue Nov 2 13:24:58 EDT 2010


The branch, master has been updated
       via  92714a46dbce2c18670d1bb88a1165edf6367056 (commit)
      from  6fd9227e69c740f31e87a2ca7c2d31affef53fb9 (commit)

Summary of changes:
 Makefile.PL             |    8 ++
 bin/export-to-mediawiki |  169 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 177 insertions(+), 0 deletions(-)
 create mode 100755 bin/export-to-mediawiki

- Log -----------------------------------------------------------------
commit 92714a46dbce2c18670d1bb88a1165edf6367056
Author: Thomas Sibley <trs at bestpractical.com>
Date:   Tue Nov 2 13:23:29 2010 -0400

    Add a script to export to the MediaWiki XML format

diff --git a/Makefile.PL b/Makefile.PL
index 5059089..64a9be3 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -10,4 +10,12 @@ requires('List::Compare');
 requires('Regexp::Common');
 requires('Scalar::Util');
 recommends('Text::KwikiFormatish');
+
+# Optional feature, off by default, listing the modules loaded by
+# bin/export-to-mediawiki.
+# NOTE(review): each requires() call below is evaluated as an argument to
+# feature(); confirm that Module::Install records these as optional,
+# feature-scoped dependencies rather than as unconditional requirements.
+feature 'Export to MediaWiki script' =>
+    -default => 0,
+    requires('XML::Simple'),
+    requires('HTML::WikiConverter'),
+    requires('HTML::WikiConverter::MediaWiki'),
+    ;
+
 WriteAll;
diff --git a/bin/export-to-mediawiki b/bin/export-to-mediawiki
new file mode 100755
index 0000000..f9775b7
--- /dev/null
+++ b/bin/export-to-mediawiki
@@ -0,0 +1,169 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use Jifty;
+BEGIN { Jifty->new }
+
+use Wifty::CurrentUser;
+use Wifty::Model::PageCollection;
+use Encode qw(encode_utf8 decode_utf8);
+use XML::Simple;
+use HTML::WikiConverter;
+use HTML::WikiConverter::MediaWiki;
+
+use Getopt::Long;
+# Command-line options.  $THRESH is the chunk size in megabytes; it may be
+# fractional (the default is 1.5), so --chunk is parsed as a real number
+# ("=f") rather than as an integer.
+my ($HELP, $FILE, $SHALLOW);
+my $THRESH = 1.5; #Mb
+
+# GetOptions warns and returns false when the command line cannot be parsed;
+# in that case show the usage text instead of exporting with partial settings.
+my $PARSED = GetOptions(
+    'help'    => \$HELP,
+    'shallow' => \$SHALLOW,
+    'chunk=f' => \$THRESH,
+    'file=s'  => \$FILE,
+);
+
+if ($HELP or not $PARSED or not defined $FILE) {
+    print <<"    EOT";
+Usage: $0 --file=export [--shallow] [--chunk=1.5]
+
+  --file=name   Output filename prefix (required)
+
+  --shallow     Makes a shallow export containing only the latest
+                revision of each page.
+
+  --chunk=#     Chunks the export into files of # megabytes.
+                Defaults to 1.5.
+
+    EOT
+    # An explicit --help is a success; anything else that lands here is misuse
+    exit($HELP ? 0 : 1);
+}
+
+# Actions expect a request and a response to exist, so install empty ones
+Jifty->web->request( Jifty::Request->new );
+Jifty->web->response( Jifty::Response->new );
+
+# Fields of the <siteinfo> element, taken from the application configuration
+my %site = (
+    sitename  => Jifty->config->app('WikiName'),
+    base      => Jifty->config->framework('Web')->{'BaseURL'},
+    generator => join(
+        ' ',
+        Jifty->config->framework('ApplicationName'),
+        Jifty->config->framework('Database')->{'Version'}
+    ),
+    case      => 'first-letter',
+);
+
+# Serialized once, indented one level
+my $siteinfo = xml( siteinfo => \%site, 1 );
+
+# Arguments for the HTML -> MediaWiki markup converter, kept as an ordered
+# list so they reach the constructor in the order written here
+my @converter_args = (
+    dialect         => 'MediaWiki',
+    base_uri        => Jifty->config->framework('Web')->{'BaseURL'},
+    wiki_uri        => ['./', '/view/'],
+    pad_headings    => 1,
+    preserve_italic => 1,
+    preserve_bold   => 1,
+);
+my $converter = HTML::WikiConverter->new(@converter_args);
+
+# State carried from one chunk file to the next:
+#   $LASTID    - id of the last page that was written completely
+#   $LASTREV   - id of the last revision written for the page in progress
+#                (undef when no page is partially written)
+#   $CHUNK     - number of the chunk being written; set to -1 when finished
+#   $CHUNKFILE - name of the file currently open on $FH
+my $LASTID  = undef;
+my $LASTREV = undef;
+my $CHUNK   = 1;
+my $CHUNKFILE;
+my $FH;
+
+# Chunk size limit in bytes
+my $LIMIT = $THRESH * 1024 * 1024;
+
+# Each pass writes one "$FILE-$CHUNK.xml".  When a file grows past $LIMIT the
+# pass is abandoned with "next CHUNK"; the continue block then closes the
+# file and the following pass resumes after $LASTID / before $LASTREV.
+CHUNK: while ($CHUNK > 0) {
+    $CHUNKFILE = "$FILE-$CHUNK.xml";
+    open $FH, '>', $CHUNKFILE
+        or die "Unable to open file '$CHUNKFILE' for writing: $!\n";
+
+    binmode $FH, ':encoding(utf8)';
+
+    # Header and site info
+    print $FH "<mediawiki xml:lang='en'>\n$siteinfo";
+
+    # Pages, in ascending id order, skipping those already exported in full
+    my $super = Wifty::CurrentUser->superuser;
+    my $pages = Wifty::Model::PageCollection->new( current_user => $super );
+    $pages->order_by( column => 'id', order => 'asc' );
+
+    if (defined $LASTID) {
+        $pages->limit( column => 'id', operator => '>', value => $LASTID );
+    } else {
+        $pages->unlimit;
+    }
+
+    while (my $p = $pages->next) {
+        print $FH "  <page>\n";
+
+        # Page info
+        print $FH xml(undef, { title => $p->name }, 1);
+
+        # Revisions, newest first; when resuming a page, only those older
+        # than the last one already written
+        my $revisions = $p->revisions;
+        $revisions->limit( column => 'id', operator => '<', value => $LASTREV )
+            if defined $LASTREV;
+        $revisions->order_by( column => 'id', order => 'desc' );
+        $revisions->set_page_info( per_page => 1, current_page => 1 )
+            if $SHALLOW;
+
+        while (my $r = $revisions->next) {
+            my $creator = $r->created_by;
+            my $created = $r->created;
+            $created =~ s/ /T/;
+            $created =~ s/$/Z/;
+
+            # Do the (kwiki|markdown) -> HTML -> mediawiki conversion here
+            my $wiki = '';
+            eval {
+                if (defined $r->content and length $r->content) {
+                    my $html = $r->viewer->form_field('content')->wiki_content;
+                    $wiki = decode_utf8($converter->html2wiki(encode_utf8($html)))
+                        if defined $html and length $html; # html2wiki chokes when there's no html
+                }
+            };
+            if ($@) {
+                # Don't die, just warn and move on
+                warn "Error converting " . $p->name . ": $@\n";
+                $wiki = "Error converting page: $@";
+            }
+
+            print $FH xml(
+                revision => {
+                    text        => $wiki,
+                    timestamp   => $created,
+                    contributor => {
+                        username => $creator->friendly_name,
+                        ($r->ip ? (ip => $r->ip) : ()),
+                    },
+                },
+                2
+            );
+
+            $LASTREV = $r->id;
+
+            if (tell($FH) > $LIMIT) {
+                warn "Starting a new chunk after ", tell $FH, " bytes\n";
+
+                # Close the open <page> so this chunk stays well-formed XML
+                print $FH "  </page>\n";
+
+                # A shallow export writes a single revision per page, so the
+                # page is already complete; otherwise the next chunk would
+                # reopen it and emit an older revision.
+                if ($SHALLOW) {
+                    $LASTID  = $p->id;
+                    $LASTREV = undef;
+                }
+
+                next CHUNK;
+            }
+        }
+        print $FH "  </page>\n";
+        $LASTID  = $p->id;
+        $LASTREV = undef;
+    }
+
+    # That's all, captain
+    $CHUNK = -1;
+}
+continue {
+    # Runs after every pass, including the last one and those cut short by
+    # "next CHUNK": finish the document and move to the next chunk number
+    # (-1 becomes 0, which ends the loop).
+    print $FH "</mediawiki>\n";
+    close $FH
+        or die "Unable to finish writing file '$CHUNKFILE': $!\n";
+    $CHUNK++;
+}
+
+# Serializes $data with XMLout, passing $root as the root element name and
+# disabling attributes, then prefixes every line that starts with a tag
+# with two spaces per $indent level (none when $indent is omitted).
+# Returns the resulting XML string.
+sub xml {
+    my ($root, $data, $indent) = @_;
+
+    my $levels = $indent || 0;
+    my $space  = q{ } x (2 * $levels);
+
+    my @options = (RootName => $root, NoAttr => 1, SuppressEmpty => undef);
+    my $xml     = XMLout($data, @options);
+
+    $xml =~ s/^(\s*<)/$space$1/gm;
+    return $xml;
+}
+

-----------------------------------------------------------------------



More information about the Bps-public-commit mailing list