[Rt-commit] rt branch, master, updated. rt-4.4.1-116-g9c9cede

Shawn Moore shawn at bestpractical.com
Tue Jul 26 13:43:42 EDT 2016


The branch, master has been updated
       via  9c9cedebbb63e05ffbb226ff1461f09f6b3a83cc (commit)
       via  01f90b627e6b9374e58ac9181bc335bd4d0ef8c3 (commit)
       via  fdf53ec63fc76310b3c99e55db6aa04158521608 (commit)
       via  b7cdb92a88913d11cc6ffb246f07b66b017f6ce3 (commit)
       via  6e68c08c38d21b33a375954aa949af64c1086cf4 (commit)
       via  caddbd6587925d973ba5aa81d7a2e393b6bf62b2 (commit)
       via  4d50869493b80efad910b49108f2b02a27bbabe9 (commit)
       via  f2c4c6e6a6598b34f5c9b8f05e5fd8f7395fcf68 (commit)
       via  a1eeb10a73f2d03822052cef5bc998be0b0db833 (commit)
       via  7eb35c70a5bf93c9effdab64bb290e755772b4e2 (commit)
       via  b063b228ce03d44542436993182631a590276b65 (commit)
       via  080c4ac4007d2290cb2f31d182cc2fe4b9d1de89 (commit)
       via  4afbfd566d4b0ba215b8cd016ea63833b4a7562c (commit)
       via  7a83e47fa2ced219e602de3de54b3a8344d2eb63 (commit)
       via  d4dbf02d93a3620e7f58ce6411f21f1615174896 (commit)
       via  52af5d6f3bd7d282302a1f7012ade22ff5568557 (commit)
       via  a6e7e054dcb3ed211693339d5e9ba150683ed79a (commit)
      from  b08c6cda9b4b544385fb3ee9368016574e55a811 (commit)

Summary of changes:
 Makefile.in                      |   6 +-
 docs/UPGRADING-4.6               |  24 ++
 etc/RT_Config.pm.in              |  35 ++-
 etc/cpanfile                     | 212 +++++++++++++++++
 lib/RT/I18N.pm                   |  41 +---
 lib/RT/Interface/Email.pm        |   6 -
 lib/RT/Interface/Web.pm          | 126 +---------
 lib/RT/Interface/Web/Scrubber.pm | 230 ++++++++++++++++++
 sbin/rt-test-dependencies.in     | 492 +++++++++++++--------------------------
 t/api/i18n_guess.t               |   8 -
 t/web/scrub.t                    |  13 +-
 11 files changed, 686 insertions(+), 507 deletions(-)
 create mode 100644 docs/UPGRADING-4.6
 create mode 100644 etc/cpanfile
 create mode 100644 lib/RT/Interface/Web/Scrubber.pm

- Log -----------------------------------------------------------------
commit 01f90b627e6b9374e58ac9181bc335bd4d0ef8c3
Author: Alex Vandiver <alex at chmrr.net>
Date:   Thu Jul 21 22:52:34 2016 -0700

    Make HTML::Gumbo required, and split scrubbing into its own class
    
    HTML::Gumbo was previously an optional runtime dependency, which
    allowed tables to not be scrubbed because it guaranteed that the
    user-provided content couldn't "escape" the confines of the
    transaction history.
    
    Make HTML::Gumbo a required dependency, allowing tables in HTML for
    all installations of RT.  In doing so, factor out the HTML-scrubbing
    code into its own class, for compartmentalization.

diff --git a/docs/UPGRADING-4.6 b/docs/UPGRADING-4.6
new file mode 100644
index 0000000..6755ffa
--- /dev/null
+++ b/docs/UPGRADING-4.6
@@ -0,0 +1,24 @@
+=head1 UPGRADING FROM RT 4.4.0 and greater
+
+The 4.6 release is a major upgrade and as such there are more changes
+than in a minor bugfix release (e.g., 4.4.0 to 4.4.1) and some of these
+changes are backward-incompatible. The following lists some of the notable
+changes, especially those that might require you to change a configuration
+option or other setting due to a change in RT. Read this section carefully
+before you upgrade and look for changes to features you currently use.
+
+See F<devel/docs/UPGRADING-4.6> for internals changes relevant to
+extension writers.
+
+=over
+
+=item *
+
+The variables which alter the set of HTML elements allowed in HTML
+scrubbing have moved; they have been renamed, and are now found under
+L<RT::Interface::Web::Scrubber>.
+
+=back
+
+=cut
+
diff --git a/etc/RT_Config.pm.in b/etc/RT_Config.pm.in
index 0b5d1c7..bf9f01e 100644
--- a/etc/RT_Config.pm.in
+++ b/etc/RT_Config.pm.in
@@ -2033,9 +2033,6 @@ multiple vectors for XSS and phishing attacks.  If
 L</$TrustHTMLAttachments> is enabled, the original HTML is available for
 viewing via the "Download" link.
 
-If the optional L<HTML::Gumbo> dependency is installed, RT will leverage
-this to allow a broader set of HTML through, including tables.
-
 =cut
 
 Set($PreferRichText, 1);
diff --git a/etc/cpanfile b/etc/cpanfile
index 6425c35..59e4b3c 100644
--- a/etc/cpanfile
+++ b/etc/cpanfile
@@ -41,6 +41,7 @@ requires 'HTML::Entities';
 requires 'HTML::FormatExternal';
 requires 'HTML::FormatText::WithLinks', '>= 0.14';
 requires 'HTML::FormatText::WithLinks::AndTables', '>= 0.06';
+requires 'HTML::Gumbo';
 requires 'HTML::Mason', '>= 1.43';
 requires 'HTML::Mason::PSGIHandler', '>= 0.52';
 requires 'HTML::Quoted';
diff --git a/lib/RT/Interface/Web.pm b/lib/RT/Interface/Web.pm
index e3cf905..33e810d 100644
--- a/lib/RT/Interface/Web.pm
+++ b/lib/RT/Interface/Web.pm
@@ -60,6 +60,7 @@ RT::Interface::Web
 
 use strict;
 use warnings;
+use 5.010;
 
 package RT::Interface::Web;
 
@@ -68,6 +69,7 @@ use RT::CustomRoles;
 use URI qw();
 use RT::Interface::Web::Menu;
 use RT::Interface::Web::Session;
+use RT::Interface::Web::Scrubber;
 use Digest::MD5 ();
 use List::MoreUtils qw();
 use JSON qw();
@@ -4261,129 +4263,9 @@ Removes unsafe and undesired HTML from the passed content
 
 =cut
 
-my $SCRUBBER;
 sub ScrubHTML {
-    my $Content = shift;
-    $SCRUBBER = _NewScrubber() unless $SCRUBBER;
-
-    $Content = '' if !defined($Content);
-    return $SCRUBBER->scrub($Content);
-}
-
-=head2 _NewScrubber
-
-Returns a new L<HTML::Scrubber> object.
-
-If you need to be more lax about what HTML tags and attributes are allowed,
-create C</opt/rt4/local/lib/RT/Interface/Web_Local.pm> with something like the
-following:
-
-    package HTML::Mason::Commands;
-    # Let tables through
-    push @SCRUBBER_ALLOWED_TAGS, qw(TABLE THEAD TBODY TFOOT TR TD TH);
-    1;
-
-=cut
-
-our @SCRUBBER_ALLOWED_TAGS = qw(
-    A B U P BR I HR BR SMALL EM FONT SPAN STRONG SUB SUP S DEL STRIKE H1 H2 H3 H4 H5
-    H6 DIV UL OL LI DL DT DD PRE BLOCKQUOTE BDO
-);
-
-our %SCRUBBER_ALLOWED_ATTRIBUTES = (
-    # Match http, https, ftp, mailto and relative urls
-    # XXX: we also scrub format strings with this module then allow simple config options
-    href   => qr{^(?:https?:|ftp:|mailto:|/|__Web(?:Path|HomePath|BaseURL|URL)__)}i,
-    face   => 1,
-    size   => 1,
-    color  => 1,
-    target => 1,
-    style  => qr{
-        ^(?:\s*
-            (?:(?:background-)?color: \s*
-                    (?:rgb\(\s* \d+, \s* \d+, \s* \d+ \s*\) |   # rgb(d,d,d)
-                       \#[a-f0-9]{3,6}                      |   # #fff or #ffffff
-                       [\w\-]+                                  # green, light-blue, etc.
-                       )                            |
-               text-align: \s* \w+                  |
-               font-size: \s* [\w.\-]+              |
-               font-family: \s* [\w\s"',.\-]+       |
-               font-weight: \s* [\w\-]+             |
-
-               border-style: \s* \w+                |
-               border-color: \s* [#\w]+             |
-               border-width: \s* [\s\w]+            |
-               padding: \s* [\s\w]+                 |
-               margin: \s* [\s\w]+                  |
-
-               # MS Office styles, which are probably fine.  If we don't, then any
-               # associated styles in the same attribute get stripped.
-               mso-[\w\-]+?: \s* [\w\s"',.\-]+
-            )\s* ;? \s*)
-         +$ # one or more of these allowed properties from here 'till sunset
-    }ix,
-    dir    => qr/^(rtl|ltr)$/i,
-    lang   => qr/^\w+(-\w+)?$/,
-
-    # timeworked per user attributes
-    'data-ticket-id'    => 1,
-    'data-ticket-class' => 1,
-);
-
-our %SCRUBBER_RULES = ();
-
-# If we're displaying images, let embedded ones through
-if (RT->Config->Get('ShowTransactionImages') or RT->Config->Get('ShowRemoteImages')) {
-    $SCRUBBER_RULES{'img'} = {
-        '*' => 0,
-        alt => 1,
-    };
-
-    my @src;
-    push @src, qr/^cid:/i
-        if RT->Config->Get('ShowTransactionImages');
-
-    push @src, $SCRUBBER_ALLOWED_ATTRIBUTES{'href'}
-        if RT->Config->Get('ShowRemoteImages');
-
-    $SCRUBBER_RULES{'img'}->{'src'} = join "|", @src;
-}
-
-sub _NewScrubber {
-    require HTML::Scrubber;
-    my $scrubber = HTML::Scrubber->new();
-
-    if (HTML::Gumbo->require) {
-        no warnings 'redefine';
-        my $orig = \&HTML::Scrubber::scrub;
-        *HTML::Scrubber::scrub = sub {
-            my $self = shift;
-
-            eval { $_[0] = HTML::Gumbo->new->parse( $_[0] ); chomp $_[0] };
-            warn "HTML::Gumbo pre-parse failed: $@" if $@;
-            return $orig->($self, @_);
-        };
-        push @SCRUBBER_ALLOWED_TAGS, qw/TABLE THEAD TBODY TFOOT TR TD TH/;
-        $SCRUBBER_ALLOWED_ATTRIBUTES{$_} = 1 for
-            qw/colspan rowspan align valign cellspacing cellpadding border width height/;
-    }
-
-    $scrubber->default(
-        0,
-        {
-            %SCRUBBER_ALLOWED_ATTRIBUTES,
-            '*' => 0, # require attributes be explicitly allowed
-        },
-    );
-    $scrubber->deny(qw[*]);
-    $scrubber->allow(@SCRUBBER_ALLOWED_TAGS);
-    $scrubber->rules(%SCRUBBER_RULES);
-
-    # Scrubbing comments is vital since IE conditional comments can contain
-    # arbitrary HTML and we'd pass it right on through.
-    $scrubber->comment(0);
-
-    return $scrubber;
+    state $scrubber = RT::Interface::Web::Scrubber->new;
+    return $scrubber->scrub(@_);
 }
 
 =head2 JSON
diff --git a/lib/RT/Interface/Web/Scrubber.pm b/lib/RT/Interface/Web/Scrubber.pm
new file mode 100644
index 0000000..fefffd4
--- /dev/null
+++ b/lib/RT/Interface/Web/Scrubber.pm
@@ -0,0 +1,230 @@
+# BEGIN BPS TAGGED BLOCK {{{
+#
+# COPYRIGHT:
+#
+# This software is Copyright (c) 1996-2016 Best Practical Solutions, LLC
+#                                          <sales at bestpractical.com>
+#
+# (Except where explicitly superseded by other copyright notices)
+#
+#
+# LICENSE:
+#
+# This work is made available to you under the terms of Version 2 of
+# the GNU General Public License. A copy of that license should have
+# been provided with this software, but in any event can be snarfed
+# from www.gnu.org.
+#
+# This work is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 or visit their web page on the internet at
+# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
+#
+#
+# CONTRIBUTION SUBMISSION POLICY:
+#
+# (The following paragraph is not intended to limit the rights granted
+# to you to modify and distribute this software under the terms of
+# the GNU General Public License and is only of importance to you if
+# you choose to contribute your changes and enhancements to the
+# community by submitting them to Best Practical Solutions, LLC.)
+#
+# By intentionally submitting any modifications, corrections or
+# derivatives to this work, or any other work intended for use with
+# Request Tracker, to Best Practical Solutions, LLC, you confirm that
+# you are the copyright holder for those contributions and you grant
+# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
+# royalty-free, perpetual, license to use, copy, create derivative
+# works based on those contributions, and sublicense and distribute
+# those contributions and any derivatives thereof.
+#
+# END BPS TAGGED BLOCK }}}
+package RT::Interface::Web::Scrubber;
+use strict;
+use warnings;
+use 5.010;
+use base qw/HTML::Scrubber/;
+
+use HTML::Gumbo;
+
+=head1 NAME
+
+RT::Interface::Web::Scrubber
+
+=head1 DESCRIPTION
+
+This is a subclass of L<HTML::Scrubber> which automatically configures
+itself with a sane and safe default set of rules.  Additionally, it
+ensures that the input is balanced HTML by use of the L<HTML::Gumbo>
+on the input to L</scrub>.
+
+=head1 VARIABLES
+
+These variables can be altered by creating a C<Scrubber_Local.pm>
+file, containing something of the form:
+
+    package RT::Interface::Web::Scrubber;
+
+    # Allow the "title" attribute
+    $ALLOWED_ATTRIBUTES{title} = 1;
+
+=over
+
+=item C<@ALLOWED_TAGS>
+
+Passed to L<HTML::Scrubber/allow>.
+
+=item C<%ALLOWED_ATTRIBUTES>
+
+Passed into L<HTML::Scrubber/default>.
+
+=item C<%RULES>
+
+Passed to L<HTML::Scrubber/rules>.
+
+=back
+
+=cut
+
+our @ALLOWED_TAGS = qw(
+    A B U P BR I HR BR SMALL EM FONT SPAN STRONG SUB SUP S DEL STRIKE H1 H2 H3 H4 H5
+    H6 DIV UL OL LI DL DT DD PRE BLOCKQUOTE BDO TABLE THEAD TBODY TFOOT TR TD TH
+);
+
+our %ALLOWED_ATTRIBUTES = (
+    # Match http, https, ftp, mailto and relative urls
+    # XXX: we also scrub format strings with this module then allow simple config options
+    href   => qr{^(?:https?:|ftp:|mailto:|/|__Web(?:Path|HomePath|BaseURL|URL)__)}i,
+    face   => 1,
+    size   => 1,
+    color  => 1,
+    target => 1,
+    style  => qr{
+        ^(?:\s*
+            (?:(?:background-)?color: \s*
+                    (?:rgb\(\s* \d+, \s* \d+, \s* \d+ \s*\) |   # rgb(d,d,d)
+                       \#[a-f0-9]{3,6}                      |   # #fff or #ffffff
+                       [\w\-]+                                  # green, light-blue, etc.
+                       )                            |
+               text-align: \s* \w+                  |
+               font-size: \s* [\w.\-]+              |
+               font-family: \s* [\w\s"',.\-]+       |
+               font-weight: \s* [\w\-]+             |
+
+               border-style: \s* \w+                |
+               border-color: \s* [#\w]+             |
+               border-width: \s* [\s\w]+            |
+               padding: \s* [\s\w]+                 |
+               margin: \s* [\s\w]+                  |
+
+               # MS Office styles, which are probably fine.  If we don't, then any
+               # associated styles in the same attribute get stripped.
+               mso-[\w\-]+?: \s* [\w\s"',.\-]+
+            )\s* ;? \s*)
+         +$ # one or more of these allowed properties from here 'till sunset
+    }ix,
+    dir    => qr/^(rtl|ltr)$/i,
+    lang   => qr/^\w+(-\w+)?$/,
+
+    colspan     => 1,
+    rowspan     => 1,
+    align       => 1,
+    valign      => 1,
+    cellspacing => 1,
+    cellpadding => 1,
+    border      => 1,
+    width       => 1,
+    height      => 1,
+
+    # timeworked per user attributes
+    'data-ticket-id'    => 1,
+    'data-ticket-class' => 1,
+);
+
+our %RULES = ();
+
+=head1 METHODS
+
+=head2 new
+
+Returns a new L<RT::Interface::Web::Scrubber> object, configured with
+the above globals.  Takes no arguments.
+
+=cut
+
+sub new {
+    my $class = shift;
+    my $self = $class->SUPER::new(@_);
+
+    $self->default(
+        0,
+        {
+            %ALLOWED_ATTRIBUTES,
+            '*' => 0, # require attributes be explicitly allowed
+        },
+    );
+    $self->deny(qw[*]);
+    $self->allow(@ALLOWED_TAGS);
+
+    # If we're displaying images, let embedded ones through
+    if (RT->Config->Get('ShowTransactionImages') or RT->Config->Get('ShowRemoteImages')) {
+        my @src;
+        push @src, qr/^cid:/i
+            if RT->Config->Get('ShowTransactionImages');
+
+        push @src, $ALLOWED_ATTRIBUTES{'href'}
+            if RT->Config->Get('ShowRemoteImages');
+
+        $RULES{'img'} ||= {
+            '*' => 0,
+            alt => 1,
+            src => join("|", @src),
+        };
+    }
+    $self->rules(%RULES);
+
+    # Scrubbing comments is vital since IE conditional comments can contain
+    # arbitrary HTML and we'd pass it right on through.
+    $self->comment(0);
+
+    return $self;
+}
+
+=head2 gumbo
+
+Returns a L<HTML::Gumbo> object.
+
+=cut
+
+sub gumbo {
+    my $self = shift;
+    return $self->{_gumbo} //= HTML::Gumbo->new;
+}
+
+=head2 scrub TEXT
+
+Takes a string of HTML, and returns it scrubbed, via L<HTML::Gumbo>
+then the rules.  This is a more limited interface than
+L<HTML::Scrubber/scrub>.
+
+=cut
+
+sub scrub {
+    my $self = shift;
+    my $Content = shift // '';
+
+    # First pass through HTML::Gumbo to balance the tags
+    eval { $Content = $self->gumbo->parse( $Content ); chomp $Content };
+    warn "HTML::Gumbo pre-parse failed: $@" if $@;
+
+    return $self->SUPER::scrub($Content);
+}
+
+RT::Base->_ImportOverlays();
+1;
diff --git a/t/web/scrub.t b/t/web/scrub.t
index 835f412..4137986 100644
--- a/t/web/scrub.t
+++ b/t/web/scrub.t
@@ -1,10 +1,14 @@
 use strict;
 use warnings;
 
-use RT::Test nodb => 1, tests => 6;
+use RT::Test nodb => 1, tests => undef;
 use RT::Interface::Web; # This gets us HTML::Mason::Commands
 use Test::LongString;
 
+sub scrub_html {
+    return HTML::Mason::Commands::ScrubHTML(shift);
+}
+
 {
     my $html = 'This is a test of <span style="color: rgb(255, 0, 0); ">color</span> and <span style="font-size: 18px; "><span style="font-family: Georgia, serif; ">font</span></span> and <em><u><strike><strong>boldness</strong></strike></u></em>.';
     is_string(scrub_html($html), $html, "CKEditor produced HTML sails through");
@@ -39,7 +43,10 @@ use Test::LongString;
     is_string(scrub_html($html), $expected, "outlook html");
 }
 
-sub scrub_html {
-    return HTML::Mason::Commands::ScrubHTML(shift);
+{
+    my $html = q[</td></tr></table><table><td>Some content here</table>An unclosed <b>bold<b> tag.];
+    my $expected = q[<table><tbody><tr><td>Some content here</td></tr></tbody></table>An unclosed <b>bold<b> tag.</b></b>];
+    is_string(scrub_html($html), $expected, "Unbalanced tags get balanced");
 }
 
+done_testing;

commit 9c9cedebbb63e05ffbb226ff1461f09f6b3a83cc
Merge: b08c6cd 01f90b6
Author: Shawn M Moore <shawn at bestpractical.com>
Date:   Tue Jul 26 17:43:38 2016 +0000

    Merge branch '4.6/cpanfile'


-----------------------------------------------------------------------


More information about the rt-commit mailing list