[Rt-commit] rt branch, 4.0/utf8-reckoning, repushed

Wed Aug 20 16:41:17 EDT 2014

The branch 4.0/utf8-reckoning was deleted and repushed:
       was 48c019ea97b8b617b0dcfcde4efe2b1235d1a697
       now 9eb1178b7cd31072fbaac0288944e040192c8d69

 1:  f2324d1 =  1:  f2324d1 Re-indent _EncodeLOB and _DecodeLOB
 2:  1827f6c =  2:  1827f6c Respect the database Content-Type header in decoding textual parts
 3:  8dbaf2c =  3:  8dbaf2c Stop needlessly frobbing utf8 internals
 4:  820c7d9 =  4:  820c7d9 Decoding content, and returning characters, is incorrect
 5:  38920a2 =  5:  38920a2 Stop assuming the data in the database is utf8
 6:  f2b0db6 =  6:  f2b0db6 Modernize and condense t/mail/sendmail.t
 7:  1acfacb =  7:  1acfacb Always log bytes, not characters
 8:  e9e7e96 =  8:  e9e7e96 The alluded-to deficiency is not a concern in perl ≥ 5.8.3
 9:  0c028fc =  9:  0c028fc Ensure all MIME::Entity bodies are UTF-8 encoded bytes
10:  c84d3ab ! 10:  3543a44 Ensure all MIME::Entity headers are UTF-8 encoded bytes
    @@ -185,6 +185,15 @@
     --- a/lib/RT/I18N.pm
     +++ b/lib/RT/I18N.pm
     @@
    +     );
    + 
    +     # If this is a textual entity, we'd need to preserve its original encoding
    +-    $head->replace( "X-RT-Original-Encoding" => $charset )
    ++    $head->replace( "X-RT-Original-Encoding" => Encode::encode( "UTF-8", $charset ) )
    + 	if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type);
    + 
    +     return unless IsTextualContentType($head->mime_type);
    +@@
      
              $RT::Logger->debug( "Converting '$charset' to '$enc' for "
                    . $head->mime_type . " - "
11:  afbf935 = 11:  206e688 Make RT::Action::SendEmail->SetHeader take characters, not bytes
12:  78d8e6d = 12:  aa3cc45 Add a utility method to check that an input is bytes
13:  160013b = 13:  19321eb Verify that MIME::Entity bodies are bytes, and remove _utf8_off call
14:  d747abd = 14:  5a0cfda Verify that MIME::Entity headers are bytes, and remove _utf8_off call
15:  5e2575c = 15:  b865183 Standardize on the stricter Encode::encode("UTF-8", ...) everywhere
16:  dda36a5 = 16:  df88c57 Remove "use utf8" from RT::I18N::fr, making NBSP explicit
17:  13584ff = 17:  a6e3fb5 Remove remaining cases of "use utf8"
18:  e93b44e = 18:  abe35cd Dashboard: decode bytes in query parameters into characters
19:  ecb36fb = 19:  774a740 Tests: WWW::Mechanize correctly returns characters now
20:  c344f5a = 20:  69dae45 _utf8_on in EncodeToMIME is needless and incorrect; remove it
21:  5653fcd = 21:  df961df Move comment from PreprocessTimeUpdates to DecodeArgs, where it belongs
22:  95a67c1 = 22:  ed57bcd Always decode data in %ARGS as UTF-8 in DecodeArgs
23:  cf70d75 = 23:  aec38ea Add RT::Util::assert_bytes checks to _EncodeLOB and _DecodeLOB
24:  a64df89 = 24:  44f43cf Update POD and comments to be clearer about characters vs bytes
25:  cc4d303 = 25:  a502084 Remove an unreachable line
26:  8c1de22 = 26:  ecb655e TSV need not explicitly encode as UTF-8; all output is UTF-8 encoded
27:  6a729e0 = 27:  3dbae7a Move "use Encode" calls to one central location
28:  97dce33 = 28:  b26af9b Consistent character/byte hygene allows RT to run with DBD::Pg 3.3.0
29:  abdf886 = 29:  83649c6 Note that HTTP output still incorrectly relies on is_utf8
30:  1dcb8bd = 30:  db93e66 Comment the logic for database decode_utf8/is_utf8 checking
31:  a078b17 ! 31:  89d45e9 Encode characters on their way out of tests
    @@ -33,3 +33,16 @@
          my $self = shift;
          my %args = @_;
     
    +diff --git a/t/mail/sendmail.t b/t/mail/sendmail.t
    +--- a/t/mail/sendmail.t
    ++++ b/t/mail/sendmail.t
    +@@
    +     my $encoded_subject = $mail[0]->head->get("Subject");
    +     chomp $encoded_subject;
    +     my $subject = Encode::decode('MIME-Header',$encoded_subject);
    +-    like($subject, qr/Niv\x{e5}er/, Encode::encode("UTF-8", "The subject matches the word - $subject"));
    ++    like($subject, qr/Niv\x{e5}er/, "The subject matches the word - $subject");
    + }
    + 
    + {
    +
32:  48c019e = 32:  9eb1178 Stop hiding "Wide character in..." warnings