[Rt-commit] rt branch, 4.0/utf8-reckoning, repushed

Tue Aug 19 18:55:56 EDT 2014

The branch 4.0/utf8-reckoning was deleted and repushed:
       was 2fe55fa5bee157244efd4352773da8f6ea81409d
       now 48c019ea97b8b617b0dcfcde4efe2b1235d1a697

 1:  f2324d1 =  1:  f2324d1 Re-indent _EncodeLOB and _DecodeLOB
 2:  1827f6c =  2:  1827f6c Respect the database Content-Type header in decoding textual parts
 3:  8dbaf2c =  3:  8dbaf2c Stop needlessly frobbing utf8 internals
 4:  820c7d9 =  4:  820c7d9 Decoding content, and returning characters, is incorrect
 5:  05ce431 !  5:  38920a2 Stop assuming the data in the database is utf8
    @@ -2,7 +2,7 @@
     
         Stop assuming the data in the database is utf8
         
    -    As noted in 1827f6c, not all content we currently call "texual" was
    +    As noted in 1827f6c, not all content we currently call "textual" was
         always treated as such.  When re-encoding, do not assume that the
         encoding in the database is UTF-8 -- rather, read the Content-Type
         header, and examine the charset stated there.  Convert from that to the
 6:  5998508 =  6:  f2b0db6 Modernize and condense t/mail/sendmail.t
 7:  4c5a64b =  7:  1acfacb Always log bytes, not characters
 8:  f298717 =  8:  e9e7e96 The alluded-to deficiency is not a concern in perl ≥ 5.8.3
 9:  63e5afb =  9:  0c028fc Ensure all MIME::Entity bodies are UTF-8 encoded bytes
10:  f4da5b5 ! 10:  c84d3ab Ensure all MIME::Entity headers are UTF-8 encoded bytes
    @@ -28,6 +28,15 @@
      
          return (@addresses);
     @@
    +     # ability to pass @_ to a 'post' routine.
    +     my ( $self, $MIMEObj ) = @_;
    + 
    +-    my $msgid = $MIMEObj->head->get('Message-ID');
    ++    my $msgid = Encode::decode( "UTF-8", $MIMEObj->head->get('Message-ID') );
    +     chomp $msgid;
    + 
    +     $self->ScripActionObj->{_Message_ID}++;
    +@@
      
          my $success = $msgid . " sent ";
          foreach (@EMAIL_RECIPIENT_HEADERS) {
    @@ -36,6 +45,15 @@
              $success .= " $_: " . $recipients if $recipients;
          }
      
    +@@
    +         $type = 'EmailRecord';
    +     }
    + 
    +-    my $msgid = $MIMEObj->head->get('Message-ID');
    ++    my $msgid = Encode::decode( "UTF-8", $MIMEObj->head->get('Message-ID') );
    +     chomp $msgid;
    + 
    +     my ( $id, $msg ) = $transaction->Create(
     @@
      
              # Have to get the list of addresses directly from the MIME header
    @@ -46,6 +64,15 @@
                  next unless $rcpt;
                  my $user_obj = RT::User->new(RT->SystemUser);
     @@
    +     # If there are no recipients, don't try to send the message.
    +     # If the transaction has content and has the header RT-Squelch-Replies-To
    + 
    +-    my $msgid = $self->TemplateObj->MIMEObj->head->get('Message-Id');
    ++    my $msgid = Encode::decode( "UTF-8", $self->TemplateObj->MIMEObj->head->get('Message-Id') );
    +     if ( my $attachment = $self->TransactionObj->Attachments->First ) {
    + 
    +         if ( $attachment->GetHeader('RT-DetectedAutoGenerated') ) {
    +@@
          my $head = $self->TemplateObj->MIMEObj->head;
      
          if ( lc($field) eq 'from' and RT->Config->Get('SMTPFrom') ) {
    @@ -87,7 +114,7 @@
      
          #Get the Message-ID
     -    my $MessageId = $Attachment->head->get( 'Message-ID', 0 );
    -+    my $MessageId = $Attachment->head->get( 'Message-ID' );
    ++    my $MessageId = Encode::decode( "UTF-8", $Attachment->head->get( 'Message-ID' ) );
          defined($MessageId) or $MessageId = '';
          chomp ($MessageId);
          $MessageId =~ s/^<(.*?)>$/$1/o;
    @@ -204,6 +231,15 @@
      
          my $entity = MIME::Entity->build(%entity_args);
     @@
    +         return 0;
    +     }
    + 
    +-    my $msgid = $args{'Entity'}->head->get('Message-ID') || '';
    ++    my $msgid = Encode::decode( "UTF-8", $args{'Entity'}->head->get('Message-ID') || '' );
    +     chomp $msgid;
    +     
    +     # If we don't have any recipients to send to, don't send a message;
    +@@
              require RT::Date;
              my $date = RT::Date->new( RT->SystemUser );
              $date->SetToNow;
    @@ -238,6 +274,15 @@
              foreach keys %{ $args{ExtraHeaders} };
      
          SetInReplyTo( Message => $mail, InReplyTo => $args{'InReplyTo'} );
    +@@
    +     );
    +     return 1 unless $args{'Sign'} || $args{'Encrypt'};
    + 
    +-    my $msgid = $args{'Entity'}->head->get('Message-ID') || '';
    ++    my $msgid = Encode::decode( "UTF-8", $args{'Entity'}->head->get('Message-ID') || '' );
    +     chomp $msgid;
    + 
    +     $RT::Logger->debug("$msgid Signing message") if $args{'Sign'};
     @@
          return
              grep $_ ne $current_address && !RT::EmailParser->IsRTAddress( $_ ),
    @@ -407,6 +452,15 @@
     --- a/lib/RT/Ticket.pm
     +++ b/lib/RT/Ticket.pm
     @@
    +     # internal Message-ID now, so all emails sent because of this
    +     # message have a common Message-ID
    +     my $org = RT->Config->Get('Organization');
    +-    my $msgid = $args{'MIMEObj'}->head->get('Message-ID');
    ++    my $msgid = Encode::decode( "UTF-8", $args{'MIMEObj'}->head->get('Message-ID') );
    +     unless (defined $msgid && $msgid =~ /<(rt-.*?-\d+-\d+)\.(\d+-0-0)\@\Q$org\E>/) {
    +         $args{'MIMEObj'}->head->set(
    +             'RT-Message-ID' => Encode::encode_utf8(
    +@@
          #Record the correspondence (write the transaction)
          my ( $Trans, $msg, $TransObj ) = $self->_NewTransaction(
                   Type => $args{'NoteType'},
11:  fe4f983 = 11:  afbf935 Make RT::Action::SendEmail->SetHeader take characters, not bytes
12:  9ba06a7 = 12:  78d8e6d Add a utility method to check that an input is bytes
13:  6165b61 = 13:  160013b Verify that MIME::Entity bodies are bytes, and remove _utf8_off call
14:  4586408 = 14:  d747abd Verify that MIME::Entity headers are bytes, and remove _utf8_off call
15:  4d222bd ! 15:  5e2575c Standardize on the stricter Encode::encode("UTF-8", ...) everywhere
    @@ -187,7 +187,7 @@
          }
      
     @@
    -     my $msgid = $args{'MIMEObj'}->head->get('Message-ID');
    +     my $msgid = Encode::decode( "UTF-8", $args{'MIMEObj'}->head->get('Message-ID') );
          unless (defined $msgid && $msgid =~ /<(rt-.*?-\d+-\d+)\.(\d+-0-0)\@\Q$org\E>/) {
              $args{'MIMEObj'}->head->set(
     -            'RT-Message-ID' => Encode::encode_utf8(
16:  8e9eab3 = 16:  dda36a5 Remove "use utf8" from RT::I18N::fr, making NBSP explicit
17:  5a1800d = 17:  13584ff Remove remaining cases of "use utf8"
18:  5769a74 = 18:  e93b44e Dashboard: decode bytes in query parameters into characters
19:  f808850 = 19:  ecb36fb Tests: WWW::Mechanize correctly returns characters now
20:  60e6f37 = 20:  c344f5a _utf8_on in EncodeToMIME is needless and incorrect; remove it
21:  5611b32 = 21:  5653fcd Move comment from PreprocessTimeUpdates to DecodeArgs, where it belongs
22:  098d9c0 = 22:  95a67c1 Always decode data in %ARGS as UTF-8 in DecodeArgs
23:  e6299de = 23:  cf70d75 Add RT::Util::assert_bytes checks to _EncodeLOB and _DecodeLOB
24:  616efe5 = 24:  a64df89 Update POD and comments to be clearer about characters vs bytes
25:  4556c79 = 25:  cc4d303 Remove an unreachable line
26:  695a5df = 26:  8c1de22 TSV need not explicitly encode as UTF-8; all output is UTF-8 encoded
27:  2a035ec = 27:  6a729e0 Move "use Encode" calls to one central location
28:  5664624 ! 28:  97dce33 Consistent character/byte hygiene allows RT to run with DBD::Pg 3.3.0
    @@ -1,10 +1,25 @@
     Author: Alex Vandiver <alexmv at bestpractical.com>
     
         Consistent character/byte hygiene allows RT to run with DBD::Pg 3.3.0
    +    
    +    This does require a version bump of the DBIx::SearchBuilder dependency
    +    for PostgreSQL installs, as DBIx::SearchBuilder previous to that version
    +    manually forced the "UTF8" off on all bound values before executing
    +    statements.  When the character/byte distinction has been intentionally
    +    made, manually (and unpredictably) applying an additional layer of UTF-8
    +    encoding is incorrect.
     
     diff --git a/sbin/rt-test-dependencies.in b/sbin/rt-test-dependencies.in
     --- a/sbin/rt-test-dependencies.in
     +++ b/sbin/rt-test-dependencies.in
    +@@
    + .
    + 
    + $deps{'POSTGRESQL'} = [ text_to_hash( << '.') ];
    ++DBIx::SearchBuilder 1.66
    + DBD::Pg 1.43
    + .
    + 
     @@
      
      my %AVOID = (
29:  6fbabf9 = 29:  abdf886 Note that HTTP output still incorrectly relies on is_utf8
30:  ecfacf1 = 30:  1dcb8bd Comment the logic for database decode_utf8/is_utf8 checking
31:  ec59563 = 31:  a078b17 Encode characters on their way out of tests
32:  2fe55fa = 32:  48c019e Stop hiding "Wide character in..." warnings