Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Unverified Commit 26f6963e authored by cketti's avatar cketti Committed by GitHub
Browse files

Merge pull request #3134 from k9mail/fix_encoded_word_decoding

Split B-/Q-decoding and charset decoding into two different phases
parents a36254db 4bdf64e9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ repositories {
dependencies {
    compile 'org.apache.james:apache-mime4j-core:0.8.1'
    compile 'org.apache.james:apache-mime4j-dom:0.8.1'
    compile "com.squareup.okio:okio:${okioVersion}"
    compile 'commons-io:commons-io:2.4'
    compile 'com.jcraft:jzlib:1.0.7'
    compile 'com.beetstra.jutf7:jutf7:1.0.0'
@@ -27,7 +28,6 @@ dependencies {
    androidTestCompile 'com.madgag.spongycastle:pg:1.51.0.0'

    testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
    testCompile "com.squareup.okio:okio:${okioVersion}"
    testCompile "org.robolectric:robolectric:${robolectricVersion}"
    testCompile "junit:junit:${junitVersion}"
    testCompile "com.google.truth:truth:${truthVersion}"
+59 −75
Original line number Diff line number Diff line

package com.fsck.k9.mail.internet;

import com.fsck.k9.mail.Message;
import com.fsck.k9.mail.MessagingException;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

import org.apache.james.mime4j.codec.Base64InputStream;
import com.fsck.k9.mail.Message;
import com.fsck.k9.mail.MessagingException;
import okio.Buffer;
import okio.ByteString;
import okio.Okio;
import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
import org.apache.james.mime4j.util.CharsetUtil;
import timber.log.Timber;
@@ -21,65 +25,6 @@ import timber.log.Timber;
 * it has to be determined with the sender address, the mailer and so on.
 */
class DecoderUtil {

    /**
     * Holder for the parts of a single RFC 2047 encoded word as found in a header field body.
     */
    private static class EncodedWord {
        // Name of the charset used to turn the decoded bytes into text.
        private String charset;
        // Transfer encoding of the word; the parser visible in this file stores either "Q" or "B".
        private String encoding;
        // The still-encoded text portion of the word, handed to the B-/Q-decoder.
        private String encodedText;
    }

    /**
     * Decodes the text of an encoded word that uses the 'B' encoding
     * (see RFC 2047) and converts the result to a string.
     *
     * @param encodedWord the Base64 text of the encoded word.
     * @param charset the name of the Java charset used to build the string.
     * @return the decoded string, or {@code null} if reading the decoded data fails with an
     *         {@link IOException}.
     */
    private static String decodeB(String encodedWord, String charset) {
        byte[] asciiBytes = encodedWord.getBytes(Charset.forName("US-ASCII"));
        ByteArrayInputStream rawInput = new ByteArrayInputStream(asciiBytes);
        Base64InputStream base64Input = new Base64InputStream(rawInput);

        String decoded;
        try {
            decoded = CharsetSupport.readToString(base64Input, charset);
        } catch (IOException e) {
            decoded = null;
        }
        return decoded;
    }

    /**
     * Decodes the text of an encoded word that uses the 'Q' encoding
     * (see RFC 2047) and converts the result to a string.
     *
     * @param encodedWord the 'Q'-encoded text of the encoded word.
     * @param charset the name of the Java charset used to build the string.
     * @return the decoded string, or {@code null} if reading the decoded data fails with an
     *         {@link IOException}.
     */
    static String decodeQ(String encodedWord, String charset) {
        // Every "_" is rewritten to "=20" before the text is handed to the quoted-printable decoder.
        String quotedPrintable = encodedWord.replace("_", "=20");
        byte[] asciiBytes = quotedPrintable.getBytes(Charset.forName("US-ASCII"));

        ByteArrayInputStream rawInput = new ByteArrayInputStream(asciiBytes);
        QuotedPrintableInputStream quotedPrintableInput = new QuotedPrintableInputStream(rawInput);

        String decoded;
        try {
            decoded = CharsetSupport.readToString(quotedPrintableInput, charset);
        } catch (IOException e) {
            decoded = null;
        }
        return decoded;
    }

    /**
     * Decodes a string containing encoded words as defined by RFC 2047.
     * Encoded words have the form
@@ -145,18 +90,18 @@ class DecoderUtil {
                }
            } else {
                if (word == null) {
                    sb.append(decodeEncodedWord(previousWord));
                    sb.append(charsetDecode(previousWord));
                    sb.append(sep);
                    sb.append(body.substring(begin, end));
                } else {
                    if (!CharsetUtil.isWhitespace(sep)) {
                        sb.append(decodeEncodedWord(previousWord));
                        sb.append(charsetDecode(previousWord));
                        sb.append(sep);
                    } else if (previousWord.encoding.equals(word.encoding) &&
                            previousWord.charset.equals(word.charset)) {
                        word.encodedText = previousWord.encodedText + word.encodedText;
                        word.data = concat(previousWord.data, word.data);
                    } else {
                        sb.append(decodeEncodedWord(previousWord));
                        sb.append(charsetDecode(previousWord));
                    }
                }
            }
@@ -170,19 +115,17 @@ class DecoderUtil {
            int previousEnd) {

        if (previousWord != null) {
            sb.append(decodeEncodedWord(previousWord));
            sb.append(charsetDecode(previousWord));
        }

        sb.append(body.substring(previousEnd));
    }

    private static String decodeEncodedWord(EncodedWord word) {
        if (word.encoding.equals("Q")) {
            return decodeQ(word.encodedText, word.charset);
        } else if (word.encoding.equals("B")) {
            return DecoderUtil.decodeB(word.encodedText, word.charset);
        } else {
            Timber.w("Warning: Unknown encoding '%s'", word.encoding);
    /**
     * Converts the already B-/Q-decoded bytes of an encoded word into text using the word's charset.
     *
     * @param word the encoded word; its {@code data} is read and interpreted using its {@code charset}.
     * @return the decoded text, or an empty string if reading the data fails with an {@link IOException}.
     */
    private static String charsetDecode(EncodedWord word) {
        try {
            InputStream inputStream = new Buffer().write(word.data).inputStream();
            return CharsetSupport.readToString(inputStream, word.charset);
        } catch (IOException e) {
            // The callers visible in this file append the result to a StringBuilder, where a null
            // reference would end up as the literal text "null" in the decoded header value.
            Timber.w(e, "Error decoding encoded word using charset '%s'", word.charset);
            return "";
        }
    }
@@ -216,13 +159,54 @@ class DecoderUtil {
        encodedWord.charset = charset;
        if (encoding.equalsIgnoreCase("Q")) {
            encodedWord.encoding = "Q";
            encodedWord.data = decodeQ(encodedText);
        } else if (encoding.equalsIgnoreCase("B")) {
            encodedWord.encoding = "B";
            encodedWord.data = decodeB(encodedText);
        } else {
            Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
            return null;
        }
        encodedWord.encodedText = encodedText;
        return encodedWord;
    }

    /**
     * Decodes the text of an encoded word that uses the 'Q' encoding (described in RFC 2047) into
     * raw bytes. No charset conversion happens here; the result is only the decoded byte sequence.
     *
     * @param encodedWord the 'Q'-encoded text of the encoded word.
     * @return the decoded bytes, or {@link ByteString#EMPTY} if reading the decoded data fails with an
     *         {@link IOException}.
     */
    private static ByteString decodeQ(String encodedWord) {
        // Rewrite every "_" as "=20" so the quoted-printable decoder below turns it into a space byte.
        String quotedPrintable = encodedWord.replace("_", "=20");
        byte[] bytes = quotedPrintable.getBytes(Charset.forName("US-ASCII"));

        QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
        try {
            return Okio.buffer(Okio.source(is)).readByteString();
        } catch (IOException e) {
            // Return an empty byte string like decodeB(String) does on failure; a null here would be
            // passed on to Buffer.write() when the data is concatenated or charset-decoded.
            Timber.w(e, "Error decoding 'Q' encoded word");
            return ByteString.EMPTY;
        }
    }

    /**
     * Decodes the Base64 text of a 'B'-encoded word into raw bytes.
     *
     * @param encodedText the Base64 text of the encoded word.
     * @return the decoded bytes, or {@link ByteString#EMPTY} when {@link ByteString#decodeBase64(String)}
     *         returns {@code null} for the given text.
     */
    private static ByteString decodeB(String encodedText) {
        ByteString bytes = ByteString.decodeBase64(encodedText);
        if (bytes != null) {
            return bytes;
        }
        return ByteString.EMPTY;
    }

    /**
     * Joins two byte strings into a new one holding the bytes of {@code first} followed by the bytes
     * of {@code second}.
     *
     * @param first the leading bytes.
     * @param second the trailing bytes.
     * @return a byte string with the combined content.
     */
    private static ByteString concat(ByteString first, ByteString second) {
        Buffer combined = new Buffer();
        combined.write(first);
        combined.write(second);
        return combined.readByteString();
    }


    /**
     * Holder for the parts of a single RFC 2047 encoded word after its B-/Q-decoding step.
     */
    private static class EncodedWord {
        // Name of the charset used to turn the decoded bytes into text.
        private String charset;
        // Transfer encoding of the word; the parser visible in this file stores either "Q" or "B".
        private String encoding;
        // Raw bytes produced by B-/Q-decoding; charset decoding is applied to them afterwards.
        private ByteString data;
    }
}
+7 −1
Original line number Diff line number Diff line
@@ -119,7 +119,7 @@ public class DecoderUtilTest {

    /**
     * Checks that an encoded word using the 'b' encoding with a payload that is not valid Base64
     * decodes to the empty string.
     */
    @Test
    public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() {
        assertInputDecodesToExpected("=?us-ascii?b?abc?=", "");
        assertInputDecodesToExpected("=?us-ascii?b?ab#?=", "");
    }

    @Test
@@ -192,6 +192,12 @@ public class DecoderUtilTest {
        assertInputDecodesToExpected("=?us-ascii?Q?oh_no_?= =?utf-8?Q?=F0=9F=92=A9?=", "oh no 💩");
    }

    /**
     * Two adjacent 'B'-encoded UTF-8 words, separated by a single space, must both contribute their
     * decoded text to the result.
     */
    @Test
    public void decodeEncodedWords_withTwoCompleteEncodedWords_shouldProvideBoth() {
        String firstWord = "=?UTF-8?B?W+aWsOioguWWrl0g6aGn5a6iOiB4eHhAeHh4LmNvbSDmnInmlrDoqILllq46ICMyMDE4MA==?= ";
        String secondWord = "=?UTF-8?B?MTE4MTIzNDU2Nzg=?=";
        String expected = "[新訂單] 顧客: xxx@xxx.com 有新訂單: #2018011812345678";

        assertInputDecodesToExpected(firstWord + secondWord, expected);
    }

    @Test
    public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() {
        assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)");