Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9861fc4d authored by cketti's avatar cketti
Browse files

Take special care when decoding encoded words with charset ISO-2022-JP

parent 94548c11
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ dependencies {
    testImplementation "com.google.truth:truth:${versions.truth}"
    testImplementation "org.mockito:mockito-core:${versions.mockito}"
    testImplementation "org.mockito.kotlin:mockito-kotlin:${versions.mockitoKotlin}"
    testImplementation "com.ibm.icu:icu4j-charset:70.1"
}

android {
+9 −3
Original line number Diff line number Diff line
@@ -81,7 +81,7 @@ internal object DecoderUtil {
            } else if (!CharsetUtil.isWhitespace(sep)) {
                output.append(charsetDecode(previousWord))
                output.append(sep)
            } else if (previousWord.isTypeEqualTo(word)) {
            } else if (previousWord.canBeCombinedWith(word)) {
                word.data = previousWord.data + word.data
            } else {
                output.append(charsetDecode(previousWord))
@@ -179,13 +179,19 @@ internal object DecoderUtil {
        return Buffer().write(this).write(second).readByteString()
    }

    // The bytes ESC ( B. In ISO-2022-JP data this escape sequence switches the stream back to ASCII.
    private val ASCII_ESCAPE_SEQUENCE = byteArrayOf(0x1B, 0x28, 0x42)

    /**
     * Holds one encoded word: the name of its charset, its encoding, and its raw data.
     *
     * [data] is mutable because the data of a preceding word may be prepended to it before charset decoding.
     */
    private class EncodedWord(
        val charset: String,
        val encoding: Encoding,
        var data: ByteString
    ) {
        fun isTypeEqualTo(other: EncodedWord): Boolean {
            return encoding == other.encoding && charset == other.charset
        // Returns true when [other] has the same encoding and charset as this word AND this word's data does not
        // end with the ISO-2022-JP "switch to ASCII" escape sequence.
        fun canBeCombinedWith(other: EncodedWord): Boolean {
            return encoding == other.encoding && charset == other.charset && !isAsciiEscapeSequence()
        }

        // True when the charset name starts with "ISO-2022-JP" (case-insensitive) and the data ends with
        // ASCII_ESCAPE_SEQUENCE.
        private fun isAsciiEscapeSequence(): Boolean {
            return charset.startsWith("ISO-2022-JP", ignoreCase = true) && data.endsWith(ASCII_ESCAPE_SEQUENCE)
        }
    }

+10 −0
Original line number Diff line number Diff line
@@ -220,6 +220,16 @@ public class DecoderUtilTest {
        assertInputDecodesToExpected("=?utf-8*de?b?R3LDvMOfZQ==?=", "Grüße");
    }

    @Test
    public void decodeEncodedWords_withMultipleIso2022JpEncodedWordsProperlyEndingWithSwitchingToAscii() {
        // Each encoded word is a complete ISO-2022-JP unit: the first one ends with the escape sequence that
        // switches to ASCII, the second one starts with the escape sequence that switches to JIS X 0208:1983.
        // Concatenating the base64-decoded bytes of both words before charset decoding places those two escape
        // sequences back to back. Android's decoder treats that as an error and inserts a replacement character.
        // The ISO-2022-JP-TEST charset (see TestCharsetProvider) gives us Android's behavior on the JVM.
        String firstLine = "=?ISO-2022-JP-TEST?B?GyRCRnxLXDhsJEhGfEtcOGwkSEZ8S1w4bCROJUElJyVDGyhC?=\r\n";
        String continuationLine = " =?ISO-2022-JP-TEST?B?GyRCJS8bKEI=?=";
        String input = firstLine + continuationLine;
        String expected = "日本語と日本語と日本語のチェック";

        assertInputDecodesToExpected(input, expected);
    }

    private void assertInputDecodesToExpected(String input, String expected) {
        String decodedText = DecoderUtil.decodeEncodedWords(input, null);
+28 −0
Original line number Diff line number Diff line
package com.fsck.k9.mail.internet

import com.ibm.icu.charset.CharsetProviderICU
import java.nio.charset.Charset
import java.nio.charset.spi.CharsetProvider

/**
 * Test-only [CharsetProvider] that registers the charset name "ISO-2022-JP-TEST".
 *
 * The name resolves to the ICU4J implementation of "ISO-2022-JP". The JVM's own "ISO-2022-JP" decoder is more
 * lenient than the ICU4J decoder used on Android, so tests use the ICU4J one to see the same behavior as on Android.
 */
class TestCharsetProvider : CharsetProvider() {
    // ICU4J's "ISO-2022-JP" charset; this is the only charset this provider exposes.
    private val charset = CharsetProviderICU().charsetForName("ISO-2022-JP")

    override fun charsets(): Iterator<Charset> = listOf(charset).iterator()

    override fun charsetForName(charsetName: String?): Charset? {
        // The name is matched case-insensitively; a null or different name yields null.
        val isTestName = charsetName.equals("ISO-2022-JP-TEST", ignoreCase = true)
        return charset.takeIf { isTestName }
    }
}
+1 −0
Original line number Diff line number Diff line
com.fsck.k9.mail.internet.TestCharsetProvider