Take special care when decoding encoded words with charset ISO-2022-JP (9861fc4d) · Commits · e / os / Mail

mail/common/build.gradle

+1 −0

Original line number	Diff line number	Diff line
		@@ -19,6 +19,7 @@ dependencies {
		testImplementation "com.google.truth:truth:${versions.truth}"
		testImplementation "org.mockito:mockito-core:${versions.mockito}"
		testImplementation "org.mockito.kotlin:mockito-kotlin:${versions.mockitoKotlin}"
		testImplementation "com.ibm.icu:icu4j-charset:70.1"
		}

		android {

+9 −3

Original line number	Diff line number	Diff line
		@@ -81,7 +81,7 @@ internal object DecoderUtil {
		} else if (!CharsetUtil.isWhitespace(sep)) {
		output.append(charsetDecode(previousWord))
		output.append(sep)
		} else if (previousWord.isTypeEqualTo(word)) {
		} else if (previousWord.canBeCombinedWith(word)) {
		word.data = previousWord.data + word.data
		} else {
		output.append(charsetDecode(previousWord))
		@@ -179,13 +179,19 @@ internal object DecoderUtil {
		return Buffer().write(this).write(second).readByteString()
		}

		// Bytes 0x1B 0x28 0x42 ("ESC ( B"), the escape sequence that switches an ISO-2022-JP stream to ASCII.
		// isAsciiEscapeSequence() checks whether an encoded word's data ends with these bytes.
		private val ASCII_ESCAPE_SEQUENCE = byteArrayOf(0x1B, 0x28, 0x42)

		/**
		 * A single encoded word: its charset name, its transfer encoding and its payload bytes.
		 *
		 * [data] is mutable because the caller may prepend the previous word's bytes to it before the combined
		 * bytes are passed to the charset decoder.
		 */
		private class EncodedWord(
		val charset: String,
		val encoding: Encoding,
		var data: ByteString
		) {
		// NOTE(review): the next two lines look like the removed, pre-change `isTypeEqualTo` as rendered by the
		// diff view (no closing brace follows them) — confirm against the actual file.
		fun isTypeEqualTo(other: EncodedWord): Boolean {
		return encoding == other.encoding && charset == other.charset
		/**
		 * Returns `true` when this word and [other] use the same encoding and charset and this word does not end
		 * with the ISO-2022-JP "switch to ASCII" escape sequence.
		 */
		fun canBeCombinedWith(other: EncodedWord): Boolean {
		return encoding == other.encoding && charset == other.charset && !isAsciiEscapeSequence()
		}

		/**
		 * Returns `true` when the charset name starts with "ISO-2022-JP" (case-insensitive) and [data] ends with
		 * [ASCII_ESCAPE_SEQUENCE].
		 */
		private fun isAsciiEscapeSequence(): Boolean {
		return charset.startsWith("ISO-2022-JP", ignoreCase = true) && data.endsWith(ASCII_ESCAPE_SEQUENCE)
		}
		}

+10 −0

Original line number	Diff line number	Diff line
		@@ -220,6 +220,16 @@ public class DecoderUtilTest {
		assertInputDecodesToExpected("=?utf-8*de?b?R3LDvMOfZQ==?=", "Grüße");
		}

		@Test
		public void decodeEncodedWords_withMultipleIso2022JpEncodedWordsProperlyEndingWithSwitchingToAscii() {
		    // Two adjacent encoded words: the first ends with the escape sequence switching to ASCII, the second
		    // starts with the escape sequence switching to JIS X 0208:1983. Concatenating the base64-decoded bytes of
		    // both words before charset decoding would place those two escape sequences back to back. Android's
		    // decoder reports an error for that input, which results in a replacement character being inserted.
		    // The ISO-2022-JP-TEST charset gives us Android's behavior on the JVM. See TestCharsetProvider.
		    String input = "=?ISO-2022-JP-TEST?B?GyRCRnxLXDhsJEhGfEtcOGwkSEZ8S1w4bCROJUElJyVDGyhC?=\r\n" +
		            " =?ISO-2022-JP-TEST?B?GyRCJS8bKEI=?=";
		    String expected = "日本語と日本語と日本語のチェック";

		    assertInputDecodesToExpected(input, expected);
		}

		private void assertInputDecodesToExpected(String input, String expected) {
		String decodedText = DecoderUtil.decodeEncodedWords(input, null);

0 → 100644

+28 −0

Original line number	Diff line number	Diff line
		package com.fsck.k9.mail.internet

		import com.ibm.icu.charset.CharsetProviderICU
		import java.nio.charset.Charset
		import java.nio.charset.spi.CharsetProvider

		/**
		 * Test-only [CharsetProvider] that registers the "ISO-2022-JP-TEST" charset.
		 *
		 * The JVM's own "ISO-2022-JP" decoder is more lenient than the ICU4J decoder that is used on Android. Tests
		 * therefore resolve "ISO-2022-JP-TEST" to the ICU4J implementation, the same one Android uses.
		 */
		class TestCharsetProvider : CharsetProvider() {
		    // ICU4J's "ISO-2022-JP" charset, looked up once when the provider is created.
		    private val charset = CharsetProviderICU().charsetForName("ISO-2022-JP")

		    /** Iterates over the single charset this provider supplies. */
		    override fun charsets(): Iterator<Charset> = listOf(charset).iterator()

		    /**
		     * Returns the ICU4J charset when [charsetName] equals "ISO-2022-JP-TEST" (ignoring case); `null` for any
		     * other name, including `null`.
		     */
		    override fun charsetForName(charsetName: String?): Charset? =
		        charset.takeIf { charsetName.equals("ISO-2022-JP-TEST", ignoreCase = true) }
		}

0 → 100644

+1 −0

Original line number	Diff line number	Diff line
		com.fsck.k9.mail.internet.TestCharsetProvider