Merge pull request #3134 from k9mail/fix_encoded_word_decoding (26f6963e) · Commits · e / os / Mail

k9mail-library/build.gradle

+1 −1

Original line number	Diff line number	Diff line
		@@ -17,6 +17,7 @@ repositories {
		dependencies {
		compile 'org.apache.james:apache-mime4j-core:0.8.1'
		compile 'org.apache.james:apache-mime4j-dom:0.8.1'
		compile "com.squareup.okio:okio:${okioVersion}"
		compile 'commons-io:commons-io:2.4'
		compile 'com.jcraft:jzlib:1.0.7'
		compile 'com.beetstra.jutf7:jutf7:1.0.0'
		@@ -27,7 +28,6 @@ dependencies {
		androidTestCompile 'com.madgag.spongycastle:pg:1.51.0.0'

		testCompile "org.jetbrains.kotlin:kotlin-stdlib-jre7:${kotlinVersion}"
		testCompile "com.squareup.okio:okio:${okioVersion}"
		testCompile "org.robolectric:robolectric:${robolectricVersion}"
		testCompile "junit:junit:${junitVersion}"
		testCompile "com.google.truth:truth:${truthVersion}"

k9mail-library/src/main/java/com/fsck/k9/mail/internet/DecoderUtil.java

+59 −75

Original line number	Diff line number	Diff line

		package com.fsck.k9.mail.internet;

		import com.fsck.k9.mail.Message;
		import com.fsck.k9.mail.MessagingException;

		import java.io.ByteArrayInputStream;
		import java.io.IOException;
		import java.io.InputStream;
		import java.nio.charset.Charset;

		import org.apache.james.mime4j.codec.Base64InputStream;
		import com.fsck.k9.mail.Message;
		import com.fsck.k9.mail.MessagingException;
		import okio.Buffer;
		import okio.ByteString;
		import okio.Okio;
		import org.apache.james.mime4j.codec.QuotedPrintableInputStream;
		import org.apache.james.mime4j.util.CharsetUtil;
		import timber.log.Timber;
		@@ -21,65 +25,6 @@ import timber.log.Timber;
		* it has to be determined with the sender address, the mailer and so on.
		*/
		class DecoderUtil {

		/**
		 * Holder for the parts of a single encoded word that is still waiting to be decoded.
		 */
		private static class EncodedWord {
		// Charset name handed to decodeQ()/decodeB() when the word is finally decoded.
		private String charset;
		// Encoding identifier; decodeEncodedWord() compares it against "Q" and "B".
		private String encoding;
		// Still-encoded text of the word. The text of an adjacent word with the same
		// encoding and charset is appended to it before decoding.
		private String encodedText;
		}

		/**
		 * Decodes the text of an encoded word that uses the 'B' encoding (see RFC 2047)
		 * found in a header field body.
		 *
		 * @param encodedWord the encoded text to decode.
		 * @param charset the name of the charset used to turn the decoded bytes into a string.
		 * @return the decoded string, or {@code null} if reading the decoded data fails.
		 */
		private static String decodeB(String encodedWord, String charset) {
		    byte[] asciiBytes = encodedWord.getBytes(Charset.forName("US-ASCII"));
		    ByteArrayInputStream rawInput = new ByteArrayInputStream(asciiBytes);
		    Base64InputStream base64Input = new Base64InputStream(rawInput);

		    String decoded;
		    try {
		        decoded = CharsetSupport.readToString(base64Input, charset);
		    } catch (IOException e) {
		        // Best effort: a read failure is reported to the caller as null.
		        decoded = null;
		    }
		    return decoded;
		}

		/**
		 * Decodes the text of an encoded word that uses the 'Q' encoding (see RFC 2047)
		 * found in a header field body.
		 *
		 * @param encodedWord the encoded text to decode.
		 * @param charset the name of the charset used to turn the decoded bytes into a string.
		 * @return the decoded string, or {@code null} if reading the decoded data fails.
		 */
		static String decodeQ(String encodedWord, String charset) {
		    // Rewrite every '_' as "=20" so the quoted-printable decoder can handle it.
		    String quotedPrintable = encodedWord.replace("_", "=20");
		    byte[] asciiBytes = quotedPrintable.getBytes(Charset.forName("US-ASCII"));

		    QuotedPrintableInputStream decoder =
		            new QuotedPrintableInputStream(new ByteArrayInputStream(asciiBytes));
		    try {
		        return CharsetSupport.readToString(decoder, charset);
		    } catch (IOException e) {
		        // Best effort: a read failure is reported to the caller as null.
		        return null;
		    }
		}

		/**
		* Decodes a string containing encoded words as defined by RFC 2047.
		* Encoded words have the form
		@@ -145,18 +90,18 @@ class DecoderUtil {
		}
		} else {
		if (word == null) {
		sb.append(decodeEncodedWord(previousWord));
		sb.append(charsetDecode(previousWord));
		sb.append(sep);
		sb.append(body.substring(begin, end));
		} else {
		if (!CharsetUtil.isWhitespace(sep)) {
		sb.append(decodeEncodedWord(previousWord));
		sb.append(charsetDecode(previousWord));
		sb.append(sep);
		} else if (previousWord.encoding.equals(word.encoding) &&
		previousWord.charset.equals(word.charset)) {
		word.encodedText = previousWord.encodedText + word.encodedText;
		word.data = concat(previousWord.data, word.data);
		} else {
		sb.append(decodeEncodedWord(previousWord));
		sb.append(charsetDecode(previousWord));
		}
		}
		}
		@@ -170,19 +115,17 @@ class DecoderUtil {
		int previousEnd) {

		if (previousWord != null) {
		sb.append(decodeEncodedWord(previousWord));
		sb.append(charsetDecode(previousWord));
		}

		sb.append(body.substring(previousEnd));
		}

		private static String decodeEncodedWord(EncodedWord word) {
		if (word.encoding.equals("Q")) {
		return decodeQ(word.encodedText, word.charset);
		} else if (word.encoding.equals("B")) {
		return DecoderUtil.decodeB(word.encodedText, word.charset);
		} else {
		Timber.w("Warning: Unknown encoding '%s'", word.encoding);
		private static String charsetDecode(EncodedWord word) {
		try {
		InputStream inputStream = new Buffer().write(word.data).inputStream();
		return CharsetSupport.readToString(inputStream, word.charset);
		} catch (IOException e) {
		return null;
		}
		}
		@@ -216,13 +159,54 @@ class DecoderUtil {
		encodedWord.charset = charset;
		if (encoding.equalsIgnoreCase("Q")) {
		encodedWord.encoding = "Q";
		encodedWord.data = decodeQ(encodedText);
		} else if (encoding.equalsIgnoreCase("B")) {
		encodedWord.encoding = "B";
		encodedWord.data = decodeB(encodedText);
		} else {
		Timber.w("Warning: Unknown encoding in encoded word '%s'", body.substring(begin, end));
		return null;
		}
		encodedWord.encodedText = encodedText;
		return encodedWord;
		}

		/**
		 * Decodes the text of a 'Q' encoded word (see RFC 2047) into its raw bytes.
		 * The charset is not applied here; the caller converts the bytes to a string.
		 *
		 * @param encodedWord the encoded text of the word.
		 * @return the decoded bytes, or {@link ByteString#EMPTY} if they cannot be read;
		 *         never {@code null}.
		 */
		private static ByteString decodeQ(String encodedWord) {
		    // Rewrite every '_' as "=20" so the quoted-printable decoder can handle it.
		    String quotedPrintable = encodedWord.replace("_", "=20");
		    byte[] bytes = quotedPrintable.getBytes(Charset.forName("US-ASCII"));

		    QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
		    try {
		        return Okio.buffer(Okio.source(is)).readByteString();
		    } catch (IOException e) {
		        // Same fallback as decodeB(): the result is later handed to Buffer.write()
		        // by charsetDecode() and concat(), so it must not be null.
		        return ByteString.EMPTY;
		    }
		}

		/**
		 * Decodes the Base64 text of a 'B' encoded word into its raw bytes.
		 *
		 * @param encodedText the Base64 text of the word.
		 * @return the decoded bytes, or {@link ByteString#EMPTY} when
		 *         {@code ByteString.decodeBase64()} yields {@code null}.
		 */
		private static ByteString decodeB(String encodedText) {
		    ByteString bytes = ByteString.decodeBase64(encodedText);
		    if (bytes != null) {
		        return bytes;
		    }
		    return ByteString.EMPTY;
		}

		/**
		 * Joins two byte strings into a new one holding the bytes of {@code first}
		 * followed by the bytes of {@code second}.
		 */
		private static ByteString concat(ByteString first, ByteString second) {
		    Buffer joined = new Buffer();
		    joined.write(first);
		    joined.write(second);
		    return joined.readByteString();
		}


		/**
		 * Holder for a parsed encoded word whose bytes have not yet been converted to a string.
		 */
		private static class EncodedWord {
		// Charset name that charsetDecode() passes to CharsetSupport.readToString().
		private String charset;
		// Encoding identifier, stored by the parser as "Q" or "B".
		private String encoding;
		// Bytes produced by decodeQ()/decodeB(). The bytes of an adjacent word with the
		// same encoding and charset are joined to them via concat().
		private ByteString data;
		}
		}

k9mail-library/src/test/java/com/fsck/k9/mail/internet/DecoderUtilTest.java

+7 −1

Original line number	Diff line number	Diff line
		@@ -119,7 +119,7 @@ public class DecoderUtilTest {

		@Test
		public void decodeEncodedWords_withInvalidBase64String_returnsEmptyString() {
		assertInputDecodesToExpected("=?us-ascii?b?abc?=", "");
		assertInputDecodesToExpected("=?us-ascii?b?ab#?=", "");
		}

		@Test
		@@ -192,6 +192,12 @@ public class DecoderUtilTest {
		assertInputDecodesToExpected("=?us-ascii?Q?oh_no_?= =?utf-8?Q?=F0=9F=92=A9?=", "oh no 💩");
		}

		@Test
		public void decodeEncodedWords_withTwoCompleteEncodedWords_shouldProvideBoth() {
		    // Two whitespace-separated Base64 words with the same charset; the first one
		    // already ends with its own "==" padding.
		    String input = "=?UTF-8?B?W+aWsOioguWWrl0g6aGn5a6iOiB4eHhAeHh4LmNvbSDmnInmlrDoqILllq46ICMyMDE4MA==?= " +
		            "=?UTF-8?B?MTE4MTIzNDU2Nzg=?=";
		    String expected = "[新訂單] 顧客: xxx@xxx.com 有新訂單: #2018011812345678";

		    assertInputDecodesToExpected(input, expected);
		}

		@Test
		public void decodeEncodedWords_withRFC2047examples_decodesCorrectly() {
		assertInputDecodesToExpected("(=?ISO-8859-1?Q?a?=)", "(a)");