Merge pull request #3639 from k9mail/html_to_text (7b105d7c) · Commits · e / os / Mail

app/core/src/main/java/com/fsck/k9/message/html/HtmlConverter.java

+4 −85

Original line number	Diff line number	Diff line
		package com.fsck.k9.message.html;


		import java.util.Collections;
		import java.util.HashSet;
		import java.util.Locale;
		import java.util.Set;

		import android.text.Annotation;
		import android.text.Editable;
		import android.text.Html;
		import android.text.Html.TagHandler;
		import android.text.Spannable;
		import android.text.Spanned;

		import com.fsck.k9.K9;
		import org.jsoup.Jsoup;
		import org.jsoup.nodes.Document;
		import org.xml.sax.XMLReader;

		/**
		@@ -42,88 +37,12 @@ public class HtmlConverter {
		* @return Plain text result.
		*/
		public static String htmlToText(final String html) {
		return Html.fromHtml(html, null, new HtmlToTextTagHandler()).toString()
		Document document = Jsoup.parse(html);
		return HtmlToPlainText.toPlainText(document.body())
		.replace(PREVIEW_OBJECT_CHARACTER, PREVIEW_OBJECT_REPLACEMENT)
		.replace(NBSP_CHARACTER, NBSP_REPLACEMENT);
		}

		/**
		 * Tag handler used while converting HTML messages to text. It renders {@code <hr>} as a line
		 * of underscores and discards everything found inside STYLE, SCRIPT and TITLE elements as well
		 * as comments.
		 */
		private static class HtmlToTextTagHandler implements Html.TagHandler {
		    // Key/value pair identifying the marker spans created by this handler.
		    private static final String IGNORED_ANNOTATION_KEY = "K9_ANNOTATION";
		    private static final String IGNORED_ANNOTATION_VALUE = "hiddenSpan";

		    // Tags whose content is dropped from the output ("!" stands for comments).
		    private static final Set<String> TAGS_WITH_IGNORED_CONTENT;
		    static {
		        Set<String> ignoredTags = new HashSet<>();
		        Collections.addAll(ignoredTags, "style", "script", "title", "!");
		        TAGS_WITH_IGNORED_CONTENT = Collections.unmodifiableSet(ignoredTags);
		    }

		    @Override
		    public void handleTag(boolean opening, String tag, Editable output, XMLReader xmlReader) {
		        String normalizedTag = tag.toLowerCase(Locale.US);

		        if (opening && "hr".equals(normalizedTag)) {
		            // A horizontal rule becomes a run of underscores, roughly what Outlook does in
		            // Rich Text mode.
		            output.append("_____________________________________________\r\n");
		            return;
		        }

		        if (TAGS_WITH_IGNORED_CONTENT.contains(normalizedTag)) {
		            handleIgnoredTag(opening, output);
		        }
		    }

		    /**
		     * Hide the content of an ignored tag. The opening tag drops a zero-length marker
		     * Annotation at the current end of the output; the closing tag looks that marker up and
		     * deletes everything generated since. Html.fromHtml() does not use Annotation spans
		     * itself, but the marker is still matched by key and value in case that ever changes.
		     *
		     * @param opening Whether this is the opening tag.
		     * @param output Spannable string being built.
		     */
		    private void handleIgnoredTag(boolean opening, Editable output) {
		        int end = output.length();

		        if (opening) {
		            Annotation marker = new Annotation(IGNORED_ANNOTATION_KEY, IGNORED_ANNOTATION_VALUE);
		            output.setSpan(marker, end, end, Spannable.SPAN_MARK_MARK);
		            return;
		        }

		        Object openingMarker = getOpeningAnnotation(output);
		        if (openingMarker == null) {
		            return;
		        }

		        int begin = output.getSpanStart(openingMarker);
		        // The marker has served its purpose; remove it before cutting the hidden text.
		        output.removeSpan(openingMarker);
		        // Drop everything from the marker position up to what has been generated so far.
		        output.delete(begin, end);
		    }

		    /**
		     * Find the most recently added marker Annotation that was created by this handler.
		     *
		     * @param output Spannable string being built.
		     * @return The marker span, or {@code null} if there is none.
		     */
		    private Object getOpeningAnnotation(Editable output) {
		        Annotation[] annotations = output.getSpans(0, output.length(), Annotation.class);

		        int index = annotations.length;
		        while (--index >= 0) {
		            Annotation candidate = annotations[index];
		            boolean isMarker = output.getSpanFlags(candidate) == Spannable.SPAN_MARK_MARK;
		            if (isMarker
		                    && candidate.getKey().equals(IGNORED_ANNOTATION_KEY)
		                    && candidate.getValue().equals(IGNORED_ANNOTATION_VALUE)) {
		                return candidate;
		            }
		        }

		        return null;
		    }
		}

		/**
		* Convert a text string into an HTML document.
		*

app/core/src/main/java/com/fsck/k9/message/html/HtmlToPlainText.kt

0 → 100644

+124 −0

Original line number	Diff line number	Diff line
		package com.fsck.k9.message.html

		import org.jsoup.nodes.Element
		import org.jsoup.nodes.Node
		import org.jsoup.nodes.TextNode
		import org.jsoup.select.NodeTraversor
		import org.jsoup.select.NodeVisitor

		/**
		 * Renders an HTML element as plain text.
		 *
		 * Derived from the HtmlToPlainText example that ships with Jsoup.
		 */
		object HtmlToPlainText {
		    /**
		     * Walks [element] and all of its descendants and returns the text collected by the
		     * formatting visitor.
		     */
		    @JvmStatic
		    fun toPlainText(element: Element): String =
		        FormattingVisitor()
		            .also { visitor -> NodeTraversor.traverse(visitor, element) }
		            .toString()
		}

		/**
		 * [NodeVisitor] that accumulates a plain text rendering of the nodes it is shown.
		 *
		 * - Text nodes contribute their text.
		 * - `li` elements start a new line prefixed with `"* "` and end with a line break.
		 * - Block elements start on a new line and, when they contain text, are followed by an empty line.
		 * - `a` elements are followed by ` <href>` when `absUrl("href")` is not empty.
		 *
		 * Text that does not fit into [MAX_WIDTH] columns is wrapped at whitespace.
		 */
		private class FormattingVisitor : NodeVisitor {
		    /** Number of characters on the line currently being written. */
		    private var width = 0
		    private val output = StringBuilder()

		    /** Invoked when [node] is entered, before its children are visited. */
		    override fun head(node: Node, depth: Int) {
		        val name = node.nodeName()
		        when {
		            node is TextNode -> append(node.text())
		            name == "li" -> {
		                startNewLine()
		                append("* ")
		            }
		            node is Element && node.isBlock -> startNewLine()
		        }
		    }

		    /** Invoked when [node] is left, after all of its children have been visited. */
		    override fun tail(node: Node, depth: Int) {
		        val name = node.nodeName()
		        when {
		            name == "li" -> append("\n")
		            node is Element && node.isBlock -> {
		                if (node.hasText()) {
		                    addEmptyLine()
		                }
		            }
		            name == "a" -> {
		                // The absolute URL only decides whether a link target is shown at all; the
		                // attribute value is printed as it was written.
		                if (node.absUrl("href").isNotEmpty()) {
		                    append(" <${node.attr("href")}>")
		                }
		            }
		        }
		    }

		    /**
		     * Appends [text] to the output, wrapping it at whitespace when it would exceed [MAX_WIDTH].
		     *
		     * A single space is dropped when the output is empty or already ends in a space or line break.
		     */
		    private fun append(text: String) {
		        if (text.startsWith("\n")) {
		            width = 0
		        }

		        if (text == " " && (output.isEmpty() || output.last() == ' ' || output.last() == '\n')) {
		            return
		        }

		        if (text.length + width > MAX_WIDTH) {
		            val words = text.split(WHITESPACE)
		            for ((index, rawWord) in words.withIndex()) {
		                // Every word but the last gets its separating space back.
		                val word = if (index == words.lastIndex) rawWord else "$rawWord "

		                // Breaking the line when nothing has been written on it yet cannot make an
		                // overlong word fit; it would only produce an empty line.
		                val atLineStart = output.isEmpty() || output.last() == '\n'
		                if (!atLineStart && word.length + width > MAX_WIDTH) {
		                    output.append('\n').append(word)
		                    width = word.length
		                } else {
		                    output.append(word)
		                    width += word.length
		                }
		            }
		        } else {
		            output.append(text)

		            // Only the characters after the last line break count towards the current line.
		            val lastBreak = text.lastIndexOf('\n')
		            width = if (lastBreak >= 0) text.length - lastBreak - 1 else width + text.length
		        }
		    }

		    /** Ends the current line unless the output is empty or already at the start of a line. */
		    private fun startNewLine() {
		        if (output.isEmpty() || output.last() == '\n') {
		            return
		        }

		        append("\n")
		    }

		    /** Makes sure non-empty output ends with an empty line (two consecutive line breaks). */
		    private fun addEmptyLine() {
		        if (output.isEmpty() || output.endsWith("\n\n")) {
		            return
		        }

		        startNewLine()
		        append("\n")
		    }

		    /** Returns the collected text without trailing line breaks. */
		    override fun toString(): String = output.trimEnd('\n').toString()

		    companion object {
		        private const val MAX_WIDTH = 76

		        // Compiled once instead of on every wrapped append.
		        private val WHITESPACE = Regex("\\s+")
		    }
		}