Loading app/core/src/main/java/com/fsck/k9/message/html/HtmlConverter.java +4 −85 Original line number Diff line number Diff line package com.fsck.k9.message.html; import java.util.Collections; import java.util.HashSet; import java.util.Locale; import java.util.Set; import android.text.Annotation; import android.text.Editable; import android.text.Html; import android.text.Html.TagHandler; import android.text.Spannable; import android.text.Spanned; import com.fsck.k9.K9; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.xml.sax.XMLReader; /** Loading Loading @@ -42,88 +37,12 @@ public class HtmlConverter { * @return Plain text result. */ public static String htmlToText(final String html) { return Html.fromHtml(html, null, new HtmlToTextTagHandler()).toString() Document document = Jsoup.parse(html); return HtmlToPlainText.toPlainText(document.body()) .replace(PREVIEW_OBJECT_CHARACTER, PREVIEW_OBJECT_REPLACEMENT) .replace(NBSP_CHARACTER, NBSP_REPLACEMENT); } /** * Custom tag handler to use when converting HTML messages to text. It currently handles text * representations of HTML tags that Android's built-in parser doesn't understand and hides code * contained in STYLE and SCRIPT blocks. */ private static class HtmlToTextTagHandler implements Html.TagHandler { // List of tags whose content should be ignored. private static final Set<String> TAGS_WITH_IGNORED_CONTENT; static { Set<String> set = new HashSet<>(); set.add("style"); set.add("script"); set.add("title"); set.add("!"); // comments TAGS_WITH_IGNORED_CONTENT = Collections.unmodifiableSet(set); } @Override public void handleTag(boolean opening, String tag, Editable output, XMLReader xmlReader) { tag = tag.toLowerCase(Locale.US); if (tag.equals("hr") && opening) { // In the case of an <hr>, replace it with a bunch of underscores. This is roughly // the behaviour of Outlook in Rich Text mode. 
output.append("_____________________________________________\r\n"); } else if (TAGS_WITH_IGNORED_CONTENT.contains(tag)) { handleIgnoredTag(opening, output); } } private static final String IGNORED_ANNOTATION_KEY = "K9_ANNOTATION"; private static final String IGNORED_ANNOTATION_VALUE = "hiddenSpan"; /** * When we come upon an ignored tag, we mark it with an Annotation object with a specific key * and value as above. We don't really need to be checking these values since Html.fromHtml() * doesn't use Annotation spans, but we should do it now to be safe in case they do start using * it in the future. * @param opening If this is an opening tag or not. * @param output Spannable string that we're working with. */ private void handleIgnoredTag(boolean opening, Editable output) { int len = output.length(); if (opening) { output.setSpan(new Annotation(IGNORED_ANNOTATION_KEY, IGNORED_ANNOTATION_VALUE), len, len, Spannable.SPAN_MARK_MARK); } else { Object start = getOpeningAnnotation(output); if (start != null) { int where = output.getSpanStart(start); // Remove the temporary Annotation span. output.removeSpan(start); // Delete everything between the start of the Annotation and the end of the string // (what we've generated so far). output.delete(where, len); } } } /** * Fetch the matching opening Annotation object and verify that it's the one added by K9. * @param output Spannable string we're working with. * @return Starting Annotation object. */ private Object getOpeningAnnotation(Editable output) { Object[] objs = output.getSpans(0, output.length(), Annotation.class); for (int i = objs.length - 1; i >= 0; i--) { Annotation span = (Annotation) objs[i]; if (output.getSpanFlags(objs[i]) == Spannable.SPAN_MARK_MARK && span.getKey().equals(IGNORED_ANNOTATION_KEY) && span.getValue().equals(IGNORED_ANNOTATION_VALUE)) { return objs[i]; } } return null; } } /** * Convert a text string into an HTML document. 
// app/core/src/main/java/com/fsck/k9/message/html/HtmlToPlainText.kt
package com.fsck.k9.message.html

import org.jsoup.nodes.Element
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
import org.jsoup.select.NodeTraversor
import org.jsoup.select.NodeVisitor

/**
 * Convert an HTML element to plain text.
 *
 * Based on Jsoup's HtmlToPlainText example.
 */
object HtmlToPlainText {
    /**
     * Walks [element] and all of its descendants and renders them as plain text.
     *
     * @param element Root of the (sub)tree to convert.
     * @return The plain text rendering, without trailing line breaks.
     */
    @JvmStatic
    fun toPlainText(element: Element): String {
        val formatter = FormattingVisitor()
        NodeTraversor.traverse(formatter, element)
        return formatter.toString()
    }
}

/**
 * Node visitor that accumulates a plain text rendering of the visited nodes.
 *
 * Text nodes are appended as text, `li` elements become "* " bullet lines, block elements start
 * on a new line and (when they contain text) are followed by an empty line, and `a` elements are
 * followed by their `href` in angle brackets. Text that does not fit is wrapped at [MAX_WIDTH]
 * columns.
 */
private class FormattingVisitor : NodeVisitor {
    /** Number of characters written to the current (last) line of [output]. */
    private var width = 0
    private val output = StringBuilder()

    /** Called when a node is entered, before its children are visited. */
    override fun head(node: Node, depth: Int) {
        val name = node.nodeName()
        when {
            node is TextNode -> append(node.text())
            name == "li" -> {
                startNewLine()
                append("* ")
            }
            node is Element && node.isBlock -> startNewLine()
        }
    }

    /** Called when a node is left, after all of its children have been visited. */
    override fun tail(node: Node, depth: Int) {
        val name = node.nodeName()
        when {
            name == "li" -> append("\n")
            node is Element && node.isBlock -> {
                if (node.hasText()) {
                    addEmptyLine()
                }
            }
            name == "a" -> {
                // Only links that resolve to an absolute URL are rendered.
                if (node.absUrl("href").isNotEmpty()) {
                    append(" <${node.attr("href")}>")
                }
            }
        }
    }

    /**
     * Appends [text] to the output, wrapping at word boundaries when it does not fit into the
     * current line.
     */
    private fun append(text: String) {
        if (text.startsWith("\n")) {
            // The text begins a new line, so nothing of the current line counts against the limit.
            width = 0
        }

        // Don't accumulate runs of spaces or put a lone space at the start of a line.
        if (text == " " && (output.isEmpty() || output.last() == ' ' || output.last() == '\n')) {
            return
        }

        if (text.length + width <= MAX_WIDTH) {
            output.append(text)
            // Only characters after the last line break belong to the current line. Counting the
            // line break itself would make the following line wrap one column too early.
            val lastLineBreak = text.lastIndexOf('\n')
            width = if (lastLineBreak >= 0) text.length - lastLineBreak - 1 else width + text.length
            return
        }

        val words = text.split(WHITESPACE)
        words.forEachIndexed { index, rawWord ->
            // Every word except the last one is followed by a single space.
            val word = if (index == words.lastIndex) rawWord else "$rawWord "
            // Breaking an empty line cannot make an overlong word fit; it would only insert a
            // blank line, so only wrap when the current line already has content.
            if (width > 0 && word.length + width > MAX_WIDTH) {
                output.append('\n').append(word)
                width = word.length
            } else {
                output.append(word)
                width += word.length
            }
        }
    }

    /** Ends the current line unless the output is empty or already at the start of a line. */
    private fun startNewLine() {
        if (output.isEmpty() || output.last() == '\n') {
            return
        }

        append("\n")
    }

    /** Makes sure non-empty output ends with an empty line (two consecutive line breaks). */
    private fun addEmptyLine() {
        if (output.isEmpty() || output.endsWith("\n\n")) {
            return
        }

        startNewLine()
        append("\n")
    }

    /** Returns the accumulated text with all trailing line breaks removed. */
    override fun toString(): String = output.trimEnd('\n').toString()

    companion object {
        private const val MAX_WIDTH = 76

        /** Separator used to split text into words; compiled once instead of on every call. */
        private val WHITESPACE = Regex("\\s+")
    }
}
app/core/src/main/java/com/fsck/k9/message/html/HtmlConverter.java +4 −85 Original line number Diff line number Diff line package com.fsck.k9.message.html; import java.util.Collections; import java.util.HashSet; import java.util.Locale; import java.util.Set; import android.text.Annotation; import android.text.Editable; import android.text.Html; import android.text.Html.TagHandler; import android.text.Spannable; import android.text.Spanned; import com.fsck.k9.K9; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.xml.sax.XMLReader; /** Loading Loading @@ -42,88 +37,12 @@ public class HtmlConverter { * @return Plain text result. */ public static String htmlToText(final String html) { return Html.fromHtml(html, null, new HtmlToTextTagHandler()).toString() Document document = Jsoup.parse(html); return HtmlToPlainText.toPlainText(document.body()) .replace(PREVIEW_OBJECT_CHARACTER, PREVIEW_OBJECT_REPLACEMENT) .replace(NBSP_CHARACTER, NBSP_REPLACEMENT); } /** * Custom tag handler to use when converting HTML messages to text. It currently handles text * representations of HTML tags that Android's built-in parser doesn't understand and hides code * contained in STYLE and SCRIPT blocks. */ private static class HtmlToTextTagHandler implements Html.TagHandler { // List of tags whose content should be ignored. private static final Set<String> TAGS_WITH_IGNORED_CONTENT; static { Set<String> set = new HashSet<>(); set.add("style"); set.add("script"); set.add("title"); set.add("!"); // comments TAGS_WITH_IGNORED_CONTENT = Collections.unmodifiableSet(set); } @Override public void handleTag(boolean opening, String tag, Editable output, XMLReader xmlReader) { tag = tag.toLowerCase(Locale.US); if (tag.equals("hr") && opening) { // In the case of an <hr>, replace it with a bunch of underscores. This is roughly // the behaviour of Outlook in Rich Text mode. 
output.append("_____________________________________________\r\n"); } else if (TAGS_WITH_IGNORED_CONTENT.contains(tag)) { handleIgnoredTag(opening, output); } } private static final String IGNORED_ANNOTATION_KEY = "K9_ANNOTATION"; private static final String IGNORED_ANNOTATION_VALUE = "hiddenSpan"; /** * When we come upon an ignored tag, we mark it with an Annotation object with a specific key * and value as above. We don't really need to be checking these values since Html.fromHtml() * doesn't use Annotation spans, but we should do it now to be safe in case they do start using * it in the future. * @param opening If this is an opening tag or not. * @param output Spannable string that we're working with. */ private void handleIgnoredTag(boolean opening, Editable output) { int len = output.length(); if (opening) { output.setSpan(new Annotation(IGNORED_ANNOTATION_KEY, IGNORED_ANNOTATION_VALUE), len, len, Spannable.SPAN_MARK_MARK); } else { Object start = getOpeningAnnotation(output); if (start != null) { int where = output.getSpanStart(start); // Remove the temporary Annotation span. output.removeSpan(start); // Delete everything between the start of the Annotation and the end of the string // (what we've generated so far). output.delete(where, len); } } } /** * Fetch the matching opening Annotation object and verify that it's the one added by K9. * @param output Spannable string we're working with. * @return Starting Annotation object. */ private Object getOpeningAnnotation(Editable output) { Object[] objs = output.getSpans(0, output.length(), Annotation.class); for (int i = objs.length - 1; i >= 0; i--) { Annotation span = (Annotation) objs[i]; if (output.getSpanFlags(objs[i]) == Spannable.SPAN_MARK_MARK && span.getKey().equals(IGNORED_ANNOTATION_KEY) && span.getValue().equals(IGNORED_ANNOTATION_VALUE)) { return objs[i]; } } return null; } } /** * Convert a text string into an HTML document. * Loading
// app/core/src/main/java/com/fsck/k9/message/html/HtmlToPlainText.kt
package com.fsck.k9.message.html

import org.jsoup.nodes.Element
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
import org.jsoup.select.NodeTraversor
import org.jsoup.select.NodeVisitor

/**
 * Convert an HTML element to plain text.
 *
 * Based on Jsoup's HtmlToPlainText example.
 */
object HtmlToPlainText {
    /**
     * Walks [element] and all of its descendants and renders them as plain text.
     *
     * @param element Root of the (sub)tree to convert.
     * @return The plain text rendering, without trailing line breaks.
     */
    @JvmStatic
    fun toPlainText(element: Element): String {
        val formatter = FormattingVisitor()
        NodeTraversor.traverse(formatter, element)
        return formatter.toString()
    }
}

/**
 * Node visitor that accumulates a plain text rendering of the visited nodes.
 *
 * Text nodes are appended as text, `li` elements become "* " bullet lines, block elements start
 * on a new line and (when they contain text) are followed by an empty line, and `a` elements are
 * followed by their `href` in angle brackets. Text that does not fit is wrapped at [MAX_WIDTH]
 * columns.
 */
private class FormattingVisitor : NodeVisitor {
    /** Number of characters written to the current (last) line of [output]. */
    private var width = 0
    private val output = StringBuilder()

    /** Called when a node is entered, before its children are visited. */
    override fun head(node: Node, depth: Int) {
        val name = node.nodeName()
        when {
            node is TextNode -> append(node.text())
            name == "li" -> {
                startNewLine()
                append("* ")
            }
            node is Element && node.isBlock -> startNewLine()
        }
    }

    /** Called when a node is left, after all of its children have been visited. */
    override fun tail(node: Node, depth: Int) {
        val name = node.nodeName()
        when {
            name == "li" -> append("\n")
            node is Element && node.isBlock -> {
                if (node.hasText()) {
                    addEmptyLine()
                }
            }
            name == "a" -> {
                // Only links that resolve to an absolute URL are rendered.
                if (node.absUrl("href").isNotEmpty()) {
                    append(" <${node.attr("href")}>")
                }
            }
        }
    }

    /**
     * Appends [text] to the output, wrapping at word boundaries when it does not fit into the
     * current line.
     */
    private fun append(text: String) {
        if (text.startsWith("\n")) {
            // The text begins a new line, so nothing of the current line counts against the limit.
            width = 0
        }

        // Don't accumulate runs of spaces or put a lone space at the start of a line.
        if (text == " " && (output.isEmpty() || output.last() == ' ' || output.last() == '\n')) {
            return
        }

        if (text.length + width <= MAX_WIDTH) {
            output.append(text)
            // Only characters after the last line break belong to the current line. Counting the
            // line break itself would make the following line wrap one column too early.
            val lastLineBreak = text.lastIndexOf('\n')
            width = if (lastLineBreak >= 0) text.length - lastLineBreak - 1 else width + text.length
            return
        }

        val words = text.split(WHITESPACE)
        words.forEachIndexed { index, rawWord ->
            // Every word except the last one is followed by a single space.
            val word = if (index == words.lastIndex) rawWord else "$rawWord "
            // Breaking an empty line cannot make an overlong word fit; it would only insert a
            // blank line, so only wrap when the current line already has content.
            if (width > 0 && word.length + width > MAX_WIDTH) {
                output.append('\n').append(word)
                width = word.length
            } else {
                output.append(word)
                width += word.length
            }
        }
    }

    /** Ends the current line unless the output is empty or already at the start of a line. */
    private fun startNewLine() {
        if (output.isEmpty() || output.last() == '\n') {
            return
        }

        append("\n")
    }

    /** Makes sure non-empty output ends with an empty line (two consecutive line breaks). */
    private fun addEmptyLine() {
        if (output.isEmpty() || output.endsWith("\n\n")) {
            return
        }

        startNewLine()
        append("\n")
    }

    /** Returns the accumulated text with all trailing line breaks removed. */
    override fun toString(): String = output.trimEnd('\n').toString()

    companion object {
        private const val MAX_WIDTH = 76

        /** Separator used to split text into words; compiled once instead of on every call. */
        private val WHITESPACE = Regex("\\s+")
    }
}