Loading k9mail/src/main/java/com/fsck/k9/helper/jsoup/AdvancedNodeTraversor.java 0 → 100644 +139 −0 Original line number Diff line number Diff line /* * The MIT License * * © 2009-2017, Jonathan Hedley <jonathan@hedley.net> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package com.fsck.k9.helper.jsoup; import com.fsck.k9.helper.jsoup.NodeFilter.HeadFilterDecision; import com.fsck.k9.helper.jsoup.NodeFilter.TailFilterDecision; import org.jsoup.nodes.Node; import org.jsoup.select.NodeTraversor; /** * Depth-first node traversor. * <p> * Based on {@link NodeTraversor}, but supports skipping sub trees, removing nodes, and stopping the traversal at any * point. * </p><p> * This is an enhancement of the <a href="https://github.com/jhy/jsoup/pull/849">jsoup pull request 'Improved node * traversal'</a> by <a href="https://github.com/kno10">Erich Schubert</a>. * </p> */ public class AdvancedNodeTraversor { /** * Filter result. */ public enum FilterResult { /** * Processing the tree was completed. */ ENDED, /** * Processing was stopped. */ STOPPED, /** * Processing the tree was completed and the root node was removed. */ ROOT_REMOVED } private NodeFilter filter; /** * Create a new traversor. * * @param filter * a class implementing the {@link NodeFilter} interface, to be called when visiting each node. */ public AdvancedNodeTraversor(NodeFilter filter) { this.filter = filter; } /** * Start a depth-first filtering of the root and all of its descendants. * * @param root * the root node point to traverse. * * @return The result of the filter operation. */ public FilterResult filter(Node root) { Node node = root; int depth = 0; while (node != null) { HeadFilterDecision headResult = filter.head(node, depth); if (headResult == HeadFilterDecision.STOP) { return FilterResult.STOPPED; } if (headResult == HeadFilterDecision.CONTINUE && node.childNodeSize() > 0) { node = node.childNode(0); ++depth; continue; } TailFilterDecision tailResult = TailFilterDecision.CONTINUE; while (node.nextSibling() == null && depth > 0) { if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) { tailResult = filter.tail(node, depth); if (tailResult == TailFilterDecision.STOP) { return FilterResult.STOPPED; } } Node prev = node; node = node.parentNode(); depth--; if (headResult == HeadFilterDecision.REMOVE || tailResult == TailFilterDecision.REMOVE) { prev.remove(); } headResult = HeadFilterDecision.CONTINUE; } if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) { tailResult = filter.tail(node, depth); if (tailResult == TailFilterDecision.STOP) { return FilterResult.STOPPED; } } Node prev = node; node = node.nextSibling(); if (headResult == HeadFilterDecision.REMOVE) { prev.remove(); } if (prev == root) { return headResult == HeadFilterDecision.REMOVE ? FilterResult.ROOT_REMOVED : FilterResult.ENDED; } } return FilterResult.ENDED; } } k9mail/src/main/java/com/fsck/k9/helper/jsoup/NodeFilter.java 0 → 100644 +111 −0 Original line number Diff line number Diff line package com.fsck.k9.helper.jsoup; import android.support.annotation.NonNull; import org.jsoup.nodes.Node; /** * Node filter interface. Provide an implementing class to {@link AdvancedNodeTraversor} to iterate through * nodes. * <p> * This interface provides two methods, {@code head} and {@code tail}. The head method is called when the node is first * seen, and the tail method when all of the node's children have been visited. As an example, head can be used to * create a start tag for a node, and tail to create the end tag. * </p> * <p> * For every node, the filter has to decide in {@link NodeFilter#head(Node, int)}) whether to * <ul> * <li>continue ({@link HeadFilterDecision#CONTINUE}),</li> * <li>skip all children ({@link HeadFilterDecision#SKIP_CHILDREN}),</li> * <li>skip node entirely ({@link HeadFilterDecision#SKIP_ENTIRELY}),</li> * <li>remove the subtree ({@link HeadFilterDecision#REMOVE}),</li> * <li>interrupt the iteration and return ({@link HeadFilterDecision#STOP}).</li> * </ul> * <p> * The difference between {@link HeadFilterDecision#SKIP_CHILDREN} and {@link HeadFilterDecision#SKIP_ENTIRELY} is that * the first will invoke {@link NodeFilter#tail(Node, int)} on the node, while the latter will not. * </p> * <p> * When {@link NodeFilter#tail(Node, int)} is called the filter has to decide whether to * <ul> * <li>continue ({@link TailFilterDecision#CONTINUE}),</li> * <li>remove the subtree ({@link TailFilterDecision#REMOVE}),</li> * <li>interrupt the iteration and return ({@link TailFilterDecision#STOP}).</li> * </ul> * </p> */ public interface NodeFilter { /** * Filter decision for {@link NodeFilter#head(Node, int)}. */ enum HeadFilterDecision { /** * Continue processing the tree. */ CONTINUE, /** * Skip the child nodes, but do call {@link NodeFilter#tail(Node, int)} next. */ SKIP_CHILDREN, /** * Skip the subtree, and do not call {@link NodeFilter#tail(Node, int)}. */ SKIP_ENTIRELY, /** * Remove the node and its children, and do not call {@link NodeFilter#tail(Node, int)}. */ REMOVE, /** * Stop processing. */ STOP } /** * Filter decision for {@link NodeFilter#tail(Node, int)}. */ enum TailFilterDecision { /** * Continue processing the tree. */ CONTINUE, /** * Remove the node and its children. */ REMOVE, /** * Stop processing. */ STOP } /** * Callback for when a node is first visited. * * @param node * the node being visited. * @param depth * the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node * of that will have depth 1. * * @return Filter decision */ @NonNull HeadFilterDecision head(Node node, int depth); /** * Callback for when a node is last visited, after all of its descendants have been visited. * * @param node * the node being visited. * @param depth * the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node * of that will have depth 1. * * @return Filter decision */ @NonNull TailFilterDecision tail(Node node, int depth); } k9mail/src/main/java/com/fsck/k9/message/html/HtmlProcessor.java +1 −1 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ public class HtmlProcessor { HtmlConverter.cssStylePre()); } static String toCompactString(Document document) { public static String toCompactString(Document document) { document.outputSettings() .prettyPrint(false) .indentAmount(0); Loading k9mail/src/main/java/com/fsck/k9/message/signature/HtmlSignatureRemover.java +81 −71 Original line number Diff line number Diff line package com.fsck.k9.message.signature; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import timber.log.Timber; import android.support.annotation.NonNull; import com.fsck.k9.K9; import org.htmlcleaner.CleanerProperties; import org.htmlcleaner.HtmlCleaner; import org.htmlcleaner.SimpleHtmlSerializer; import org.htmlcleaner.TagNode; import com.fsck.k9.helper.jsoup.AdvancedNodeTraversor; import com.fsck.k9.helper.jsoup.NodeFilter; import com.fsck.k9.message.html.HtmlProcessor; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.parser.Tag; public class HtmlSignatureRemover { private static final Pattern DASH_SIGNATURE_HTML = Pattern.compile("(<br( /)?>|\r?\n)-- <br( /)?>", Pattern.CASE_INSENSITIVE); private static final Pattern BLOCKQUOTE_START = Pattern.compile("<blockquote", Pattern.CASE_INSENSITIVE); private static final Pattern BLOCKQUOTE_END = Pattern.compile("</blockquote>", Pattern.CASE_INSENSITIVE); public static String stripSignature(String content) { return new HtmlSignatureRemover().stripSignatureInternal(content); } private String stripSignatureInternal(String content) { Document document = Jsoup.parse(content); public static String stripSignature(String content) { Matcher dashSignatureHtml = DASH_SIGNATURE_HTML.matcher(content); if (dashSignatureHtml.find()) { Matcher blockquoteStart = BLOCKQUOTE_START.matcher(content); Matcher blockquoteEnd = BLOCKQUOTE_END.matcher(content); List<Integer> start = new ArrayList<>(); List<Integer> end = new ArrayList<>(); while (blockquoteStart.find()) { start.add(blockquoteStart.start()); AdvancedNodeTraversor nodeTraversor = new AdvancedNodeTraversor(new StripSignatureFilter()); nodeTraversor.filter(document.body()); return HtmlProcessor.toCompactString(document); } static class StripSignatureFilter implements NodeFilter { private static final Pattern DASH_SIGNATURE_HTML = Pattern.compile("\\s*-- \\s*", Pattern.CASE_INSENSITIVE); private static final Tag BLOCKQUOTE = Tag.valueOf("blockquote"); private static final Tag BR = Tag.valueOf("br"); private static final Tag P = Tag.valueOf("p"); private boolean signatureFound = false; private boolean lastElementCausedLineBreak = false; private Element brElementPrecedingDashes; @NonNull @Override public HeadFilterDecision head(Node node, int depth) { if (signatureFound) { return HeadFilterDecision.REMOVE; } while (blockquoteEnd.find()) { end.add(blockquoteEnd.start()); if (node instanceof Element) { lastElementCausedLineBreak = false; Element element = (Element) node; if (element.tag().equals(BLOCKQUOTE)) { return HeadFilterDecision.SKIP_ENTIRELY; } if (start.size() != end.size()) { Timber.d("There are %d <blockquote> tags, but %d </blockquote> tags. Refusing to strip.", start.size(), end.size()); } else if (start.size() > 0) { // Ignore quoted signatures in blockquotes. dashSignatureHtml.region(0, start.get(0)); if (dashSignatureHtml.find()) { // before first <blockquote>. content = content.substring(0, dashSignatureHtml.start()); } else { for (int i = 0; i < start.size() - 1; i++) { // within blockquotes. if (end.get(i) < start.get(i + 1)) { dashSignatureHtml.region(end.get(i), start.get(i + 1)); if (dashSignatureHtml.find()) { content = content.substring(0, dashSignatureHtml.start()); break; } else if (node instanceof TextNode) { TextNode textNode = (TextNode) node; if (lastElementCausedLineBreak && DASH_SIGNATURE_HTML.matcher(textNode.getWholeText()).matches()) { Node nextNode = node.nextSibling(); if (nextNode instanceof Element && ((Element) nextNode).tag().equals(BR)) { signatureFound = true; if (brElementPrecedingDashes != null) { brElementPrecedingDashes.remove(); brElementPrecedingDashes = null; } return HeadFilterDecision.REMOVE; } } if (end.get(end.size() - 1) < content.length()) { // after last </blockquote>. dashSignatureHtml.region(end.get(end.size() - 1), content.length()); if (dashSignatureHtml.find()) { content = content.substring(0, dashSignatureHtml.start()); } return HeadFilterDecision.CONTINUE; } @NonNull @Override public TailFilterDecision tail(Node node, int depth) { if (signatureFound) { return TailFilterDecision.CONTINUE; } } else { // No blockquotes found. content = content.substring(0, dashSignatureHtml.start()); if (node instanceof Element) { Element element = (Element) node; boolean elementIsBr = element.tag().equals(BR); if (elementIsBr || element.tag().equals(P)) { lastElementCausedLineBreak = true; brElementPrecedingDashes = elementIsBr ? element : null; return TailFilterDecision.CONTINUE; } } // Fix the stripping off of closing tags if a signature was stripped, // as well as clean up the HTML of the quoted message. HtmlCleaner cleaner = new HtmlCleaner(); CleanerProperties properties = cleaner.getProperties(); // see http://htmlcleaner.sourceforge.net/parameters.php for descriptions properties.setNamespacesAware(false); properties.setAdvancedXmlEscape(false); properties.setOmitXmlDeclaration(true); properties.setOmitDoctypeDeclaration(false); properties.setTranslateSpecialEntities(false); properties.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(content); SimpleHtmlSerializer htmlSerialized = new SimpleHtmlSerializer(properties); content = htmlSerialized.getAsString(node, "UTF8"); return content; lastElementCausedLineBreak = false; return TailFilterDecision.CONTINUE; } } } k9mail/src/test/java/com/fsck/k9/message/signature/HtmlSignatureRemoverTest.java +3 −5 Original line number Diff line number Diff line Loading @@ -3,7 +3,6 @@ package com.fsck.k9.message.signature; import com.fsck.k9.K9RobolectricTestRunner; import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.robolectric.annotation.Config; Loading @@ -27,7 +26,6 @@ public class HtmlSignatureRemoverTest { assertEquals("This is the body text", extractText(withoutSignature)); } @Ignore @Test public void shouldStripSignatureFromThunderbirdStyleHtml() throws Exception { String html = "<html>\r\n" + Loading Loading @@ -88,8 +86,8 @@ public class HtmlSignatureRemoverTest { assertEquals("<html><head></head><body>" + "<blockquote>" + "This is some quoted text" + "<br />" + "-- <br />" + "<br>" + "-- <br>" + "Inner signature" + "</blockquote>" + "<div>This is the body text</div>" + Loading Loading @@ -141,7 +139,7 @@ public class HtmlSignatureRemoverTest { String withoutSignature = HtmlSignatureRemover.stripSignature(html); assertEquals("<html><head></head><body>" + "This is the body text<br />" + "This is the body text<br>" + "<blockquote>Some quote</blockquote>" + "</body></html>", withoutSignature); Loading Loading
k9mail/src/main/java/com/fsck/k9/helper/jsoup/AdvancedNodeTraversor.java 0 → 100644 +139 −0 Original line number Diff line number Diff line /* * The MIT License * * © 2009-2017, Jonathan Hedley <jonathan@hedley.net> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package com.fsck.k9.helper.jsoup; import com.fsck.k9.helper.jsoup.NodeFilter.HeadFilterDecision; import com.fsck.k9.helper.jsoup.NodeFilter.TailFilterDecision; import org.jsoup.nodes.Node; import org.jsoup.select.NodeTraversor; /** * Depth-first node traversor. * <p> * Based on {@link NodeTraversor}, but supports skipping sub trees, removing nodes, and stopping the traversal at any * point. * </p><p> * This is an enhancement of the <a href="https://github.com/jhy/jsoup/pull/849">jsoup pull request 'Improved node * traversal'</a> by <a href="https://github.com/kno10">Erich Schubert</a>. * </p> */ public class AdvancedNodeTraversor { /** * Filter result. */ public enum FilterResult { /** * Processing the tree was completed. */ ENDED, /** * Processing was stopped. */ STOPPED, /** * Processing the tree was completed and the root node was removed. */ ROOT_REMOVED } private NodeFilter filter; /** * Create a new traversor. * * @param filter * a class implementing the {@link NodeFilter} interface, to be called when visiting each node. */ public AdvancedNodeTraversor(NodeFilter filter) { this.filter = filter; } /** * Start a depth-first filtering of the root and all of its descendants. * * @param root * the root node point to traverse. * * @return The result of the filter operation. */ public FilterResult filter(Node root) { Node node = root; int depth = 0; while (node != null) { HeadFilterDecision headResult = filter.head(node, depth); if (headResult == HeadFilterDecision.STOP) { return FilterResult.STOPPED; } if (headResult == HeadFilterDecision.CONTINUE && node.childNodeSize() > 0) { node = node.childNode(0); ++depth; continue; } TailFilterDecision tailResult = TailFilterDecision.CONTINUE; while (node.nextSibling() == null && depth > 0) { if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) { tailResult = filter.tail(node, depth); if (tailResult == TailFilterDecision.STOP) { return FilterResult.STOPPED; } } Node prev = node; node = node.parentNode(); depth--; if (headResult == HeadFilterDecision.REMOVE || tailResult == TailFilterDecision.REMOVE) { prev.remove(); } headResult = HeadFilterDecision.CONTINUE; } if (headResult == HeadFilterDecision.CONTINUE || headResult == HeadFilterDecision.SKIP_CHILDREN) { tailResult = filter.tail(node, depth); if (tailResult == TailFilterDecision.STOP) { return FilterResult.STOPPED; } } Node prev = node; node = node.nextSibling(); if (headResult == HeadFilterDecision.REMOVE) { prev.remove(); } if (prev == root) { return headResult == HeadFilterDecision.REMOVE ? FilterResult.ROOT_REMOVED : FilterResult.ENDED; } } return FilterResult.ENDED; } }
k9mail/src/main/java/com/fsck/k9/helper/jsoup/NodeFilter.java 0 → 100644 +111 −0 Original line number Diff line number Diff line package com.fsck.k9.helper.jsoup; import android.support.annotation.NonNull; import org.jsoup.nodes.Node; /** * Node filter interface. Provide an implementing class to {@link AdvancedNodeTraversor} to iterate through * nodes. * <p> * This interface provides two methods, {@code head} and {@code tail}. The head method is called when the node is first * seen, and the tail method when all of the node's children have been visited. As an example, head can be used to * create a start tag for a node, and tail to create the end tag. * </p> * <p> * For every node, the filter has to decide in {@link NodeFilter#head(Node, int)}) whether to * <ul> * <li>continue ({@link HeadFilterDecision#CONTINUE}),</li> * <li>skip all children ({@link HeadFilterDecision#SKIP_CHILDREN}),</li> * <li>skip node entirely ({@link HeadFilterDecision#SKIP_ENTIRELY}),</li> * <li>remove the subtree ({@link HeadFilterDecision#REMOVE}),</li> * <li>interrupt the iteration and return ({@link HeadFilterDecision#STOP}).</li> * </ul> * <p> * The difference between {@link HeadFilterDecision#SKIP_CHILDREN} and {@link HeadFilterDecision#SKIP_ENTIRELY} is that * the first will invoke {@link NodeFilter#tail(Node, int)} on the node, while the latter will not. * </p> * <p> * When {@link NodeFilter#tail(Node, int)} is called the filter has to decide whether to * <ul> * <li>continue ({@link TailFilterDecision#CONTINUE}),</li> * <li>remove the subtree ({@link TailFilterDecision#REMOVE}),</li> * <li>interrupt the iteration and return ({@link TailFilterDecision#STOP}).</li> * </ul> * </p> */ public interface NodeFilter { /** * Filter decision for {@link NodeFilter#head(Node, int)}. */ enum HeadFilterDecision { /** * Continue processing the tree. */ CONTINUE, /** * Skip the child nodes, but do call {@link NodeFilter#tail(Node, int)} next. */ SKIP_CHILDREN, /** * Skip the subtree, and do not call {@link NodeFilter#tail(Node, int)}. */ SKIP_ENTIRELY, /** * Remove the node and its children, and do not call {@link NodeFilter#tail(Node, int)}. */ REMOVE, /** * Stop processing. */ STOP } /** * Filter decision for {@link NodeFilter#tail(Node, int)}. */ enum TailFilterDecision { /** * Continue processing the tree. */ CONTINUE, /** * Remove the node and its children. */ REMOVE, /** * Stop processing. */ STOP } /** * Callback for when a node is first visited. * * @param node * the node being visited. * @param depth * the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node * of that will have depth 1. * * @return Filter decision */ @NonNull HeadFilterDecision head(Node node, int depth); /** * Callback for when a node is last visited, after all of its descendants have been visited. * * @param node * the node being visited. * @param depth * the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node * of that will have depth 1. * * @return Filter decision */ @NonNull TailFilterDecision tail(Node node, int depth); }
k9mail/src/main/java/com/fsck/k9/message/html/HtmlProcessor.java +1 −1 Original line number Diff line number Diff line Loading @@ -30,7 +30,7 @@ public class HtmlProcessor { HtmlConverter.cssStylePre()); } static String toCompactString(Document document) { public static String toCompactString(Document document) { document.outputSettings() .prettyPrint(false) .indentAmount(0); Loading
k9mail/src/main/java/com/fsck/k9/message/signature/HtmlSignatureRemover.java +81 −71 Original line number Diff line number Diff line package com.fsck.k9.message.signature; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import timber.log.Timber; import android.support.annotation.NonNull; import com.fsck.k9.K9; import org.htmlcleaner.CleanerProperties; import org.htmlcleaner.HtmlCleaner; import org.htmlcleaner.SimpleHtmlSerializer; import org.htmlcleaner.TagNode; import com.fsck.k9.helper.jsoup.AdvancedNodeTraversor; import com.fsck.k9.helper.jsoup.NodeFilter; import com.fsck.k9.message.html.HtmlProcessor; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.parser.Tag; public class HtmlSignatureRemover { private static final Pattern DASH_SIGNATURE_HTML = Pattern.compile("(<br( /)?>|\r?\n)-- <br( /)?>", Pattern.CASE_INSENSITIVE); private static final Pattern BLOCKQUOTE_START = Pattern.compile("<blockquote", Pattern.CASE_INSENSITIVE); private static final Pattern BLOCKQUOTE_END = Pattern.compile("</blockquote>", Pattern.CASE_INSENSITIVE); public static String stripSignature(String content) { return new HtmlSignatureRemover().stripSignatureInternal(content); } private String stripSignatureInternal(String content) { Document document = Jsoup.parse(content); public static String stripSignature(String content) { Matcher dashSignatureHtml = DASH_SIGNATURE_HTML.matcher(content); if (dashSignatureHtml.find()) { Matcher blockquoteStart = BLOCKQUOTE_START.matcher(content); Matcher blockquoteEnd = BLOCKQUOTE_END.matcher(content); List<Integer> start = new ArrayList<>(); List<Integer> end = new ArrayList<>(); while (blockquoteStart.find()) { start.add(blockquoteStart.start()); AdvancedNodeTraversor nodeTraversor = new AdvancedNodeTraversor(new StripSignatureFilter()); nodeTraversor.filter(document.body()); return HtmlProcessor.toCompactString(document); } static class StripSignatureFilter implements NodeFilter { private static final Pattern DASH_SIGNATURE_HTML = Pattern.compile("\\s*-- \\s*", Pattern.CASE_INSENSITIVE); private static final Tag BLOCKQUOTE = Tag.valueOf("blockquote"); private static final Tag BR = Tag.valueOf("br"); private static final Tag P = Tag.valueOf("p"); private boolean signatureFound = false; private boolean lastElementCausedLineBreak = false; private Element brElementPrecedingDashes; @NonNull @Override public HeadFilterDecision head(Node node, int depth) { if (signatureFound) { return HeadFilterDecision.REMOVE; } while (blockquoteEnd.find()) { end.add(blockquoteEnd.start()); if (node instanceof Element) { lastElementCausedLineBreak = false; Element element = (Element) node; if (element.tag().equals(BLOCKQUOTE)) { return HeadFilterDecision.SKIP_ENTIRELY; } if (start.size() != end.size()) { Timber.d("There are %d <blockquote> tags, but %d </blockquote> tags. Refusing to strip.", start.size(), end.size()); } else if (start.size() > 0) { // Ignore quoted signatures in blockquotes. dashSignatureHtml.region(0, start.get(0)); if (dashSignatureHtml.find()) { // before first <blockquote>. content = content.substring(0, dashSignatureHtml.start()); } else { for (int i = 0; i < start.size() - 1; i++) { // within blockquotes. if (end.get(i) < start.get(i + 1)) { dashSignatureHtml.region(end.get(i), start.get(i + 1)); if (dashSignatureHtml.find()) { content = content.substring(0, dashSignatureHtml.start()); break; } else if (node instanceof TextNode) { TextNode textNode = (TextNode) node; if (lastElementCausedLineBreak && DASH_SIGNATURE_HTML.matcher(textNode.getWholeText()).matches()) { Node nextNode = node.nextSibling(); if (nextNode instanceof Element && ((Element) nextNode).tag().equals(BR)) { signatureFound = true; if (brElementPrecedingDashes != null) { brElementPrecedingDashes.remove(); brElementPrecedingDashes = null; } return HeadFilterDecision.REMOVE; } } if (end.get(end.size() - 1) < content.length()) { // after last </blockquote>. dashSignatureHtml.region(end.get(end.size() - 1), content.length()); if (dashSignatureHtml.find()) { content = content.substring(0, dashSignatureHtml.start()); } return HeadFilterDecision.CONTINUE; } @NonNull @Override public TailFilterDecision tail(Node node, int depth) { if (signatureFound) { return TailFilterDecision.CONTINUE; } } else { // No blockquotes found. content = content.substring(0, dashSignatureHtml.start()); if (node instanceof Element) { Element element = (Element) node; boolean elementIsBr = element.tag().equals(BR); if (elementIsBr || element.tag().equals(P)) { lastElementCausedLineBreak = true; brElementPrecedingDashes = elementIsBr ? element : null; return TailFilterDecision.CONTINUE; } } // Fix the stripping off of closing tags if a signature was stripped, // as well as clean up the HTML of the quoted message. HtmlCleaner cleaner = new HtmlCleaner(); CleanerProperties properties = cleaner.getProperties(); // see http://htmlcleaner.sourceforge.net/parameters.php for descriptions properties.setNamespacesAware(false); properties.setAdvancedXmlEscape(false); properties.setOmitXmlDeclaration(true); properties.setOmitDoctypeDeclaration(false); properties.setTranslateSpecialEntities(false); properties.setRecognizeUnicodeChars(false); TagNode node = cleaner.clean(content); SimpleHtmlSerializer htmlSerialized = new SimpleHtmlSerializer(properties); content = htmlSerialized.getAsString(node, "UTF8"); return content; lastElementCausedLineBreak = false; return TailFilterDecision.CONTINUE; } } }
k9mail/src/test/java/com/fsck/k9/message/signature/HtmlSignatureRemoverTest.java +3 −5 Original line number Diff line number Diff line Loading @@ -3,7 +3,6 @@ package com.fsck.k9.message.signature; import com.fsck.k9.K9RobolectricTestRunner; import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.robolectric.annotation.Config; Loading @@ -27,7 +26,6 @@ public class HtmlSignatureRemoverTest { assertEquals("This is the body text", extractText(withoutSignature)); } @Ignore @Test public void shouldStripSignatureFromThunderbirdStyleHtml() throws Exception { String html = "<html>\r\n" + Loading Loading @@ -88,8 +86,8 @@ public class HtmlSignatureRemoverTest { assertEquals("<html><head></head><body>" + "<blockquote>" + "This is some quoted text" + "<br />" + "-- <br />" + "<br>" + "-- <br>" + "Inner signature" + "</blockquote>" + "<div>This is the body text</div>" + Loading Loading @@ -141,7 +139,7 @@ public class HtmlSignatureRemoverTest { String withoutSignature = HtmlSignatureRemover.stripSignature(html); assertEquals("<html><head></head><body>" + "This is the body text<br />" + "This is the body text<br>" + "<blockquote>Some quote</blockquote>" + "</body></html>", withoutSignature); Loading