Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 268189c1 authored by cketti's avatar cketti
Browse files

Use jsoup in HtmlProcessor and HtmlSanitizer

parent febb7448
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ dependencies {
    compile 'commons-io:commons-io:2.4'
    compile "com.android.support:support-v4:${androidSupportLibraryVersion}"
    compile 'net.sourceforge.htmlcleaner:htmlcleaner:2.18'
    compile 'org.jsoup:jsoup:1.10.2'
    compile 'de.cketti.library.changelog:ckchangelog:1.2.1'
    compile 'com.github.bumptech.glide:glide:3.6.1'
    compile 'com.splitwise:tokenautocomplete:2.0.7'
@@ -41,7 +42,6 @@ dependencies {
    testCompile "org.robolectric:robolectric:${robolectricVersion}"
    testCompile "junit:junit:${junitVersion}"
    testCompile "org.mockito:mockito-core:${mockitoVersion}"
    testCompile 'org.jsoup:jsoup:1.10.2'
}

android {
@@ -96,6 +96,7 @@ android {
        exclude 'META-INF/LICENSE.txt'
        exclude 'META-INF/NOTICE'
        exclude 'META-INF/NOTICE.txt'
        exclude 'META-INF/README'
        exclude 'LICENSE.txt'
    }

+100 −0
Original line number Diff line number Diff line
package com.fsck.k9.message.html;


import java.util.List;
import java.util.Locale;

import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

import static java.util.Arrays.asList;


class HeadCleaner {
    private static final List<String> ALLOWED_TAGS = asList("style", "meta");


    public void clean(Document dirtyDocument, Document cleanedDocument) {
        copySafeNodes(dirtyDocument.head(), cleanedDocument.head());
    }

    private void copySafeNodes(Element source, Element destination) {
        CleaningVisitor cleaningVisitor = new CleaningVisitor(source, destination);
        NodeTraversor traversor = new NodeTraversor(cleaningVisitor);
        traversor.traverse(source);
    }


    static class CleaningVisitor implements NodeVisitor {
        private final Element root;
        private Element destination;
        private boolean skipChildren = false;


        CleaningVisitor(Element root, Element destination) {
            this.root = root;
            this.destination = destination;
        }

        public void head(Node source, int depth) {
            if (skipChildren) {
                return;
            }

            if (source instanceof Element) {
                Element sourceElement = (Element) source;

                if (isSafeTag(sourceElement)) {
                    String sourceTag = sourceElement.tagName();
                    Attributes destinationAttributes = sourceElement.attributes().clone();
                    Element destinationChild = new Element(Tag.valueOf(sourceTag), sourceElement.baseUri(), destinationAttributes);

                    destination.appendChild(destinationChild);
                    destination = destinationChild;
                } else if (source != root) {
                    skipChildren = true;
                }
            } else if (source instanceof TextNode) {
                TextNode sourceText = (TextNode) source;
                TextNode destinationText = new TextNode(sourceText.getWholeText(), source.baseUri());
                destination.appendChild(destinationText);
            } else if (source instanceof DataNode && isSafeTag(source.parent())) {
                DataNode sourceData = (DataNode) source;
                DataNode destinationData = new DataNode(sourceData.getWholeData(), source.baseUri());
                destination.appendChild(destinationData);
            }
        }

        public void tail(Node source, int depth) {
            if (source == destination) {
                destination = destination.parent();
                skipChildren = false;
            }
        }

        private boolean isSafeTag(Node node) {
            if (isMetaRefresh(node)) {
                return false;
            }

            String tag = node.nodeName().toLowerCase(Locale.ROOT);
            return ALLOWED_TAGS.contains(tag);
        }

        private boolean isMetaRefresh(Node node) {
            if (!"meta".equalsIgnoreCase(node.nodeName())) {
                return false;
            }

            String attributeValue = node.attributes().getIgnoreCase("http-equiv");
            return "refresh".equalsIgnoreCase(attributeValue.trim());
        }
    }
}
+2 −2
Original line number Diff line number Diff line
@@ -1260,7 +1260,7 @@ public class HtmlConverter {
                "</body></html>";
    }

    private static String cssStyleTheme() {
    static String cssStyleTheme() {
        if (K9.getK9MessageViewTheme() == K9.Theme.DARK)  {
            return "<style type=\"text/css\">" +
                    "* { background: black ! important; color: #F3F3F3 !important }" +
@@ -1283,7 +1283,7 @@ public class HtmlConverter {
     *      A {@code <style>} element that can be dynamically included in the HTML
     *      {@code <head>} element when messages are displayed.
     */
    private static String cssStylePre() {
    static String cssStylePre() {
        final String font = K9.messageViewFixedWidthFont()
                ? "monospace"
                : "sans-serif";
+22 −3
Original line number Diff line number Diff line
package com.fsck.k9.message.html;


import org.jsoup.nodes.Document;


public class HtmlProcessor {
    private final HtmlSanitizer htmlSanitizer;


    public static HtmlProcessor newInstance() {
        HtmlSanitizer htmlSanitizer = HtmlSanitizer.getInstance();
        HtmlSanitizer htmlSanitizer = new HtmlSanitizer();
        return new HtmlProcessor(htmlSanitizer);
    }

@@ -15,7 +18,23 @@ public class HtmlProcessor {
    }

    public String processForDisplay(String html) {
        String wrappedHtml = HtmlConverter.wrapMessageContent(html);
        return htmlSanitizer.sanitize(wrappedHtml);
        Document document = htmlSanitizer.sanitize(html);
        addCustomHeadContents(document);

        return toCompactString(document);
    }

    /**
     * Appends a viewport {@code <meta>} element followed by the markup returned by
     * {@link HtmlConverter#cssStyleTheme()} and {@link HtmlConverter#cssStylePre()} to the head
     * of the given document.
     *
     * @param document
     *         The document whose head is extended.
     */
    private void addCustomHeadContents(Document document) {
        String viewportMeta = "<meta name=\"viewport\" content=\"width=device-width\"/>";
        String customHeadHtml = viewportMeta + HtmlConverter.cssStyleTheme() + HtmlConverter.cssStylePre();

        document.head().append(customHeadHtml);
    }

    /**
     * Serializes the document to HTML with pretty printing disabled and an indent amount of 0.
     *
     * <p>Note that this changes the output settings of the supplied document.</p>
     *
     * @param document
     *         The document to serialize.
     *
     * @return The value of {@code document.html()} after the output settings were adjusted.
     */
    static String toCompactString(Document document) {
        Document.OutputSettings compactSettings = document.outputSettings();
        compactSettings.prettyPrint(false);
        compactSettings.indentAmount(0);

        return document.html();
    }
}
+21 −53
Original line number Diff line number Diff line
package com.fsck.k9.message.html;


import android.support.annotation.VisibleForTesting;

import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.HtmlSerializer;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlcleaner.TagNode;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;


public class HtmlSanitizer {
    private static final HtmlCleaner HTML_CLEANER;
    private static final HtmlSerializer HTML_SERIALIZER;

    static {
        CleanerProperties properties = createCleanerProperties();
        HTML_CLEANER = new HtmlCleaner(properties);
        HTML_SERIALIZER = new SimpleHtmlSerializer(properties);
    }
    private final HeadCleaner headCleaner;
    private final Cleaner cleaner;

    HtmlSanitizer() {
        Whitelist whitelist = Whitelist.relaxed()
                .addTags("font")
                .addAttributes("table", "align", "bgcolor", "border", "cellpadding", "cellspacing", "width")
                .addAttributes(":all", "class", "style", "id")
                .addProtocols("img", "src", "http", "https", "cid", "data");

    public static HtmlSanitizer getInstance() {
        return new HtmlSanitizer();
        cleaner = new Cleaner(whitelist);
        headCleaner = new HeadCleaner();
    }

    @VisibleForTesting
    HtmlSanitizer() {}


    /**
     * Cleans the supplied HTML with {@code HTML_CLEANER}, removes meta refresh elements from the
     * resulting tree and serializes it with {@code HTML_SERIALIZER}.
     *
     * @param html
     *         The HTML to sanitize.
     *
     * @return The serialized form of the cleaned node tree.
     */
    public String sanitize(String html) {
        TagNode cleanedRoot = HTML_CLEANER.clean(html);
        removeMetaRefresh(cleanedRoot);

        String charset = "UTF8";
        return HTML_SERIALIZER.getAsString(cleanedRoot, charset);
    }

    /**
     * Builds the {@link CleanerProperties} instance that is shared by the cleaner and the serializer.
     *
     * @return A newly created, fully configured properties object.
     */
    private static CleanerProperties createCleanerProperties() {
        // See http://htmlcleaner.sourceforge.net/parameters.php for descriptions
        CleanerProperties cleanerProperties = new CleanerProperties();

        cleanerProperties.setNamespacesAware(false);
        cleanerProperties.setAdvancedXmlEscape(false);
        cleanerProperties.setOmitXmlDeclaration(true);
        cleanerProperties.setOmitDoctypeDeclaration(false);
        cleanerProperties.setTranslateSpecialEntities(false);
        cleanerProperties.setRecognizeUnicodeChars(false);
        cleanerProperties.setIgnoreQuestAndExclam(false);

        return cleanerProperties;
    }

    private void removeMetaRefresh(TagNode rootNode) {
        for (TagNode element : rootNode.getElementListByName("meta", true)) {
            String httpEquiv = element.getAttributeByName("http-equiv");
            if (httpEquiv != null && httpEquiv.trim().equalsIgnoreCase("refresh")) {
                element.removeFromTree();
            }
        }
    /**
     * Parses the supplied HTML, runs it through the configured {@link Cleaner} and then lets the
     * {@code HeadCleaner} copy head contents from the parsed document into the cleaned one.
     *
     * @param html
     *         The HTML to sanitize.
     *
     * @return The cleaned document.
     */
    public Document sanitize(String html) {
        Document parsedDocument = Jsoup.parse(html);

        Document sanitizedDocument = cleaner.clean(parsedDocument);
        headCleaner.clean(parsedDocument, sanitizedDocument);

        return sanitizedDocument;
    }
}
Loading