Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 63abf057 authored by cketti's avatar cketti
Browse files

Sanitize HTML to remove meta refresh

Using
  <meta http-equiv="Refresh" content="1; URL=http://example.com/">
in an HTML message causes WebView to load the URL in the default browser.
Overriding WebViewClient.shouldOverrideUrlLoading() allows us to cancel
loading this URL. Sadly, I found no way to find out whether the method was
called because of a meta refresh or because the user clicked on a link.

So now we're using HtmlCleaner to parse the HTML and remove all "meta" elements
containing an "http-equiv" attribute with a value of "refresh".
parent c7229e47
Loading
Loading
Loading
Loading
+54 −0
Original line number Diff line number Diff line
package com.fsck.k9.helper;


import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.HtmlSerializer;
import org.htmlcleaner.SimpleHtmlSerializer;
import org.htmlcleaner.TagNode;


/**
 * Static utility that parses HTML with HtmlCleaner, drops every {@code meta} element whose
 * {@code http-equiv} attribute is "refresh", and serializes the remaining tree back to a string.
 */
public class HtmlSanitizer {
    // The cleaner and the serializer are configured from the same properties instance.
    private static final CleanerProperties CLEANER_PROPERTIES = buildCleanerProperties();
    private static final HtmlCleaner HTML_CLEANER = new HtmlCleaner(CLEANER_PROPERTIES);
    private static final HtmlSerializer HTML_SERIALIZER = new SimpleHtmlSerializer(CLEANER_PROPERTIES);


    /** Not instantiable; all functionality is exposed through static methods. */
    private HtmlSanitizer() {}

    /**
     * Parses the given HTML, removes all "meta refresh" elements and returns the serialized result.
     *
     * @param html the HTML source to sanitize
     * @return the serialized HTML without "meta refresh" elements
     */
    public static String sanitize(String html) {
        TagNode document = HTML_CLEANER.clean(html);
        removeMetaRefresh(document);
        return HTML_SERIALIZER.getAsString(document, "UTF8");
    }

    /**
     * Builds the configuration used by both the parser and the serializer.
     *
     * See http://htmlcleaner.sourceforge.net/parameters.php for descriptions of the parameters.
     */
    private static CleanerProperties buildCleanerProperties() {
        CleanerProperties config = new CleanerProperties();

        config.setNamespacesAware(false);
        config.setAdvancedXmlEscape(false);
        config.setOmitXmlDeclaration(true);
        config.setOmitDoctypeDeclaration(false);
        config.setTranslateSpecialEntities(false);
        config.setRecognizeUnicodeChars(false);

        return config;
    }

    /**
     * Detaches every {@code meta} element found anywhere below {@code rootNode} whose
     * {@code http-equiv} attribute marks it as a refresh directive.
     */
    private static void removeMetaRefresh(TagNode rootNode) {
        for (TagNode metaElement : rootNode.getElementListByName("meta", true)) {
            if (isRefreshDirective(metaElement.getAttributeByName("http-equiv"))) {
                metaElement.removeFromTree();
            }
        }
    }

    /**
     * Returns {@code true} if the attribute value is present and equals "refresh" once surrounding
     * whitespace is trimmed, ignoring case.
     */
    private static boolean isRefreshDirective(String httpEquiv) {
        return httpEquiv != null && "refresh".equalsIgnoreCase(httpEquiv.trim());
    }
}
+5 −1
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@ import android.widget.Toast;
import com.fsck.k9.K9;
import com.fsck.k9.R;
import com.fsck.k9.helper.HtmlConverter;
import com.fsck.k9.helper.HtmlSanitizer;


public class MessageWebView extends RigidWebView {

@@ -123,7 +125,9 @@ public class MessageWebView extends RigidWebView {
        }
        content += HtmlConverter.cssStylePre();
        content += "</head><body>" + text + "</body></html>";
        loadDataWithBaseURL("http://", content, "text/html", "utf-8", null);

        String sanitizedContent = HtmlSanitizer.sanitize(content);
        loadDataWithBaseURL("http://", sanitizedContent, "text/html", "utf-8", null);
        resumeTimers();
    }

+94 −0
Original line number Diff line number Diff line
package com.fsck.k9.helper;


import org.junit.Test;

import static org.junit.Assert.assertEquals;


/**
 * Checks that {@link HtmlSanitizer#sanitize(String)} removes "meta refresh" elements wherever they
 * appear in the document and however the {@code http-equiv} value is written, while leaving other
 * {@code meta} elements in place.
 */
public class HtmlSanitizerTest {
    /** Expected output for every document whose only meta elements are refresh directives. */
    private static final String CLEAN_DOCUMENT = "<html><head></head><body>Message</body></html>";

    /** The canonical lower-case, quoted refresh element used by most tests. */
    private static final String META_REFRESH = metaRefresh("\"refresh\"");


    @Test
    public void shouldRemoveMetaRefreshInHead() {
        assertSanitized(CLEAN_DOCUMENT, documentWithHead(META_REFRESH));
    }

    @Test
    public void shouldRemoveMetaRefreshBetweenHeadAndBody() {
        String input = "<html><head></head>" + META_REFRESH + "<body>Message</body></html>";

        assertSanitized(CLEAN_DOCUMENT, input);
    }

    @Test
    public void shouldRemoveMetaRefreshInBody() {
        String input = "<html><head></head><body>" + META_REFRESH + "Message</body></html>";

        assertSanitized(CLEAN_DOCUMENT, input);
    }

    @Test
    public void shouldRemoveMetaRefreshWithUpperCaseAttributeValue() {
        assertSanitized(CLEAN_DOCUMENT, documentWithHead(metaRefresh("\"REFRESH\"")));
    }

    @Test
    public void shouldRemoveMetaRefreshWithMixedCaseAttributeValue() {
        assertSanitized(CLEAN_DOCUMENT, documentWithHead(metaRefresh("\"Refresh\"")));
    }

    @Test
    public void shouldRemoveMetaRefreshWithoutQuotesAroundAttributeValue() {
        assertSanitized(CLEAN_DOCUMENT, documentWithHead(metaRefresh("refresh")));
    }

    @Test
    public void shouldRemoveMetaRefreshWithSpacesInAttributeValue() {
        assertSanitized(CLEAN_DOCUMENT, documentWithHead(metaRefresh("\"refresh \"")));
    }

    @Test
    public void shouldRemoveMultipleMetaRefreshTags() {
        String input = "<html><head>" + META_REFRESH + "</head>" +
                "<body>" + META_REFRESH + "Message</body></html>";

        assertSanitized(CLEAN_DOCUMENT, input);
    }

    @Test
    public void shouldRemoveMetaRefreshButKeepOtherMetaTags() {
        String contentTypeMeta = "<meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\">";
        String input = documentWithHead(contentTypeMeta + META_REFRESH);

        // The surviving meta element is expected to be serialized in self-closing form.
        String expected = "<html><head>" +
                "<meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />" +
                "</head><body>Message</body></html>";

        assertSanitized(expected, input);
    }

    /** Runs the sanitizer on {@code html} and compares the result with {@code expected}. */
    private static void assertSanitized(String expected, String html) {
        assertEquals(expected, HtmlSanitizer.sanitize(html));
    }

    /** Wraps {@code headContent} in a document whose body is the text "Message". */
    private static String documentWithHead(String headContent) {
        return "<html><head>" + headContent + "</head><body>Message</body></html>";
    }

    /**
     * Builds a refresh meta element. {@code rawHttpEquiv} is inserted verbatim after
     * {@code http-equiv=}, so the caller supplies any surrounding quotes.
     */
    private static String metaRefresh(String rawHttpEquiv) {
        return "<meta http-equiv=" + rawHttpEquiv + " content=\"1; URL=http://example.com/\">";
    }
}