Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 924a7e31 authored by Peter Birk Pakkenberg's avatar Peter Birk Pakkenberg
Browse files

Update guessFileName to use RFC 6266 starting in V

Bug: 319400769
Test: New host-side test added in companion CL on same topic
Change-Id: I1d068b62e29f7f90c6e54cef27755ab47b8971ef
parent 190bf251
Loading
Loading
Loading
Loading
+321 −11
Original line number Diff line number Diff line
@@ -16,20 +16,40 @@

package android.webkit;

import android.annotation.FlaggedApi;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.compat.Compatibility;
import android.compat.annotation.ChangeId;
import android.compat.annotation.EnabledSince;
import android.compat.annotation.UnsupportedAppUsage;
import android.net.ParseException;
import android.net.Uri;
import android.net.WebAddress;
import android.os.Build;
import android.util.Log;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class URLUtil {

    /**
     * This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In
     * particular, this enables parsing of {@code filename*} values which can use a different
     * character encoding.
     *
     * <p>This is a compatibility change id: it is declared as enabled for apps whose
     * targetSdkVersion is {@code VANILLA_ICE_CREAM} or later, and callers in this class query it
     * through {@code Compatibility.isChangeEnabled} to choose between the RFC 6266 and the legacy
     * RFC 2616 code paths.
     *
     * @hide
     */
    @ChangeId
    @EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM)
    @FlaggedApi(android.os.Flags.FLAG_ANDROID_OS_BUILD_VANILLA_ICE_CREAM)
    public static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L;

    // Tag string for this class; presumably passed to android.util.Log -- usages are outside
    // this excerpt, confirm before relying on it.
    private static final String LOGTAG = "webkit";
    // NOTE(review): looks like a compile-time switch for verbose tracing; its usages are not
    // visible in this excerpt -- confirm.
    private static final boolean TRACE = false;

@@ -293,21 +313,58 @@ public final class URLUtil {

    /**
     * Guesses canonical filename that a download would have, using the URL and contentDisposition.
     * File extension, if not defined, is added based on the mimetype
     *
     * <p>File extension, if not defined, is added based on the mimetype.
     *
     * <p>The {@code contentDisposition} argument will be treated differently depending on
     * targetSdkVersion.
     *
     * <ul>
     *   <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
     *       2616.
     *   <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
     *       6266.
     * </ul>
     *
     * In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to
     * parse {@code filename*} directives in the {@code contentDisposition} string.
     *
     * <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}:
     *
     * <ul>
     *   <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the
     *       method will append the appropriate extension instead of replacing the current
     *       extension.
     *   <li>If the suggested file name contains a path separator ({@code "/"}), the method will
     *       replace this with the underscore character ({@code "_"}) instead of splitting the
     *       result and only using the last part.
     * </ul>
     *
     * @param url Url to the content
     * @param contentDisposition Content-Disposition HTTP header or {@code null}
     * @param mimeType Mime-type of the content or {@code null}
     * @return suggested filename
     */
    public static final String guessFileName(
    public static String guessFileName(
            String url, @Nullable String contentDisposition, @Nullable String mimeType) {
        // The RFC 6266 path is taken only when the build flag is on AND the compat change is
        // enabled for the calling app; the compat check is skipped when the flag is off.
        boolean useRfc6266 =
                android.os.Flags.androidOsBuildVanillaIceCream()
                        && Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266);

        return useRfc6266
                ? guessFileNameRfc6266(url, contentDisposition, mimeType)
                : guessFileNameRfc2616(url, contentDisposition, mimeType);
    }

    /** Legacy implementation of guessFileName, based on RFC 2616. */
    private static String guessFileNameRfc2616(
            String url, @Nullable String contentDisposition, @Nullable String mimeType) {
        String filename = null;
        String extension = null;

        // If we couldn't do anything with the hint, move toward the content disposition
        if (contentDisposition != null) {
            filename = parseContentDisposition(contentDisposition);
            filename = parseContentDispositionRfc2616(contentDisposition);
            if (filename != null) {
                int index = filename.lastIndexOf('/') + 1;
                if (index > 0) {
@@ -384,6 +441,128 @@ public final class URLUtil {
        return filename + extension;
    }

    /**
     * Builds a download filename from the URL and the {@code contentDisposition} header value,
     * parsing the header according to RFC 6266.
     *
     * <p>The extension suggested for {@code mimeType} is appended when the candidate name contains
     * no dot at all, or when {@code mimeType} is non-null and the text after the last dot maps to
     * a different MIME type. An existing extension is never removed; in every other case the
     * candidate name is returned unchanged.
     */
    @NonNull
    private static String guessFileNameRfc6266(
            @NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) {
        String candidate = getFilenameSuggestion(url, contentDisposition);
        String suggestedExtension = suggestExtensionFromMimeType(mimeType);

        boolean hasDot = candidate.indexOf('.') >= 0;
        // The mismatch check only runs for names that already contain a dot and only when a
        // mime type was supplied.
        boolean appendExtension =
                !hasDot
                        || (mimeType != null
                                && extensionDifferentFromMimeType(candidate, mimeType));

        return appendExtension ? candidate + suggestedExtension : candidate;
    }

    /**
     * Picks a file name candidate, trying the sources in this order: the filename parsed from
     * {@code contentDisposition}, the last path segment of {@code url}, and finally the fixed
     * name {@code "downloadfile"}.
     *
     * <p>Names taken from the header or the URL have every {@code "/"} replaced with {@code "_"}
     * before being returned.
     */
    @NonNull
    private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) {
        // Source 1: the Content-Disposition header, when one was supplied.
        String fromHeader =
                contentDisposition == null
                        ? null
                        : getFilenameFromContentDispositionRfc6266(contentDisposition);
        if (fromHeader != null) {
            return replacePathSeparators(fromHeader);
        }

        // Source 2: the last path segment of the URL.
        String fromUrl = url == null ? null : Uri.parse(url).getLastPathSegment();
        if (fromUrl != null) {
            return replacePathSeparators(fromUrl);
        }

        // Source 3: nothing usable was found, fall back to a generic name.
        return "downloadfile";
    }

    /**
     * Returns {@code raw} with every {@code '/'} character swapped for {@code '_'}, so the result
     * cannot be interpreted as a path with multiple segments.
     */
    @NonNull
    private static String replacePathSeparators(@NonNull String raw) {
        // Plain character replacement; equivalent to the regex form for a single literal char.
        return raw.replace('/', '_');
    }

    /**
     * Reports whether the text after the last {@code '.'} in {@code filename} is a known
     * extension whose MIME type differs (ignoring case) from {@code mimeType}.
     *
     * <p>Returns {@code false} when {@link MimeTypeMap} has no MIME type for that extension. If
     * {@code filename} contains no dot, the whole name is looked up as the extension.
     */
    private static boolean extensionDifferentFromMimeType(
            @NonNull String filename, @NonNull String mimeType) {
        String extension = filename.substring(filename.lastIndexOf('.') + 1);
        String mappedType = MimeTypeMap.getSingleton().getMimeTypeFromExtension(extension);
        if (mappedType == null) {
            // Unknown extension: nothing to compare against, so not considered a mismatch.
            return false;
        }
        return !mappedType.equalsIgnoreCase(mimeType);
    }

    /**
     * Chooses a file extension (including the leading {@code .}) for the given MIME type.
     *
     * <p>Resolution order: {@code ".bin"} for a {@code null} type; the extension registered in
     * {@link MimeTypeMap} if there is one; otherwise {@code ".html"} for {@code text/html},
     * {@code ".txt"} for any other {@code text/} type, and {@code ".bin"} for everything else.
     *
     * @param mimeType Reported mimetype, may be {@code null}
     * @return A file extension, including the {@code .}
     */
    @NonNull
    private static String suggestExtensionFromMimeType(@Nullable String mimeType) {
        if (mimeType == null) {
            return ".bin";
        }

        String mappedExtension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
        if (mappedExtension != null) {
            return "." + mappedExtension;
        }

        // No registered extension: fall back on the coarse type.
        if (mimeType.equalsIgnoreCase("text/html")) {
            return ".html";
        }
        return mimeType.toLowerCase(Locale.ROOT).startsWith("text/") ? ".txt" : ".bin";
    }

    /**
     * Extracts the suggested filename from a Content-Disposition HTTP header value.
     *
     * <p>The parser used depends on targetSdkVersion.
     *
     * <ul>
     *   <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} the header is parsed based on
     *       RFC 2616.
     *   <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} the header is parsed based on
     *       RFC 6266.
     * </ul>
     */
    @UnsupportedAppUsage
    static String parseContentDisposition(String contentDisposition) {
        // Both the build flag and the per-app compat change must be on for the RFC 6266 parser;
        // the compat check is skipped when the flag is off.
        boolean useRfc6266 =
                android.os.Flags.androidOsBuildVanillaIceCream()
                        && Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266);

        return useRfc6266
                ? getFilenameFromContentDispositionRfc6266(contentDisposition)
                : parseContentDispositionRfc2616(contentDisposition);
    }

    /** Regex used to parse content-disposition headers */
    private static final Pattern CONTENT_DISPOSITION_PATTERN =
            Pattern.compile(
@@ -391,15 +570,14 @@ public final class URLUtil {
                    Pattern.CASE_INSENSITIVE);

    /**
     * Parse the Content-Disposition HTTP Header. The format of the header is defined here:
     * http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html This header provides a filename for
     * content that is going to be downloaded to the file system. We only support the attachment
     * type. Note that RFC 2616 specifies the filename value must be double-quoted. Unfortunately
     * some servers do not quote the value so to maintain consistent behaviour with other browsers,
     * we allow unquoted values too.
     * Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a
     * href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This
     * header provides a filename for content that is going to be downloaded to the file system. We
     * only support the attachment type. Note that RFC 2616 specifies the filename value must be
     * double-quoted. Unfortunately some servers do not quote the value so to maintain consistent
     * behaviour with other browsers, we allow unquoted values too.
     */
    @UnsupportedAppUsage
    static String parseContentDisposition(String contentDisposition) {
    private static String parseContentDispositionRfc2616(String contentDisposition) {
        try {
            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
            if (m.find()) {
@@ -410,4 +588,136 @@ public final class URLUtil {
        }
        return null;
    }

    /**
     * Pattern for parsing individual content disposition key-value pairs.
     *
     * <p>The pattern will attempt to parse the value as either single-, double-, or unquoted. For
     * the single- and double-quoted options, the pattern allows escaped quotes as part of the
     * value, as per <a href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616
     * section-2.2</a>
     *
     * <p>Capturing groups:
     *
     * <ul>
     *   <li>Group 1: the parameter name, matched as a (reluctant) run of non-whitespace characters
     *       in front of the {@code =} sign.
     *   <li>Group 2: the content between single quotes, with any backslash escapes still present.
     *   <li>Group 3: the content between double quotes, with any backslash escapes still present.
     *   <li>Group 4: an unquoted value, which may not start with a quote character and ends at the
     *       first {@code ;} or whitespace character.
     * </ul>
     *
     * <p>Exactly one of groups 2-4 is non-null for a match. Optional whitespace around the
     * {@code =} sign, and optional whitespace plus one {@code ;} after the value, are consumed as
     * part of the match. The pattern is compiled with {@link Pattern#COMMENTS}, so the whitespace
     * and the {@code #} comments inside the pattern text are not part of what is matched.
     */
    @SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability.
    private static final Pattern DISPOSITION_PATTERN =
            Pattern.compile(
                    """
                            \\s*(\\S+?) # Group 1: parameter name
                            \\s*=\\s* # Match equals sign
                            (?: # non-capturing group of options
                               '( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted
                             | "( (?: [^"\\\\] | \\\\. )*  )" # Group 3: double-quoted
                             | ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter
                            )\\s*;? # Optional end semicolon""",
                    Pattern.COMMENTS);

    /**
     * Extract filename from a {@code Content-Disposition} header value.
     *
     * <p>This method implements the parsing defined in <a
     * href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a>, supporting both the {@code
     * filename} and {@code filename*} disposition parameters. The header value is split at the
     * first {@code ;} into the disposition type and the parameter list. If the disposition type is
     * {@code "inline"}, or there is no parameter list at all, this method returns {@code null}.
     * Every other disposition type is treated like {@code "attachment"}.
     *
     * <p>If both {@code filename*} and {@code filename} are present, the former will be returned,
     * as per the RFC. A {@code filename*} value that cannot be decoded yields no extended
     * filename.
     *
     * @param contentDisposition Value of {@code Content-Disposition} header.
     * @return The filename suggested by the header or {@code null} if no filename could be parsed
     *     from the header value.
     */
    @Nullable
    private static String getFilenameFromContentDispositionRfc6266(
            @NonNull String contentDisposition) {
        String[] typeAndParameters = contentDisposition.trim().split(";", 2);
        if (typeAndParameters.length < 2) {
            // A `disposition-type` without any `disposition-parm` cannot carry a filename.
            return null;
        }
        if ("inline".equalsIgnoreCase(typeAndParameters[0].trim())) {
            // "inline" should not result in a download.
            // Unknown disposition types should be handled as "attachment"
            // https://datatracker.ietf.org/doc/html/rfc6266#section-4.2
            return null;
        }

        String plainFilename = null;
        String extendedFilename = null;
        Matcher pair = DISPOSITION_PATTERN.matcher(typeAndParameters[1]);
        while (pair.find()) {
            String name = pair.group(1);
            String singleQuoted = pair.group(2);
            String doubleQuoted = pair.group(3);

            // Quoted values may contain backslash escapes that must be unwrapped; unquoted
            // values are taken verbatim.
            String value;
            if (singleQuoted != null) {
                value = removeSlashEscapes(singleQuoted);
            } else if (doubleQuoted != null) {
                value = removeSlashEscapes(doubleQuoted);
            } else {
                value = pair.group(4);
            }

            if (name == null || value == null) {
                continue;
            }

            if ("filename*".equalsIgnoreCase(name)) {
                extendedFilename = parseExtValueString(value);
            } else if ("filename".equalsIgnoreCase(name)) {
                plainFilename = value;
            }
        }

        // RFC 6266 dictates that the extended (filename*) value is preferred when available.
        return extendedFilename != null ? extendedFilename : plainFilename;
    }

    /**
     * Strips the backslash from every {@code \X} escape, leaving just {@code X}. A {@code null}
     * input yields {@code null}; a backslash that is not followed by a matchable character is
     * left in place.
     */
    private static String removeSlashEscapes(String raw) {
        if (raw != null) {
            // Same regex and replacement as before, spelled out via Pattern/Matcher.
            return Pattern.compile("\\\\(.)").matcher(raw).replaceAll("$1");
        }
        return null;
    }

    /**
     * Parse an extended value string of the form {@code charset'language'value}, where the value
     * part can be percent-encoded. Returns {@code null} if the string does not contain all three
     * parts or cannot be decoded.
     *
     * <p>The language part is ignored. Before decoding, the value part is passed through
     * {@code encodePlusCharacters}, because {@link URLDecoder} would otherwise decode every
     * {@code "+"} as a space.
     */
    private static String parseExtValueString(String raw) {
        String[] fields = raw.split("'", 3);
        if (fields.length < 3) {
            return null;
        }

        String charsetName = fields[0];
        // fields[1] holds the language tag, which is intentionally not used.
        String encodedValue = fields[2];

        try {
            Charset charset = Charset.forName(charsetName);
            return URLDecoder.decode(encodePlusCharacters(encodedValue, charset), charset);
        } catch (RuntimeException ignored) {
            return null; // Ignoring an un-parsable value is within spec.
        }
    }

    /**
     * Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code
     * charset}.
     *
     * <p>The replacement is built by encoding {@code "+"} with {@code charset} and writing every
     * resulting byte as a {@code %xx} escape (for example {@code "%2b"} for UTF-8), so that a
     * subsequent URL-decoding step using the same charset yields a literal {@code "+"} again.
     *
     * @param valueChars the percent-encoded text in which literal plus signs must be protected
     * @param charset the charset the text will later be decoded with
     * @return {@code valueChars} with every {@code "+"} replaced by its percent-encoded form
     */
    @NonNull
    private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) {
        StringBuilder percentEncodedPlus = new StringBuilder();
        // getBytes() returns exactly the encoded bytes. The backing array of the buffer returned
        // by Charset#encode may be longer than the encoded content, which would add stray bytes.
        for (byte b : "+".getBytes(charset)) {
            // Formatting a byte is not possible with TextUtils.formatSimple.
            // The leading "%%" emits the literal '%' that makes this a percent-escape; without
            // it the plus sign would be replaced by bare hex digits such as "2b".
            percentEncodedPlus.append(String.format(Locale.ROOT, "%%%02x", b));
        }
        // Literal replacement: neither the target nor the replacement needs regex semantics.
        return valueChars.replace("+", percentEncodedPlus.toString());
    }
}