Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1ef9432f authored by Peter Birk Pakkenberg's avatar Peter Birk Pakkenberg Committed by Android (Google) Code Review
Browse files

Merge "Update guessFileName to use RFC 6266 starting in V" into main

parents d73646d4 924a7e31
Loading
Loading
Loading
Loading
+321 −11
Original line number Diff line number Diff line
@@ -16,20 +16,40 @@

package android.webkit;

import android.annotation.FlaggedApi;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.compat.Compatibility;
import android.compat.annotation.ChangeId;
import android.compat.annotation.EnabledSince;
import android.compat.annotation.UnsupportedAppUsage;
import android.net.ParseException;
import android.net.Uri;
import android.net.WebAddress;
import android.os.Build;
import android.util.Log;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class URLUtil {

    /**
     * This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In
     * particular, this enables parsing of {@code filename*} values which can use a different
     * character encoding.
     *
     * <p>The change is enabled for apps targeting {@code VANILLA_ICE_CREAM} or later. It is
     * consulted through {@code Compatibility.isChangeEnabled} by {@code guessFileName} and
     * {@code parseContentDisposition}, which fall back to the legacy RFC 2616 parsing when the
     * change (or the accompanying build flag) is not enabled.
     *
     * @hide
     */
    @ChangeId
    @EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM)
    @FlaggedApi(android.os.Flags.FLAG_ANDROID_OS_BUILD_VANILLA_ICE_CREAM)
    public static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L;

    // NOTE(review): presumably the tag passed to android.util.Log by this class; the usages are
    // outside the visible portion of the file -- confirm.
    private static final String LOGTAG = "webkit";
    // NOTE(review): presumably gates verbose trace logging; disabled here -- confirm against usages.
    private static final boolean TRACE = false;

@@ -293,21 +313,58 @@ public final class URLUtil {

    /**
     * Guesses canonical filename that a download would have, using the URL and contentDisposition.
     * File extension, if not defined, is added based on the mimetype
     *
     * <p>File extension, if not defined, is added based on the mimetype.
     *
     * <p>The {@code contentDisposition} argument will be treated differently depending on
     * targetSdkVersion.
     *
     * <ul>
     *   <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
     *       2616.
     *   <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
     *       6266.
     * </ul>
     *
     * In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to
     * parse {@code filename*} directives in the {@code contentDisposition} string.
     *
     * <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}:
     *
     * <ul>
     *   <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the
     *       method will append the appropriate extension instead of replacing the current
     *       extension.
     *   <li>If the suggested file name contains a path separator ({@code "/"}), the method will
     *       replace this with the underscore character ({@code "_"}) instead of splitting the
     *       result and only using the last part.
     * </ul>
     *
     * @param url Url to the content
     * @param contentDisposition Content-Disposition HTTP header or {@code null}
     * @param mimeType Mime-type of the content or {@code null}
     * @return suggested filename
     */
    public static final String guessFileName(
    public static String guessFileName(
            String url, @Nullable String contentDisposition, @Nullable String mimeType) {
        if (android.os.Flags.androidOsBuildVanillaIceCream()) {
            if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
                return guessFileNameRfc6266(url, contentDisposition, mimeType);
            }
        }

        return guessFileNameRfc2616(url, contentDisposition, mimeType);
    }

    /** Legacy implementation of guessFileName, based on RFC 2616. */
    private static String guessFileNameRfc2616(
            String url, @Nullable String contentDisposition, @Nullable String mimeType) {
        String filename = null;
        String extension = null;

        // If we couldn't do anything with the hint, move toward the content disposition
        if (contentDisposition != null) {
            filename = parseContentDisposition(contentDisposition);
            filename = parseContentDispositionRfc2616(contentDisposition);
            if (filename != null) {
                int index = filename.lastIndexOf('/') + 1;
                if (index > 0) {
@@ -384,6 +441,128 @@ public final class URLUtil {
        return filename + extension;
    }

    /**
     * RFC 6266 flavour of {@code guessFileName}: derives a filename from the Content-Disposition
     * header or the URL and makes sure it carries an extension consistent with {@code mimeType}.
     *
     * <p>The mime-type based extension is appended (never substituted) when the suggested name
     * has no {@code '.'} at all, or when {@code mimeType} is non-null and the name's last
     * extension maps to a different mime type.
     */
    @NonNull
    private static String guessFileNameRfc6266(
            @NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) {
        final String suggestedName = getFilenameSuggestion(url, contentDisposition);
        final String mimeExtension = suggestExtensionFromMimeType(mimeType);

        // A name without any dot has no extension, so it always receives the suggested one.
        // Otherwise the extension is appended only when it contradicts the reported mime type.
        final boolean lacksExtension = suggestedName.indexOf('.') < 0;
        final boolean appendMimeExtension =
                lacksExtension
                        || (mimeType != null
                                && extensionDifferentFromMimeType(suggestedName, mimeType));

        return appendMimeExtension ? suggestedName + mimeExtension : suggestedName;
    }

    /**
     * Picks a filename candidate, preferring the {@code contentDisposition} header, then the last
     * path segment of {@code url}, and finally the generic name {@code "downloadfile"}.
     *
     * <p>Any candidate taken from the header or the URL has its path separators replaced with
     * the {@code "_"} character before being returned.
     */
    @NonNull
    private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) {
        String candidate = null;

        // The Content-Disposition header has priority when it yields a filename.
        if (contentDisposition != null) {
            candidate = getFilenameFromContentDispositionRfc6266(contentDisposition);
        }

        // Otherwise fall back to the last path segment of the URL, if there is one.
        if (candidate == null && url != null) {
            candidate = Uri.parse(url).getLastPathSegment();
        }

        // Nothing usable from either source: use a generic filename.
        return candidate == null ? "downloadfile" : replacePathSeparators(candidate);
    }

    /**
     * Substitutes every {@code "/"} in {@code raw} with {@code "_"} so the resulting filename
     * cannot navigate the path.
     */
    @NonNull
    private static String replacePathSeparators(@NonNull String raw) {
        // Literal (non-regex) replacement; neither string has regex meaning, so the result is
        // the same as a regex-based replaceAll.
        return raw.replace("/", "_");
    }

    /**
     * Reports whether the text after the last {@code '.'} in {@code filename} is an extension
     * known to {@link MimeTypeMap} whose mime type differs (ignoring case) from {@code mimeType}.
     *
     * <p>An extension that {@link MimeTypeMap} cannot map to a mime type is not treated as a
     * mismatch.
     */
    private static boolean extensionDifferentFromMimeType(
            @NonNull String filename, @NonNull String mimeType) {
        String extension = filename.substring(filename.lastIndexOf('.') + 1);
        String mappedType = MimeTypeMap.getSingleton().getMimeTypeFromExtension(extension);
        if (mappedType == null) {
            // Unknown extension: nothing to compare against.
            return false;
        }
        return !mappedType.equalsIgnoreCase(mimeType);
    }

    /**
     * Get a candidate file extension (including the leading {@code .}) for the given mimeType.
     *
     * <p>The extension registered in {@link MimeTypeMap} is used when there is one. Otherwise
     * {@code "text/html"} maps to {@code ".html"}, any other {@code "text/"} type maps to {@code
     * ".txt"}, and everything else (including a {@code null} mimeType) maps to {@code ".bin"}.
     *
     * @param mimeType Reported mimetype
     * @return A file extension, including the {@code .}
     */
    @NonNull
    private static String suggestExtensionFromMimeType(@Nullable String mimeType) {
        if (mimeType == null) {
            return ".bin";
        }

        String mappedExtension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
        if (mappedExtension != null) {
            return "." + mappedExtension;
        }

        // No registered mapping: fall back to coarse defaults.
        if (mimeType.equalsIgnoreCase("text/html")) {
            return ".html";
        }
        return mimeType.toLowerCase(Locale.ROOT).startsWith("text/") ? ".txt" : ".bin";
    }

    /**
     * Parse the Content-Disposition HTTP Header.
     *
     * <p>Behavior depends on targetSdkVersion.
     *
     * <ul>
     *   <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will parse based on RFC 2616.
     *   <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will parse based on RFC 6266.
     * </ul>
     */
    @UnsupportedAppUsage
    static String parseContentDisposition(String contentDisposition) {
        // Build flag first, then the per-app compat change; both must be on for RFC 6266 parsing.
        boolean useRfc6266 =
                android.os.Flags.androidOsBuildVanillaIceCream()
                        && Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266);
        return useRfc6266
                ? getFilenameFromContentDispositionRfc6266(contentDisposition)
                : parseContentDispositionRfc2616(contentDisposition);
    }

    /** Regex used to parse content-disposition headers */
    private static final Pattern CONTENT_DISPOSITION_PATTERN =
            Pattern.compile(
@@ -391,15 +570,14 @@ public final class URLUtil {
                    Pattern.CASE_INSENSITIVE);

    /**
     * Parse the Content-Disposition HTTP Header. The format of the header is defined here:
     * http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html This header provides a filename for
     * content that is going to be downloaded to the file system. We only support the attachment
     * type. Note that RFC 2616 specifies the filename value must be double-quoted. Unfortunately
     * some servers do not quote the value so to maintain consistent behaviour with other browsers,
     * we allow unquoted values too.
     * Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a
     * href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This
     * header provides a filename for content that is going to be downloaded to the file system. We
     * only support the attachment type. Note that RFC 2616 specifies the filename value must be
     * double-quoted. Unfortunately some servers do not quote the value so to maintain consistent
     * behaviour with other browsers, we allow unquoted values too.
     */
    @UnsupportedAppUsage
    static String parseContentDisposition(String contentDisposition) {
    private static String parseContentDispositionRfc2616(String contentDisposition) {
        try {
            Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
            if (m.find()) {
@@ -410,4 +588,136 @@ public final class URLUtil {
        }
        return null;
    }

    /**
     * Pattern for parsing individual content disposition key-value pairs.
     *
     * <p>The pattern will attempt to parse the value as either single-, double-, or unquoted. For
     * the single- and double-quoted options, the pattern allows escaped quotes as part of the
     * value, as per <a href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616
     * section-2.2</a>
     *
     * <p>Capture groups: group 1 is the parameter name; group 2 is the content of a single-quoted
     * value; group 3 is the content of a double-quoted value; group 4 is an unquoted value. The
     * value groups are alternatives, so only one of groups 2-4 participates in a given match.
     * Quoted contents are captured with their backslash escapes still in place.
     *
     * <p>The pattern is compiled with {@link Pattern#COMMENTS}, so the whitespace and the {@code
     * #} comments inside the pattern text are not part of the match.
     */
    @SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability.
    private static final Pattern DISPOSITION_PATTERN =
            Pattern.compile(
                    """
                            \\s*(\\S+?) # Group 1: parameter name
                            \\s*=\\s* # Match equals sign
                            (?: # non-capturing group of options
                               '( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted
                             | "( (?: [^"\\\\] | \\\\. )*  )" # Group 3: double-quoted
                             | ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter
                            )\\s*;? # Optional end semicolon""",
                    Pattern.COMMENTS);

    /**
     * Extract filename from a {@code Content-Disposition} header value.
     *
     * <p>Parsing follows <a href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a> and
     * understands both the {@code filename} and the {@code filename*} disposition parameters. A
     * header whose disposition type is {@code "inline"} yields {@code null}, signalling that a
     * download was not intended; any other disposition type is handled like {@code "attachment"}.
     *
     * <p>When both {@code filename*} and {@code filename} are present, the {@code filename*} value
     * wins, as per the RFC. A {@code filename*} value that cannot be decoded is ignored.
     *
     * @param contentDisposition Value of {@code Content-Disposition} header.
     * @return The filename suggested by the header or {@code null} if no filename could be parsed
     *     from the header value.
     */
    @Nullable
    private static String getFilenameFromContentDispositionRfc6266(
            @NonNull String contentDisposition) {
        // Split into the `disposition-type` and the remaining `disposition-parm` list.
        String[] typeAndParams = contentDisposition.trim().split(";", 2);
        if (typeAndParams.length < 2) {
            // No parameters at all, hence no filename.
            return null;
        }
        if ("inline".equalsIgnoreCase(typeAndParams[0].trim())) {
            // "inline" should not result in a download.
            // Unknown disposition types should be handled as "attachment"
            // https://datatracker.ietf.org/doc/html/rfc6266#section-4.2
            return null;
        }

        String plainName = null;
        String extendedName = null;
        Matcher pairs = DISPOSITION_PATTERN.matcher(typeAndParams[1]);
        while (pairs.find()) {
            String name = pairs.group(1);
            String singleQuoted = pairs.group(2);
            String doubleQuoted = pairs.group(3);

            String value;
            if (singleQuoted != null) {
                value = removeSlashEscapes(singleQuoted);
            } else if (doubleQuoted != null) {
                value = removeSlashEscapes(doubleQuoted);
            } else {
                value = pairs.group(4); // Un-quoted value is taken verbatim.
            }

            if (name == null || value == null) {
                continue;
            }

            if ("filename*".equalsIgnoreCase(name)) {
                extendedName = parseExtValueString(value);
            } else if ("filename".equalsIgnoreCase(name)) {
                plainName = value;
            }
        }

        // RFC 6266 dictates that the extended (filename*) value is preferred if present.
        return extendedName != null ? extendedName : plainName;
    }

    /**
     * Strips backslash escapes: every {@code \X} pair becomes just {@code X}. A {@code null} input
     * is passed through as {@code null}.
     */
    private static String removeSlashEscapes(String raw) {
        return raw == null ? null : raw.replaceAll("\\\\(.)", "$1");
    }

    /**
     * Parse an extended value string of the form {@code charset'language'value}, where the value
     * part can be percent-encoded. Returns {@code null} if the string cannot be parsed.
     */
    private static String parseExtValueString(String raw) {
        String[] fields = raw.split("'", 3);
        if (fields.length < 3) {
            return null;
        }

        // fields[1] (the language tag) is intentionally ignored.
        String charsetName = fields[0];
        String encodedValue = fields[2];

        try {
            Charset charset = Charset.forName(charsetName);
            // The URLDecoder force-decodes + as " ", so the plus characters are pre-encoded
            // to preserve them through decoding.
            return URLDecoder.decode(encodePlusCharacters(encodedValue, charset), charset);
        } catch (RuntimeException ignored) {
            return null; // Ignoring an un-parsable value is within spec.
        }
    }

    /**
     * Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code
     * charset}.
     *
     * <p>Each byte of {@code "+"} encoded in {@code charset} is emitted as {@code %xx} (lower-case
     * hex), e.g. {@code "%2b"} for UTF-8, so that a subsequent {@code URLDecoder.decode} restores
     * the literal plus sign instead of turning it into a space.
     *
     * @param valueChars The (possibly percent-encoded) value to process.
     * @param charset Charset the value will later be decoded with.
     * @return {@code valueChars} with every {@code "+"} replaced by its percent-encoded form.
     */
    @NonNull
    private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) {
        StringBuilder escapedPlus = new StringBuilder();
        // getBytes yields exactly the encoded bytes; a ByteBuffer's backing array may be larger
        // than the encoded content and would leak padding bytes into the escape sequence.
        for (byte b : "+".getBytes(charset)) {
            // Formatting a byte is not possible with TextUtils.formatSimple.
            // "%%" emits the literal percent sign required for a percent-escape.
            escapedPlus.append(String.format(Locale.ROOT, "%%%02x", b));
        }
        // Literal replacement: no regex or replacement-group interpretation is wanted here.
        return valueChars.replace("+", escapedPlus.toString());
    }
}