Merge "Update guessFileName to use RFC 6266 starting in V" into main (1ef9432f) · Commits · e / os / android_frameworks_base

core/java/android/webkit/URLUtil.java

+321 −11

Original line number	Diff line number	Diff line
		@@ -16,20 +16,40 @@

		package android.webkit;

		import android.annotation.FlaggedApi;
		import android.annotation.NonNull;
		import android.annotation.Nullable;
		import android.compat.Compatibility;
		import android.compat.annotation.ChangeId;
		import android.compat.annotation.EnabledSince;
		import android.compat.annotation.UnsupportedAppUsage;
		import android.net.ParseException;
		import android.net.Uri;
		import android.net.WebAddress;
		import android.os.Build;
		import android.util.Log;

		import java.io.UnsupportedEncodingException;
		import java.net.URLDecoder;
		import java.nio.charset.Charset;
		import java.util.Locale;
		import java.util.regex.Matcher;
		import java.util.regex.Pattern;

		public final class URLUtil {

		/**
		* This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In
		* particular, this enables parsing of {@code filename*} values which can use a different
		* character encoding.
		*
		* <p>The change is enabled for apps targeting {@code VANILLA_ICE_CREAM} or later (see the
		* {@code @EnabledSince} annotation below). {@code guessFileName} and
		* {@code parseContentDisposition} query it through {@code Compatibility.isChangeEnabled} to
		* choose between the RFC 6266 and the legacy RFC 2616 code paths.
		*
		* @hide
		*/
		@ChangeId
		@EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM)
		@FlaggedApi(android.os.Flags.FLAG_ANDROID_OS_BUILD_VANILLA_ICE_CREAM)
		public static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L;

		// Tag string for log output; the call sites are outside the visible part of this file.
		private static final String LOGTAG = "webkit";
		// Compile-time switch, currently off; presumably guards verbose tracing — confirm at use sites.
		private static final boolean TRACE = false;

		@@ -293,21 +313,58 @@ public final class URLUtil {

		/**
		* Guesses canonical filename that a download would have, using the URL and contentDisposition.
		* File extension, if not defined, is added based on the mimetype
		*
		* <p>File extension, if not defined, is added based on the mimetype.
		*
		* <p>The {@code contentDisposition} argument will be treated differently depending on
		* targetSdkVersion.
		*
		* <ul>
		* <li>For targetSDK versions < {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
		* 2616.
		* <li>For targetSDK versions >= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
		* 6266.
		* </ul>
		*
		* In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to
		* parse {@code filename*} directives in the {@code contentDisposition} string.
		*
		* <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}:
		*
		* <ul>
		* <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the
		* method will append the appropriate extension instead of replacing the current
		* extension.
		* <li>If the suggested file name contains a path separator ({@code "/"}), the method will
		* replace this with the underscore character ({@code "_"}) instead of splitting the
		* result and only using the last part.
		* </ul>
		*
		* @param url Url to the content
		* @param contentDisposition Content-Disposition HTTP header or {@code null}
		* @param mimeType Mime-type of the content or {@code null}
		* @return suggested filename
		*/
		public static final String guessFileName(
		public static String guessFileName(
		String url, @Nullable String contentDisposition, @Nullable String mimeType) {
		if (android.os.Flags.androidOsBuildVanillaIceCream()) {
		if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
		return guessFileNameRfc6266(url, contentDisposition, mimeType);
		}
		}

		return guessFileNameRfc2616(url, contentDisposition, mimeType);
		}

		/** Legacy implementation of guessFileName, based on RFC 2616. */
		private static String guessFileNameRfc2616(
		String url, @Nullable String contentDisposition, @Nullable String mimeType) {
		String filename = null;
		String extension = null;

		// If we couldn't do anything with the hint, move toward the content disposition
		if (contentDisposition != null) {
		filename = parseContentDisposition(contentDisposition);
		filename = parseContentDispositionRfc2616(contentDisposition);
		if (filename != null) {
		int index = filename.lastIndexOf('/') + 1;
		if (index > 0) {
		@@ -384,6 +441,128 @@ public final class URLUtil {
		return filename + extension;
		}

		/**
		 * RFC 6266 flavour of {@code guessFileName}.
		 *
		 * <p>Obtains a base name from the Content-Disposition header or the URL and appends an
		 * extension derived from {@code mimeType} when the name has no dot at all, or when the text
		 * after its last dot maps to a different mime type than the one supplied.
		 */
		@NonNull
		private static String guessFileNameRfc6266(
		        @NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) {
		    final String baseName = getFilenameSuggestion(url, contentDisposition);
		    final String mimeExtension = suggestExtensionFromMimeType(mimeType);

		    // A name without any dot has no extension at all.
		    final boolean lacksExtension = baseName.indexOf('.') < 0;
		    // Otherwise only append when the existing extension disagrees with the mime type.
		    final boolean appendExtension =
		            lacksExtension
		                    || (mimeType != null
		                            && extensionDifferentFromMimeType(baseName, mimeType));

		    return appendExtension ? baseName + mimeExtension : baseName;
		}

		/**
		 * Picks a file name candidate, preferring the {@code contentDisposition} header over the last
		 * path segment of {@code url}. Any {@code "/"} in the chosen candidate is replaced with
		 * {@code "_"}. Falls back to {@code "downloadfile"} when neither source yields a name.
		 */
		@NonNull
		private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) {
		    String candidate = null;

		    // The header, when present and parsable, takes precedence.
		    if (contentDisposition != null) {
		        candidate = getFilenameFromContentDispositionRfc6266(contentDisposition);
		    }

		    // Otherwise derive the name from the URL path.
		    if (candidate == null && url != null) {
		        candidate = Uri.parse(url).getLastPathSegment();
		    }

		    // Nothing usable: hand back the generic default.
		    if (candidate == null) {
		        return "downloadfile";
		    }
		    return replacePathSeparators(candidate);
		}

		/**
		 * Substitutes every {@code "/"} in {@code raw} with {@code "_"} so the resulting file name
		 * cannot navigate the path.
		 */
		@NonNull
		private static String replacePathSeparators(@NonNull String raw) {
		    // Plain character substitution; no regular expression is needed for a single char.
		    return raw.replace('/', '_');
		}

		/**
		 * Tells whether the text after the last dot of {@code filename} maps to a mime type other than
		 * {@code mimeType}. Returns {@code false} when the extension has no known mime type.
		 */
		private static boolean extensionDifferentFromMimeType(
		        @NonNull String filename, @NonNull String mimeType) {
		    String extension = filename.substring(filename.lastIndexOf('.') + 1);
		    String mappedType = MimeTypeMap.getSingleton().getMimeTypeFromExtension(extension);
		    if (mappedType == null) {
		        // Unknown extension: nothing to compare against, so report no mismatch.
		        return false;
		    }
		    return !mappedType.equalsIgnoreCase(mimeType);
		}

		/**
		 * Chooses a file extension (with the leading {@code .}) for {@code mimeType}.
		 *
		 * <p>The {@code MimeTypeMap} lookup is tried first. When it has no entry, {@code text/html}
		 * yields {@code ".html"}, any other {@code text/} type yields {@code ".txt"}, and everything
		 * else, including a {@code null} mime type, yields {@code ".bin"}.
		 *
		 * @param mimeType Reported mimetype
		 * @return A file extension, including the {@code .}
		 */
		@NonNull
		private static String suggestExtensionFromMimeType(@Nullable String mimeType) {
		    if (mimeType != null) {
		        String mapped = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
		        if (mapped != null) {
		            return "." + mapped;
		        }
		        if (mimeType.equalsIgnoreCase("text/html")) {
		            return ".html";
		        }
		        if (mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
		            return ".txt";
		        }
		    }
		    // Unknown or missing mime type.
		    return ".bin";
		}

		/**
		 * Parse the Content-Disposition HTTP Header.
		 *
		 * <p>Behavior depends on targetSdkVersion.
		 *
		 * <ul>
		 *   <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will parse based on RFC 2616.
		 *   <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will parse based on RFC 6266.
		 * </ul>
		 */
		@UnsupportedAppUsage
		static String parseContentDisposition(String contentDisposition) {
		    // Both the build flag and the compat change must be on to use the RFC 6266 parser.
		    boolean useRfc6266 =
		            android.os.Flags.androidOsBuildVanillaIceCream()
		                    && Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266);
		    return useRfc6266
		            ? getFilenameFromContentDispositionRfc6266(contentDisposition)
		            : parseContentDispositionRfc2616(contentDisposition);
		}

		/** Regex used to parse content-disposition headers */
		private static final Pattern CONTENT_DISPOSITION_PATTERN =
		Pattern.compile(
		@@ -391,15 +570,14 @@ public final class URLUtil {
		Pattern.CASE_INSENSITIVE);

		/**
		* Parse the Content-Disposition HTTP Header. The format of the header is defined here:
		* http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html This header provides a filename for
		* content that is going to be downloaded to the file system. We only support the attachment
		* type. Note that RFC 2616 specifies the filename value must be double-quoted. Unfortunately
		* some servers do not quote the value so to maintain consistent behaviour with other browsers,
		* we allow unquoted values too.
		* Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a
		* href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This
		* header provides a filename for content that is going to be downloaded to the file system. We
		* only support the attachment type. Note that RFC 2616 specifies the filename value must be
		* double-quoted. Unfortunately some servers do not quote the value so to maintain consistent
		* behaviour with other browsers, we allow unquoted values too.
		*/
		@UnsupportedAppUsage
		static String parseContentDisposition(String contentDisposition) {
		private static String parseContentDispositionRfc2616(String contentDisposition) {
		try {
		Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
		if (m.find()) {
		@@ -410,4 +588,136 @@ public final class URLUtil {
		}
		return null;
		}

		/**
		* Pattern for parsing individual content disposition key-value pairs.
		*
		* <p>The pattern will attempt to parse the value as either single-, double-, or unquoted. For
		* the single- and double-quoted options, the pattern allows escaped quotes as part of the
		* value, as per <a href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616
		* section-2.2</a>
		*/
		@SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability.
		private static final Pattern DISPOSITION_PATTERN =
		Pattern.compile(
		"""
		\\s*(\\S+?) # Group 1: parameter name
		\\s=\\s # Match equals sign
		(?: # non-capturing group of options
		'( (?: [^'\\\\] \| \\\\. )* )' # Group 2: single-quoted
		\| "( (?: [^"\\\\] \| \\\\. )* )" # Group 3: double-quoted
		\| ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter
		)\\s*;? # Optional end semicolon""",
		Pattern.COMMENTS);

		/**
		* Extract filename from a {@code Content-Disposition} header value.
		*
		* <p>This method implements the parsing defined in <a
		* href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a>, supporting both the {@code
		* filename} and {@code filename*} disposition parameters. If the passed header value has the
		* {@code "inline"} disposition type, this method will return {@code null} to indicate that a
		* download was not intended.
		*
		* <p>If both {@code filename*} and {@code filename} is present, the former will be returned, as
		* per the RFC. Invalid encoded values will be ignored.
		*
		* @param contentDisposition Value of {@code Content-Disposition} header.
		* @return The filename suggested by the header or {@code null} if no filename could be parsed
		* from the header value.
		*/
		@Nullable
		private static String getFilenameFromContentDispositionRfc6266(
		@NonNull String contentDisposition) {
		String[] parts = contentDisposition.trim().split(";", 2);
		if (parts.length < 2) {
		// Need at least 2 parts, the `disposition-type` and at least one `disposition-parm`.
		return null;
		}
		String dispositionType = parts[0].trim();
		if ("inline".equalsIgnoreCase(dispositionType)) {
		// "inline" should not result in a download.
		// Unknown disposition types should be handles as "attachment"
		// https://datatracker.ietf.org/doc/html/rfc6266#section-4.2
		return null;
		}
		String dispositionParameters = parts[1];
		Matcher matcher = DISPOSITION_PATTERN.matcher(dispositionParameters);
		String filename = null;
		String filenameExt = null;
		while (matcher.find()) {
		String parameter = matcher.group(1);
		String value;
		if (matcher.group(2) != null) {
		value = removeSlashEscapes(matcher.group(2)); // Value was single-quoted
		} else if (matcher.group(3) != null) {
		value = removeSlashEscapes(matcher.group(3)); // Value was double-quoted
		} else {
		value = matcher.group(4); // Value was un-quoted
		}

		if (parameter == null \|\| value == null) {
		continue;
		}

		if ("filename*".equalsIgnoreCase(parameter)) {
		filenameExt = parseExtValueString(value);
		} else if ("filename".equalsIgnoreCase(parameter)) {
		filename = value;
		}
		}

		// RFC 6266 dictates the filenameExt should be preferred if present.
		if (filenameExt != null) {
		return filenameExt;
		}
		return filename;
		}

		/**
		 * Strips the backslash from every {@code \X} escape, leaving just {@code X}. A {@code null}
		 * input yields {@code null}.
		 */
		private static String removeSlashEscapes(String raw) {
		    return raw == null
		            ? null
		            : Pattern.compile("\\\\(.)").matcher(raw).replaceAll("$1");
		}

		/**
		 * Decodes an extended parameter value of the form {@code charset'language'percent-encoded}.
		 * The language part is ignored. Returns {@code null} if the string does not contain both
		 * {@code '} delimiters, or if the charset or the encoded value cannot be processed.
		 */
		private static String parseExtValueString(String raw) {
		    int charsetEnd = raw.indexOf('\'');
		    int languageEnd = charsetEnd < 0 ? -1 : raw.indexOf('\'', charsetEnd + 1);
		    if (languageEnd < 0) {
		        // Fewer than two delimiters: not a well-formed extended value.
		        return null;
		    }

		    String charsetName = raw.substring(0, charsetEnd);
		    // The language tag between the two delimiters is intentionally not used.
		    String encodedValue = raw.substring(languageEnd + 1);

		    try {
		        Charset charset = Charset.forName(charsetName);
		        // URLDecoder turns "+" into " ", so protect literal plus signs before decoding.
		        return URLDecoder.decode(encodePlusCharacters(encodedValue, charset), charset);
		    } catch (RuntimeException ignored) {
		        return null; // Ignoring an un-parsable value is within spec.
		    }
		}

		/**
		 * Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code
		 * charset}.
		 *
		 * <p>Each byte that {@code charset} produces for {@code "+"} is written as {@code %xx}, so a
		 * later {@code URLDecoder.decode} with the same charset restores the literal plus sign instead
		 * of turning it into a space.
		 *
		 * @param valueChars percent-encoded text that may contain literal {@code +} characters
		 * @param charset charset the text will later be decoded with
		 * @return {@code valueChars} with every {@code +} replaced by its percent-escape
		 */
		@NonNull
		private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) {
		    // Read only the bytes between position and limit; the backing array returned by
		    // array() may be larger than the encoded content.
		    java.nio.ByteBuffer encodedPlus = charset.encode("+");
		    StringBuilder escapedPlus = new StringBuilder();
		    while (encodedPlus.hasRemaining()) {
		        // Formatting a byte is not possible with TextUtils.formatSimple
		        // "%%" emits the literal percent sign that makes this a percent-escape.
		        escapedPlus.append(String.format(Locale.ROOT, "%%%02x", encodedPlus.get()));
		    }
		    // Literal replacement: no regex and no special handling of the replacement text.
		    return valueChars.replace("+", escapedPlus.toString());
		}
		}