Consistent handling of modified UTF-8. (50a99364) · Commits · e / os / android_frameworks_base

apct-tests/perftests/core/src/android/util/CharsetUtilsPerfTest.java

+2 −2

Original line number	Diff line number	Diff line
		@@ -70,7 +70,7 @@ public class CharsetUtilsPerfTest {

		final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
		while (state.keepRunning()) {
		CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
		CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
		}
		}

		@@ -85,7 +85,7 @@ public class CharsetUtilsPerfTest {

		final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
		while (state.keepRunning()) {
		CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
		CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
		}
		}
		}

core/java/android/util/CharsetUtils.java

+25 −11

Original line number	Diff line number	Diff line
		@@ -31,34 +31,48 @@ import dalvik.annotation.optimization.FastNative;
		*/
		public class CharsetUtils {
		/**
		* Attempt to encode the given string as UTF-8 into the destination byte
		* array without making any new allocations.
		* Attempt to encode the given string as modified UTF-8 into the destination
		* byte array without making any new allocations.
		*
		* @param src string value to be encoded
		* @param dest destination byte array to encode into
		* @param destOff offset into destination where encoding should begin
		* @param destLen length of destination
		* @return the number of bytes written to the destination when encoded
		* successfully, otherwise {@code -1} if not large enough
		* @return positive value when encoding succeeded, or negative value when
		* failed; the magnitude of the value is the number of bytes
		* required to encode the string.
		*/
		public static int toUtf8Bytes(@NonNull String src,
		public static int toModifiedUtf8Bytes(@NonNull String src,
		long dest, int destOff, int destLen) {
		return toUtf8Bytes(src, src.length(), dest, destOff, destLen);
		return toModifiedUtf8Bytes(src, src.length(), dest, destOff, destLen);
		}

		/**
		* Attempt to encode the given string as UTF-8 into the destination byte
		* array without making any new allocations.
		* Attempt to encode the given string as modified UTF-8 into the destination
		* byte array without making any new allocations.
		*
		* @param src string value to be encoded
		* @param srcLen exact length of string to be encoded
		* @param dest destination byte array to encode into
		* @param destOff offset into destination where encoding should begin
		* @param destLen length of destination
		* @return the number of bytes written to the destination when encoded
		* successfully, otherwise {@code -1} if not large enough
		* @return positive value when encoding succeeded, or negative value when
		* failed; the magnitude of the value is the number of bytes
		* required to encode the string.
		*/
		@FastNative
		private static native int toUtf8Bytes(@NonNull String src, int srcLen,
		private static native int toModifiedUtf8Bytes(@NonNull String src, int srcLen,
		long dest, int destOff, int destLen);

		/**
		* Attempt to decode a modified UTF-8 string from the source byte array.
		* <p>
		* The native implementation temporarily overwrites the byte at
		* {@code src + srcOff + srcLen} with a null terminator and restores it
		* before returning, so that address must be valid, writable memory owned
		* by the caller; the backing buffer therefore needs at least one byte of
		* room past the region being decoded.
		*
		* @param src native address of the source byte array to decode from
		* @param srcOff offset into source where decoding should begin
		* @param srcLen length of source that should be decoded
		* @return the successfully decoded string
		*/
		@FastNative
		public static native @NonNull String fromModifiedUtf8Bytes(
		long src, int srcOff, int srcLen);
		}

core/java/com/android/internal/util/FastDataInput.java

+13 −7

Original line number	Diff line number	Diff line
		@@ -17,6 +17,9 @@
		package com.android.internal.util;

		import android.annotation.NonNull;
		import android.util.CharsetUtils;

		import dalvik.system.VMRuntime;

		import java.io.BufferedInputStream;
		import java.io.Closeable;
		@@ -25,7 +28,6 @@ import java.io.DataInputStream;
		import java.io.EOFException;
		import java.io.IOException;
		import java.io.InputStream;
		import java.nio.charset.StandardCharsets;
		import java.util.Arrays;
		import java.util.Objects;

		@@ -39,9 +41,11 @@ import java.util.Objects;
		public class FastDataInput implements DataInput, Closeable {
		private static final int MAX_UNSIGNED_SHORT = 65_535;

		private final VMRuntime mRuntime;
		private final InputStream mIn;

		private final byte[] mBuffer;
		private final long mBufferPtr;
		private final int mBufferCap;

		private int mBufferPos;
		@@ -54,12 +58,14 @@ public class FastDataInput implements DataInput, Closeable {
		private String[] mStringRefs = new String[32];

		public FastDataInput(@NonNull InputStream in, int bufferSize) {
		mRuntime = VMRuntime.getRuntime();
		mIn = Objects.requireNonNull(in);
		if (bufferSize < 8) {
		throw new IllegalArgumentException();
		}

		mBuffer = new byte[bufferSize];
		mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
		mBufferPtr = mRuntime.addressOf(mBuffer);
		mBufferCap = mBuffer.length;
		}

		@@ -123,15 +129,15 @@ public class FastDataInput implements DataInput, Closeable {
		// Attempt to read directly from buffer space if there's enough room,
		// otherwise fall back to chunking into place
		final int len = readUnsignedShort();
		if (mBufferCap >= len) {
		if (mBufferCap > len) {
		if (mBufferLim - mBufferPos < len) fill(len);
		final String res = new String(mBuffer, mBufferPos, len, StandardCharsets.UTF_8);
		final String res = CharsetUtils.fromModifiedUtf8Bytes(mBufferPtr, mBufferPos, len);
		mBufferPos += len;
		return res;
		} else {
		final byte[] tmp = new byte[len];
		readFully(tmp, 0, tmp.length);
		return new String(tmp, StandardCharsets.UTF_8);
		final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
		readFully(tmp, 0, len);
		return CharsetUtils.fromModifiedUtf8Bytes(mRuntime.addressOf(tmp), 0, len);
		}
		}

core/java/com/android/internal/util/FastDataOutput.java

+24 −16

Original line number	Diff line number	Diff line
		@@ -28,7 +28,6 @@ import java.io.DataOutputStream;
		import java.io.Flushable;
		import java.io.IOException;
		import java.io.OutputStream;
		import java.nio.charset.StandardCharsets;
		import java.util.HashMap;
		import java.util.Objects;

		@@ -42,6 +41,7 @@ import java.util.Objects;
		public class FastDataOutput implements DataOutput, Flushable, Closeable {
		private static final int MAX_UNSIGNED_SHORT = 65_535;

		private final VMRuntime mRuntime;
		private final OutputStream mOut;

		private final byte[] mBuffer;
		@@ -56,13 +56,14 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
		private HashMap<String, Short> mStringRefs = new HashMap<>();

		public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
		mRuntime = VMRuntime.getRuntime();
		mOut = Objects.requireNonNull(out);
		if (bufferSize < 8) {
		throw new IllegalArgumentException();
		}

		mBuffer = (byte[]) VMRuntime.getRuntime().newNonMovableArray(byte.class, bufferSize);
		mBufferPtr = VMRuntime.getRuntime().addressOf(mBuffer);
		mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
		mBufferPtr = mRuntime.addressOf(mBuffer);
		mBufferCap = mBuffer.length;
		}

		@@ -111,21 +112,28 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
		// Attempt to write directly to buffer space if there's enough room,
		// otherwise fall back to chunking into place
		if (mBufferCap - mBufferPos < 2 + s.length()) drain();
		final int res = CharsetUtils.toUtf8Bytes(s, mBufferPtr, mBufferPos + 2,

		// Magnitude of this returned value indicates the number of bytes
		// required to encode the string; sign indicates success/failure
		int len = CharsetUtils.toModifiedUtf8Bytes(s, mBufferPtr, mBufferPos + 2,
		mBufferCap - mBufferPos - 2);
		if (res >= 0) {
		if (res > MAX_UNSIGNED_SHORT) {
		throw new IOException("UTF-8 length too large: " + res);
		if (Math.abs(len) > MAX_UNSIGNED_SHORT) {
		throw new IOException("Modified UTF-8 length too large: " + len);
		}
		writeShort(res);
		mBufferPos += res;

		if (len >= 0) {
		// Positive value indicates the string was encoded into the buffer
		// successfully, so we only need to prefix with length
		writeShort(len);
		mBufferPos += len;
		} else {
		final byte[] tmp = s.getBytes(StandardCharsets.UTF_8);
		if (tmp.length > MAX_UNSIGNED_SHORT) {
		throw new IOException("UTF-8 length too large: " + res);
		}
		writeShort(tmp.length);
		write(tmp, 0, tmp.length);
		// Negative value indicates buffer was too small and we need to
		// allocate a temporary buffer for encoding
		len = -len;
		final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
		CharsetUtils.toModifiedUtf8Bytes(s, mRuntime.addressOf(tmp), 0, tmp.length);
		writeShort(len);
		write(tmp, 0, len);
		}
		}

core/jni/android_util_CharsetUtils.cpp

+22 −5

Original line number	Diff line number	Diff line
		@@ -19,13 +19,14 @@

		namespace android {

		static jint android_util_CharsetUtils_toUtf8Bytes(JNIEnv *env, jobject clazz,
		static jint android_util_CharsetUtils_toModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
		jstring src, jint srcLen, jlong dest, jint destOff, jint destLen) {
		char *destPtr = reinterpret_cast<char*>(dest);

		// Quickly check if destination has plenty of room for worst-case
		// 4-bytes-per-char encoded size
		if (destOff >= 0 && destOff + (srcLen * 4) < destLen) {
		const size_t worstLen = (srcLen * 4);
		if (destOff >= 0 && destOff + worstLen < destLen) {
		env->GetStringUTFRegion(src, 0, srcLen, destPtr + destOff);
		return strlen(destPtr + destOff + srcLen) + srcLen;
		}
		@@ -38,13 +39,29 @@ static jint android_util_CharsetUtils_toUtf8Bytes(JNIEnv *env, jobject clazz,
		return encodedLen;
		}

		return -1;
		return -encodedLen;
		}

		// Decodes srcLen bytes of modified UTF-8, starting at (src + srcOff), into
		// a new Java string.
		//
		// src is a raw native address handed over from Java as a jlong. Because
		// NewStringUTF() reads up to a terminating null byte, the byte at index
		// (srcOff + srcLen) is temporarily overwritten and then restored; that
		// index must therefore lie inside writable memory owned by the caller.
		static jstring android_util_CharsetUtils_fromModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
		        jlong src, jint srcOff, jint srcLen) {
		    // Must be a pointer: it is indexed and offset below
		    char *srcPtr = reinterpret_cast<char*>(src);

		    // This is funky, but we need to temporarily swap a null byte so that
		    // JNI knows where the string ends; we'll put it back, we promise
		    char tmp = srcPtr[srcOff + srcLen];
		    srcPtr[srcOff + srcLen] = '\0';
		    jstring res = env->NewStringUTF(srcPtr + srcOff);
		    srcPtr[srcOff + srcLen] = tmp;
		    return res;
		}

		static const JNINativeMethod methods[] = {
		// @FastNative
		{"toUtf8Bytes", "(Ljava/lang/String;IJII)I",
		(void*)android_util_CharsetUtils_toUtf8Bytes},
		{"toModifiedUtf8Bytes", "(Ljava/lang/String;IJII)I",
		(void*)android_util_CharsetUtils_toModifiedUtf8Bytes},
		// @FastNative
		{"fromModifiedUtf8Bytes", "(JII)Ljava/lang/String;",
		(void*)android_util_CharsetUtils_fromModifiedUtf8Bytes},
		};

		int register_android_util_CharsetUtils(JNIEnv *env) {