Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 50a99364 authored by Jeff Sharkey
Browse files

Consistent handling of modified UTF-8.

A recent set of patches had mismatched handling of UTF-8 vs modified
UTF-8; this change converges all paths towards using modified UTF-8
to match the DataInput/Output API contract.

New tests verify that underlying raw data is compatible between the
upstream and local implementations.

Bug: 171832118
Test: atest FrameworksCoreTests:android.util.CharsetUtilsTest
Test: atest FrameworksCoreTests:android.util.XmlTest
Test: atest FrameworksCoreTests:android.util.BinaryXmlTest
Test: atest FrameworksCoreTests:com.android.internal.util.FastDataTest
Change-Id: I49423edc867839fb6626cd8bd361abe7bc512633
parent 82fe6246
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -70,7 +70,7 @@ public class CharsetUtilsPerfTest {

        final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
        while (state.keepRunning()) {
            CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
            CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
        }
    }

@@ -85,7 +85,7 @@ public class CharsetUtilsPerfTest {

        final BenchmarkState state = mPerfStatusReporter.getBenchmarkState();
        while (state.keepRunning()) {
            CharsetUtils.toUtf8Bytes(mValue, destPtr, 0, dest.length);
            CharsetUtils.toModifiedUtf8Bytes(mValue, destPtr, 0, dest.length);
       }
    }
}
+25 −11
Original line number Diff line number Diff line
@@ -31,34 +31,48 @@ import dalvik.annotation.optimization.FastNative;
 */
public class CharsetUtils {
    /**
     * Attempt to encode the given string as UTF-8 into the destination byte
     * array without making any new allocations.
     * Attempt to encode the given string as modified UTF-8 into the destination
     * byte array without making any new allocations.
     *
     * @param src string value to be encoded
     * @param dest destination byte array to encode into
     * @param destOff offset into destination where encoding should begin
     * @param destLen length of destination
     * @return the number of bytes written to the destination when encoded
     *         successfully, otherwise {@code -1} if not large enough
     * @return positive value when encoding succeeded, or negative value when
     *         failed; the magnitude of the value is the number of bytes
     *         required to encode the string.
     */
    public static int toUtf8Bytes(@NonNull String src,
    public static int toModifiedUtf8Bytes(@NonNull String src,
            long dest, int destOff, int destLen) {
        return toUtf8Bytes(src, src.length(), dest, destOff, destLen);
        return toModifiedUtf8Bytes(src, src.length(), dest, destOff, destLen);
    }

    /**
     * Attempt to encode the given string as UTF-8 into the destination byte
     * array without making any new allocations.
     * Attempt to encode the given string as modified UTF-8 into the destination
     * byte array without making any new allocations.
     *
     * @param src string value to be encoded
     * @param srcLen exact length of string to be encoded
     * @param dest destination byte array to encode into
     * @param destOff offset into destination where encoding should begin
     * @param destLen length of destination
     * @return the number of bytes written to the destination when encoded
     *         successfully, otherwise {@code -1} if not large enough
     * @return positive value when encoding succeeded, or negative value when
     *         failed; the magnitude of the value is the number of bytes
     *         required to encode the string.
     */
    @FastNative
    private static native int toUtf8Bytes(@NonNull String src, int srcLen,
    private static native int toModifiedUtf8Bytes(@NonNull String src, int srcLen,
            long dest, int destOff, int destLen);

    /**
     * Attempt to decode a modified UTF-8 string from the source byte array.
     * <p>
     * The native implementation temporarily overwrites the byte located at
     * {@code srcOff + srcLen} with a null terminator while decoding and then
     * restores it, so that byte must be addressable and writable; callers
     * should provide at least one byte of space beyond the encoded data.
     *
     * @param src native address of the source byte array to decode from
     * @param srcOff offset into source where decoding should begin
     * @param srcLen length of source that should be decoded
     * @return the successfully decoded string
     */
    @FastNative
    public static native @NonNull String fromModifiedUtf8Bytes(
            long src, int srcOff, int srcLen);
}
+13 −7
Original line number Diff line number Diff line
@@ -17,6 +17,9 @@
package com.android.internal.util;

import android.annotation.NonNull;
import android.util.CharsetUtils;

import dalvik.system.VMRuntime;

import java.io.BufferedInputStream;
import java.io.Closeable;
@@ -25,7 +28,6 @@ import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;

@@ -39,9 +41,11 @@ import java.util.Objects;
public class FastDataInput implements DataInput, Closeable {
    private static final int MAX_UNSIGNED_SHORT = 65_535;

    private final VMRuntime mRuntime;
    private final InputStream mIn;

    private final byte[] mBuffer;
    private final long mBufferPtr;
    private final int mBufferCap;

    private int mBufferPos;
@@ -54,12 +58,14 @@ public class FastDataInput implements DataInput, Closeable {
    private String[] mStringRefs = new String[32];

    public FastDataInput(@NonNull InputStream in, int bufferSize) {
        mRuntime = VMRuntime.getRuntime();
        mIn = Objects.requireNonNull(in);
        if (bufferSize < 8) {
            throw new IllegalArgumentException();
        }

        mBuffer = new byte[bufferSize];
        mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
        mBufferPtr = mRuntime.addressOf(mBuffer);
        mBufferCap = mBuffer.length;
    }

@@ -123,15 +129,15 @@ public class FastDataInput implements DataInput, Closeable {
        // Attempt to read directly from buffer space if there's enough room,
        // otherwise fall back to chunking into place
        final int len = readUnsignedShort();
        if (mBufferCap >= len) {
        if (mBufferCap > len) {
            if (mBufferLim - mBufferPos < len) fill(len);
            final String res = new String(mBuffer, mBufferPos, len, StandardCharsets.UTF_8);
            final String res = CharsetUtils.fromModifiedUtf8Bytes(mBufferPtr, mBufferPos, len);
            mBufferPos += len;
            return res;
        } else {
            final byte[] tmp = new byte[len];
            readFully(tmp, 0, tmp.length);
            return new String(tmp, StandardCharsets.UTF_8);
            final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
            readFully(tmp, 0, len);
            return CharsetUtils.fromModifiedUtf8Bytes(mRuntime.addressOf(tmp), 0, len);
        }
    }

+24 −16
Original line number Diff line number Diff line
@@ -28,7 +28,6 @@ import java.io.DataOutputStream;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Objects;

@@ -42,6 +41,7 @@ import java.util.Objects;
public class FastDataOutput implements DataOutput, Flushable, Closeable {
    private static final int MAX_UNSIGNED_SHORT = 65_535;

    private final VMRuntime mRuntime;
    private final OutputStream mOut;

    private final byte[] mBuffer;
@@ -56,13 +56,14 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
    private HashMap<String, Short> mStringRefs = new HashMap<>();

    public FastDataOutput(@NonNull OutputStream out, int bufferSize) {
        mRuntime = VMRuntime.getRuntime();
        mOut = Objects.requireNonNull(out);
        if (bufferSize < 8) {
            throw new IllegalArgumentException();
        }

        mBuffer = (byte[]) VMRuntime.getRuntime().newNonMovableArray(byte.class, bufferSize);
        mBufferPtr = VMRuntime.getRuntime().addressOf(mBuffer);
        mBuffer = (byte[]) mRuntime.newNonMovableArray(byte.class, bufferSize);
        mBufferPtr = mRuntime.addressOf(mBuffer);
        mBufferCap = mBuffer.length;
    }

@@ -111,21 +112,28 @@ public class FastDataOutput implements DataOutput, Flushable, Closeable {
        // Attempt to write directly to buffer space if there's enough room,
        // otherwise fall back to chunking into place
        if (mBufferCap - mBufferPos < 2 + s.length()) drain();
        final int res = CharsetUtils.toUtf8Bytes(s, mBufferPtr, mBufferPos + 2,

        // Magnitude of this returned value indicates the number of bytes
        // required to encode the string; sign indicates success/failure
        int len = CharsetUtils.toModifiedUtf8Bytes(s, mBufferPtr, mBufferPos + 2,
                mBufferCap - mBufferPos - 2);
        if (res >= 0) {
            if (res > MAX_UNSIGNED_SHORT) {
                throw new IOException("UTF-8 length too large: " + res);
        if (Math.abs(len) > MAX_UNSIGNED_SHORT) {
            throw new IOException("Modified UTF-8 length too large: " + len);
        }
            writeShort(res);
            mBufferPos += res;

        if (len >= 0) {
            // Positive value indicates the string was encoded into the buffer
            // successfully, so we only need to prefix with length
            writeShort(len);
            mBufferPos += len;
        } else {
            final byte[] tmp = s.getBytes(StandardCharsets.UTF_8);
            if (tmp.length > MAX_UNSIGNED_SHORT) {
                throw new IOException("UTF-8 length too large: " + res);
            }
            writeShort(tmp.length);
            write(tmp, 0, tmp.length);
            // Negative value indicates buffer was too small and we need to
            // allocate a temporary buffer for encoding
            len = -len;
            final byte[] tmp = (byte[]) mRuntime.newNonMovableArray(byte.class, len + 1);
            CharsetUtils.toModifiedUtf8Bytes(s, mRuntime.addressOf(tmp), 0, tmp.length);
            writeShort(len);
            write(tmp, 0, len);
        }
    }

+22 −5
Original line number Diff line number Diff line
@@ -19,13 +19,14 @@

namespace android {

static jint android_util_CharsetUtils_toUtf8Bytes(JNIEnv *env, jobject clazz,
static jint android_util_CharsetUtils_toModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
        jstring src, jint srcLen, jlong dest, jint destOff, jint destLen) {
    char *destPtr = reinterpret_cast<char*>(dest);

    // Quickly check if destination has plenty of room for worst-case
    // 4-bytes-per-char encoded size
    if (destOff >= 0 && destOff + (srcLen * 4) < destLen) {
    const size_t worstLen = (srcLen * 4);
    if (destOff >= 0 && destOff + worstLen < destLen) {
        env->GetStringUTFRegion(src, 0, srcLen, destPtr + destOff);
        return strlen(destPtr + destOff + srcLen) + srcLen;
    }
@@ -38,13 +39,29 @@ static jint android_util_CharsetUtils_toUtf8Bytes(JNIEnv *env, jobject clazz,
        return encodedLen;
    }

    return -1;
    return -encodedLen;
}

/**
 * Decodes srcLen bytes of modified UTF-8, starting srcOff bytes past the raw
 * address in src, into a new Java string.
 *
 * NewStringUTF expects null-terminated input, so the byte just past the
 * encoded region is briefly replaced with a terminator and then restored.
 * That byte must therefore be addressable and writable by the caller.
 */
static jstring android_util_CharsetUtils_fromModifiedUtf8Bytes(JNIEnv *env, jobject clazz,
        jlong src, jint srcOff, jint srcLen) {
    char *base = reinterpret_cast<char*>(src);
    const jint endIndex = srcOff + srcLen;

    // Stash whatever currently lives one past the encoded bytes
    const char saved = base[endIndex];

    // Terminate in place so JNI knows where the string ends
    base[endIndex] = '\0';
    jstring decoded = env->NewStringUTF(base + srcOff);

    // Put the original byte back before handing the result to the caller
    base[endIndex] = saved;
    return decoded;
}

static const JNINativeMethod methods[] = {
    // @FastNative
    {"toUtf8Bytes",      "(Ljava/lang/String;IJII)I",
            (void*)android_util_CharsetUtils_toUtf8Bytes},
    {"toModifiedUtf8Bytes",      "(Ljava/lang/String;IJII)I",
            (void*)android_util_CharsetUtils_toModifiedUtf8Bytes},
    // @FastNative
    {"fromModifiedUtf8Bytes",    "(JII)Ljava/lang/String;",
            (void*)android_util_CharsetUtils_fromModifiedUtf8Bytes},
};

int register_android_util_CharsetUtils(JNIEnv *env) {
Loading