Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit eb318fd8 authored by Android (Google) Code Review's avatar Android (Google) Code Review
Browse files

Merge change I129483f8 into eclair-mr2

* changes:
  Optional use of UTF-8 strings in resource bundles
parents ea3ec40a 92f5984d
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -393,7 +393,10 @@ struct ResStringPool_header
    enum {
        // If set, the string index is sorted by the string values (based
        // on strcmp16()).
        SORTED_FLAG = 1<<0
        SORTED_FLAG = 1<<0,

        // String pool is encoded in UTF-8
        UTF8_FLAG = 1<<8
    };
    uint32_t flags;

@@ -456,9 +459,11 @@ private:
    void*                       mOwnedData;
    const ResStringPool_header* mHeader;
    size_t                      mSize;
    mutable Mutex               mDecodeLock;
    const uint32_t*             mEntries;
    const uint32_t*             mEntryStyles;
    const char16_t*             mStrings;
    const void*                 mStrings;
    char16_t**                  mCache;
    uint32_t                    mStringPoolSize;    // number of uint16_t
    const uint32_t*             mStyles;
    uint32_t                    mStylePoolSize;    // number of uint32_t
+5 −0
Original line number Diff line number Diff line
@@ -49,12 +49,17 @@ int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2);
// Version of strzcmp16 for comparing strings in different endianness.
int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2);

// Convert UTF-8 to UTF-16 including surrogate pairs
void utf8_to_utf16(const uint8_t *src, size_t srcLen, char16_t* dst, const size_t dstLen);

}

// ---------------------------------------------------------------------------

namespace android {

// ---------------------------------------------------------------------------

class String8;
class TextOutput;

+8 −0
Original line number Diff line number Diff line
@@ -57,6 +57,11 @@ size_t utf8_length(const char *src);
 */
size_t utf32_length(const char *src, size_t src_len);

/*
 * Returns the UTF-8 length of the UTF-16 string "src".
 */
size_t utf8_length_from_utf16(const char16_t *src, size_t src_len);

/*
 * Returns the UTF-8 length of "src".
 */
@@ -120,6 +125,9 @@ size_t utf8_to_utf32(const char* src, size_t src_len,
size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                     char* dst, size_t dst_len);

size_t utf16_to_utf8(const char16_t* src, size_t src_len,
                     char* dst, size_t dst_len);

}

// ---------------------------------------------------------------------------
+84 −20
Original line number Diff line number Diff line
@@ -229,12 +229,12 @@ Res_png_9patch* Res_png_9patch::deserialize(const void* inData)
// --------------------------------------------------------------------

// Default constructor: marks the pool as not yet initialized (mError is
// NO_INIT) and starts with no owned data, no header and no decode cache.
ResStringPool::ResStringPool()
    : mError(NO_INIT), mOwnedData(NULL)
    : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
{
}

// Constructs a pool in the same empty NO_INIT state as the default
// constructor, then immediately hands data/size/copyData to setTo().
ResStringPool::ResStringPool(const void* data, size_t size, bool copyData)
    : mError(NO_INIT), mOwnedData(NULL)
    : mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
{
    setTo(data, size, copyData);
}
@@ -296,7 +296,17 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
                    (int)size);
            return (mError=BAD_TYPE);
        }
        mStrings = (const char16_t*)

        size_t charSize;
        if (mHeader->flags&ResStringPool_header::UTF8_FLAG) {
            charSize = sizeof(uint8_t);
            mCache = (char16_t**)malloc(sizeof(char16_t**)*mHeader->stringCount);
            memset(mCache, 0, sizeof(char16_t**)*mHeader->stringCount);
        } else {
            charSize = sizeof(char16_t);
        }

        mStrings = (const void*)
            (((const uint8_t*)data)+mHeader->stringsStart);
        if (mHeader->stringsStart >= (mHeader->header.size-sizeof(uint16_t))) {
            LOGW("Bad string block: string pool starts at %d, after total size %d\n",
@@ -305,7 +315,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
        }
        if (mHeader->styleCount == 0) {
            mStringPoolSize =
                (mHeader->header.size-mHeader->stringsStart)/sizeof(uint16_t);
                (mHeader->header.size-mHeader->stringsStart)/charSize;
        } else {
            // check invariant: styles follow the strings
            if (mHeader->stylesStart <= mHeader->stringsStart) {
@@ -314,7 +324,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
                return (mError=BAD_TYPE);
            }
            mStringPoolSize =
                (mHeader->stylesStart-mHeader->stringsStart)/sizeof(uint16_t);
                (mHeader->stylesStart-mHeader->stringsStart)/charSize;
        }

        // check invariant: stringCount > 0 requires a string pool to exist
@@ -329,13 +339,19 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
            for (i=0; i<mHeader->stringCount; i++) {
                e[i] = dtohl(mEntries[i]);
            }
            char16_t* s = const_cast<char16_t*>(mStrings);
            if (!(mHeader->flags&ResStringPool_header::UTF8_FLAG)) {
                const char16_t* strings = (const char16_t*)mStrings;
                char16_t* s = const_cast<char16_t*>(strings);
                for (i=0; i<mStringPoolSize; i++) {
                s[i] = dtohs(mStrings[i]);
                    s[i] = dtohs(strings[i]);
                }
            }
        }

        if (mStrings[mStringPoolSize-1] != 0) {
        if ((mHeader->flags&ResStringPool_header::UTF8_FLAG &&
                ((uint8_t*)mStrings)[mStringPoolSize-1] != 0) ||
                (!mHeader->flags&ResStringPool_header::UTF8_FLAG &&
                ((char16_t*)mStrings)[mStringPoolSize-1] != 0)) {
            LOGW("Bad string block: last string is not 0-terminated\n");
            return (mError=BAD_TYPE);
        }
@@ -410,24 +426,67 @@ void ResStringPool::uninit()
        free(mOwnedData);
        mOwnedData = NULL;
    }
    if (mHeader != NULL && mCache != NULL) {
        for (size_t x = 0; x < mHeader->stringCount; x++) {
            if (mCache[x] != NULL) {
                free(mCache[x]);
                mCache[x] = NULL;
            }
        }
        free(mCache);
        mCache = NULL;
    }
}

/*
 * Decodes the length prefix that `str` points at and leaves `str` on the
 * first unit after the prefix.
 *
 *   str   - pointer lvalue to units that are `chrsz` bytes wide; advanced
 *           in place
 *   chrsz - size in bytes of one unit
 *   len   - lvalue that receives the decoded length
 *
 * The prefix is one unit long, or two when the top bit of the first unit is
 * set; in the two-unit form the remaining bits of the first unit are the
 * high part and the second unit is the low part.
 *
 * The expansion is a single braced block and `chrsz`/`len` are
 * parenthesized, so the macro is safe under an unbraced `if` and accepts an
 * expression such as `1+1` for `chrsz`. It may be used with or without a
 * trailing semicolon. `str` and `len` are evaluated more than once and must
 * not have side effects.
 */
#define DECODE_LENGTH(str, chrsz, len) \
    { \
        (len) = *(str); \
        if (*(str)&(1<<((chrsz)*8-1))) { \
            (str)++; \
            (len) = (((len)&((1<<((chrsz)*8-1))-1))<<((chrsz)*8)) + *(str); \
        } \
        (str)++; \
    }

const uint16_t* ResStringPool::stringAt(size_t idx, size_t* outLen) const
{
    if (mError == NO_ERROR && idx < mHeader->stringCount) {
        const uint32_t off = (mEntries[idx]/sizeof(uint16_t));
        const bool isUTF8 = (mHeader->flags&ResStringPool_header::UTF8_FLAG) != 0;
        const uint32_t off = mEntries[idx]/(isUTF8?sizeof(char):sizeof(char16_t));
        if (off < (mStringPoolSize-1)) {
            const char16_t* str = mStrings+off;
            *outLen = *str;
            if ((*str)&0x8000) {
                str++;
                *outLen = (((*outLen)&0x7fff)<<16) + *str;
            }
            if ((uint32_t)(str+1+*outLen-mStrings) < mStringPoolSize) {
                return str+1;
            if (!isUTF8) {
                const char16_t* strings = (char16_t*)mStrings;
                const char16_t* str = strings+off;
                DECODE_LENGTH(str, sizeof(char16_t), *outLen)
                if ((uint32_t)(str+*outLen-strings) < mStringPoolSize) {
                    return str;
                } else {
                    LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
                        (int)idx, (int)(str+1+*outLen-mStrings), (int)mStringPoolSize);
                            (int)idx, (int)(str+*outLen-strings), (int)mStringPoolSize);
                }
            } else {
                const uint8_t* strings = (uint8_t*)mStrings;
                const uint8_t* str = strings+off;
                DECODE_LENGTH(str, sizeof(uint8_t), *outLen)
                size_t encLen;
                DECODE_LENGTH(str, sizeof(uint8_t), encLen)
                if ((uint32_t)(str+encLen-strings) < mStringPoolSize) {
                    AutoMutex lock(mDecodeLock);
                    if (mCache[idx] != NULL) {
                        return mCache[idx];
                    }
                    char16_t *u16str = (char16_t *)calloc(*outLen+1, sizeof(char16_t));
                    if (!u16str) {
                        LOGW("No memory when trying to allocate decode cache for string #%d\n",
                                (int)idx);
                        return NULL;
                    }
                    const unsigned char *u8src = reinterpret_cast<const unsigned char *>(str);
                    utf8_to_utf16(u8src, encLen, u16str, *outLen);
                    mCache[idx] = u16str;
                    return u16str;
                } else {
                    LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
                            (int)idx, (int)(str+encLen-strings), (int)mStringPoolSize);
                }
            }
        } else {
            LOGW("Bad string block: string #%d entry is at %d, past end at %d\n",
@@ -466,6 +525,10 @@ ssize_t ResStringPool::indexOfString(const char16_t* str, size_t strLen) const

    size_t len;

    // TODO optimize searching for UTF-8 strings taking into account
    // the cache fill to determine when to convert the searched-for
    // string key to UTF-8.

    if (mHeader->flags&ResStringPool_header::SORTED_FLAG) {
        // Do a binary search for the string...
        ssize_t l = 0;
@@ -1043,6 +1106,7 @@ status_t ResXMLTree::getError() const
void ResXMLTree::uninit()
{
    mError = NO_INIT;
    mStrings.uninit();
    if (mOwnedData) {
        free(mOwnedData);
        mOwnedData = NULL;
+34 −24
Original line number Diff line number Diff line
@@ -172,10 +172,6 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
           : 0);
}

// ---------------------------------------------------------------------------

namespace android {

static inline size_t
utf8_char_len(uint8_t ch)
{
@@ -215,8 +211,38 @@ utf8_to_utf32(const uint8_t *src, size_t length)
    //printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
}

/*
 * Converts the UTF-8 bytes in [src, src+srcLen) to UTF-16 code units stored
 * at dst, writing at most dstLen units.
 *
 * Code points up to 0xFFFF produce one unit; larger code points produce a
 * high/low surrogate pair. Conversion stops when the input is exhausted,
 * when the output is full, when the next code point would not fit in the
 * remaining output, or when the last UTF-8 sequence is cut short by the end
 * of the input. If at least one output unit is still free afterwards, a
 * terminating 0 is written there.
 */
void
utf8_to_utf16(const uint8_t *src, size_t srcLen,
        char16_t* dst, const size_t dstLen)
{
    const uint8_t* const end = src + srcLen;
    const char16_t* const dstEnd = dst + dstLen;
    while (src < end && dst < dstEnd) {
        const size_t len = utf8_char_len(*src);
        if (len > (size_t)(end - src)) {
            // Truncated sequence: decoding it would read past the input.
            break;
        }
        uint32_t codepoint = utf8_to_utf32(src, len);

        // Convert the UTF32 codepoint to one or more UTF16 codepoints
        if (codepoint <= 0xFFFF) {
            // Single UTF16 character
            *dst++ = (char16_t) codepoint;
        } else {
            // A surrogate pair needs two output units; the loop condition
            // only guarantees one, so check before writing either half.
            if (dstEnd - dst < 2) {
                break;
            }
            codepoint = codepoint - 0x10000;
            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
        }

        src += len;
    }
    if (dst < dstEnd) {
        *dst = 0;
    }
}

// ---------------------------------------------------------------------------

namespace android {

static SharedBuffer* gEmptyStringBuf = NULL;
static char16_t* gEmptyString = NULL;

@@ -260,29 +286,13 @@ static char16_t* allocFromUTF8(const char* in, size_t len)
        p += utf8len;
    }
    
    SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t));
    size_t bufSize = (chars+1)*sizeof(char16_t);
    SharedBuffer* buf = SharedBuffer::alloc(bufSize);
    if (buf) {
        p = in;
        char16_t* str = (char16_t*)buf->data();
        char16_t* d = str;
        while (p < end) {
            size_t len = utf8_char_len(*p);
            uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len);
        
            // Convert the UTF32 codepoint to one or more UTF16 codepoints
            if (codepoint <= 0xFFFF) {
                // Single UTF16 character
                *d++ = (char16_t) codepoint;
            } else {
                // Multiple UTF16 characters with surrogates
                codepoint = codepoint - 0x10000;
                *d++ = (char16_t) ((codepoint >> 10) + 0xD800);
                *d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
            }

            p += len;
        }
        *d = 0;
        utf8_to_utf16((const uint8_t*)p, len, str, bufSize);

        //printf("Created UTF-16 string from UTF-8 \"%s\":", in);
        //printHexData(1, str, buf->size(), 16, 1);
Loading