Optional use of UTF-8 strings in resource bundles (92f5984d) · Commits · e / os / android_frameworks_native-old

include/utils/ResourceTypes.h

+7 −2

Original line number	Diff line number	Diff line
		@@ -393,7 +393,10 @@ struct ResStringPool_header
		enum {
		// If set, the string index is sorted by the string values (based
		// on strcmp16()).
		SORTED_FLAG = 1<<0
		SORTED_FLAG = 1<<0,

		// String pool is encoded in UTF-8
		UTF8_FLAG = 1<<8
		};
		uint32_t flags;

		@@ -456,9 +459,11 @@ private:
		void* mOwnedData;
		const ResStringPool_header* mHeader;
		size_t mSize;
		mutable Mutex mDecodeLock;
		const uint32_t* mEntries;
		const uint32_t* mEntryStyles;
		const char16_t* mStrings;
		const void* mStrings;
		char16_t** mCache;
		uint32_t mStringPoolSize; // number of uint16_t
		const uint32_t* mStyles;
		uint32_t mStylePoolSize; // number of uint32_t

include/utils/String16.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -49,12 +49,17 @@ int strzcmp16(const char16_t *s1, size_t n1, const char16_t *s2, size_t n2);
		// Version of strzcmp16 for comparing strings in different endianness.
		// s1H and s2N point at the two UTF-16 buffers; n1 and n2 are their lengths.
		// NOTE(review): the H / N suffixes presumably mean host- and network-order
		// data -- confirm against the implementation.
		int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2);

		// Convert UTF-8 to UTF-16 including surrogate pairs.
		// src / srcLen describe the UTF-8 input bytes; dst / dstLen describe the
		// output buffer, with dstLen counted in char16_t units.
		void utf8_to_utf16(const uint8_t* src, size_t srcLen, char16_t* dst, const size_t dstLen);

		}

		// ---------------------------------------------------------------------------

		namespace android {

		// ---------------------------------------------------------------------------

		class String8;
		class TextOutput;

include/utils/String8.h

+8 −0

Original line number	Diff line number	Diff line
		@@ -57,6 +57,11 @@ size_t utf8_length(const char *src);
		*/
		size_t utf32_length(const char *src, size_t src_len);

		/*
		* Returns the UTF-8 length of "src", where "src" is UTF-16 text
		* (char16_t) and "src_len" is its length.
		* NOTE(review): whether the result is a byte count and whether it
		* includes a terminator is not visible in this header -- confirm in
		* the implementation.
		*/
		size_t utf8_length_from_utf16(const char16_t *src, size_t src_len);

		/*
		* Returns the UTF-8 length of "src".
		*/
		@@ -120,6 +125,9 @@ size_t utf8_to_utf32(const char* src, size_t src_len,
		size_t utf32_to_utf8(const char32_t* src, size_t src_len,
		char* dst, size_t dst_len);

		/*
		* UTF-16 variant of utf32_to_utf8(): same parameter layout, but the
		* source buffer is char16_t.
		* NOTE(review): the meaning of the return value and the behaviour when
		* "dst" is too small are not visible in this header -- confirm in the
		* implementation.
		*/
		size_t utf16_to_utf8(const char16_t* src, size_t src_len,
		char* dst, size_t dst_len);

		}

		// ---------------------------------------------------------------------------

libs/utils/ResourceTypes.cpp

+84 −20

Original line number	Diff line number	Diff line
		@@ -229,12 +229,12 @@ Res_png_9patch* Res_png_9patch::deserialize(const void* inData)
		// --------------------------------------------------------------------

		ResStringPool::ResStringPool()
		: mError(NO_INIT), mOwnedData(NULL)
		: mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
		{
		}

		ResStringPool::ResStringPool(const void* data, size_t size, bool copyData)
		: mError(NO_INIT), mOwnedData(NULL)
		: mError(NO_INIT), mOwnedData(NULL), mHeader(NULL), mCache(NULL)
		{
		setTo(data, size, copyData);
		}
		@@ -296,7 +296,17 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
		(int)size);
		return (mError=BAD_TYPE);
		}
		mStrings = (const char16_t*)

		// Width of one string-pool unit: one byte for UTF-8 pools, one char16_t
		// otherwise. Used below to turn byte spans into unit counts.
		size_t charSize;
		if (mHeader->flags&ResStringPool_header::UTF8_FLAG) {
		    charSize = sizeof(uint8_t);
		    // One UTF-16 decode slot per string, filled on demand by stringAt().
		    // calloc() zeroes the table and checks the count*size multiplication,
		    // and its result is verified before the table is ever touched.
		    mCache = (char16_t**)calloc(mHeader->stringCount, sizeof(char16_t*));
		    if (mCache == NULL && mHeader->stringCount != 0) {
		        LOGW("No memory when trying to allocate decode cache table for %d strings\n",
		                (int)mHeader->stringCount);
		        // NO_MEMORY comes from the same status_t family as NO_INIT / BAD_TYPE.
		        return (mError=NO_MEMORY);
		    }
		} else {
		    charSize = sizeof(char16_t);
		}

		mStrings = (const void*)
		(((const uint8_t*)data)+mHeader->stringsStart);
		if (mHeader->stringsStart >= (mHeader->header.size-sizeof(uint16_t))) {
		LOGW("Bad string block: string pool starts at %d, after total size %d\n",
		@@ -305,7 +315,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
		}
		if (mHeader->styleCount == 0) {
		mStringPoolSize =
		(mHeader->header.size-mHeader->stringsStart)/sizeof(uint16_t);
		(mHeader->header.size-mHeader->stringsStart)/charSize;
		} else {
		// check invariant: styles follow the strings
		if (mHeader->stylesStart <= mHeader->stringsStart) {
		@@ -314,7 +324,7 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
		return (mError=BAD_TYPE);
		}
		mStringPoolSize =
		(mHeader->stylesStart-mHeader->stringsStart)/sizeof(uint16_t);
		(mHeader->stylesStart-mHeader->stringsStart)/charSize;
		}

		// check invariant: stringCount > 0 requires a string pool to exist
		@@ -329,13 +339,19 @@ status_t ResStringPool::setTo(const void* data, size_t size, bool copyData)
		for (i=0; i<mHeader->stringCount; i++) {
		e[i] = dtohl(mEntries[i]);
		}
		char16_t* s = const_cast<char16_t*>(mStrings);
		if (!(mHeader->flags&ResStringPool_header::UTF8_FLAG)) {
		const char16_t* strings = (const char16_t*)mStrings;
		char16_t* s = const_cast<char16_t*>(strings);
		for (i=0; i<mStringPoolSize; i++) {
		s[i] = dtohs(mStrings[i]);
		s[i] = dtohs(strings[i]);
		}
		}
		}

		if (mStrings[mStringPoolSize-1] != 0) {
		if ((mHeader->flags&ResStringPool_header::UTF8_FLAG &&
		((uint8_t*)mStrings)[mStringPoolSize-1] != 0) \|\|
		(!mHeader->flags&ResStringPool_header::UTF8_FLAG &&
		((char16_t*)mStrings)[mStringPoolSize-1] != 0)) {
		LOGW("Bad string block: last string is not 0-terminated\n");
		return (mError=BAD_TYPE);
		}
		@@ -410,24 +426,67 @@ void ResStringPool::uninit()
		free(mOwnedData);
		mOwnedData = NULL;
		}
		if (mHeader != NULL && mCache != NULL) {
		for (size_t x = 0; x < mHeader->stringCount; x++) {
		if (mCache[x] != NULL) {
		free(mCache[x]);
		mCache[x] = NULL;
		}
		}
		free(mCache);
		mCache = NULL;
		}
		}

		// Decodes the length prefix that "str" points at into "len" and advances
		// "str" past the prefix.
		//   str   - pointer lvalue to the first length unit (modified).
		//   chrsz - size in bytes of one unit (e.g. sizeof(uint8_t)).
		//   len   - lvalue receiving the decoded length.
		// If the top bit of the first unit is set, the length spans two units:
		// the remaining bits of the first unit are the high part and the whole
		// second unit is the low part. Otherwise the first unit is the length.
		// The expansion ends in a statement, so no trailing ';' is required.
		#define DECODE_LENGTH(str, chrsz, len) \
		    len = *(str); \
		    if (*(str)&(1<<((chrsz)*8-1))) { \
		        (str)++; \
		        len = (((len)&((1<<((chrsz)*8-1))-1))<<((chrsz)*8)) + *(str); \
		    } \
		    (str)++;

		const uint16_t* ResStringPool::stringAt(size_t idx, size_t* outLen) const
		{
		if (mError == NO_ERROR && idx < mHeader->stringCount) {
		const uint32_t off = (mEntries[idx]/sizeof(uint16_t));
		const bool isUTF8 = (mHeader->flags&ResStringPool_header::UTF8_FLAG) != 0;
		const uint32_t off = mEntries[idx]/(isUTF8?sizeof(char):sizeof(char16_t));
		if (off < (mStringPoolSize-1)) {
		const char16_t* str = mStrings+off;
		outLen = str;
		if ((*str)&0x8000) {
		str++;
		outLen = (((outLen)&0x7fff)<<16) + *str;
		}
		if ((uint32_t)(str+1+*outLen-mStrings) < mStringPoolSize) {
		return str+1;
		if (!isUTF8) {
		const char16_t* strings = (char16_t*)mStrings;
		const char16_t* str = strings+off;
		DECODE_LENGTH(str, sizeof(char16_t), *outLen)
		if ((uint32_t)(str+*outLen-strings) < mStringPoolSize) {
		return str;
		} else {
		LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
		(int)idx, (int)(str+1+*outLen-mStrings), (int)mStringPoolSize);
		(int)idx, (int)(str+*outLen-strings), (int)mStringPoolSize);
		}
		} else {
		const uint8_t* strings = (uint8_t*)mStrings;
		const uint8_t* str = strings+off;
		DECODE_LENGTH(str, sizeof(uint8_t), *outLen)
		size_t encLen;
		DECODE_LENGTH(str, sizeof(uint8_t), encLen)
		if ((uint32_t)(str+encLen-strings) < mStringPoolSize) {
		AutoMutex lock(mDecodeLock);
		if (mCache[idx] != NULL) {
		return mCache[idx];
		}
		// Room for *outLen UTF-16 units plus one extra slot; calloc() leaves
		// that extra slot zeroed so the decoded string is 0-terminated.
		char16_t *u16str = (char16_t *)calloc(*outLen+1, sizeof(char16_t));
		if (!u16str) {
		    LOGW("No memory when trying to allocate decode cache for string #%d\n",
		            (int)idx);
		    return NULL;
		}
		const unsigned char *u8src = reinterpret_cast<const unsigned char *>(str);
		utf8_to_utf16(u8src, encLen, u16str, *outLen);
		// Remember the decoded copy so later lookups of this index return it
		// directly instead of decoding again.
		mCache[idx] = u16str;
		return u16str;
		} else {
		LOGW("Bad string block: string #%d extends to %d, past end at %d\n",
		(int)idx, (int)(str+encLen-strings), (int)mStringPoolSize);
		}
		}
		} else {
		LOGW("Bad string block: string #%d entry is at %d, past end at %d\n",
		@@ -466,6 +525,10 @@ ssize_t ResStringPool::indexOfString(const char16_t* str, size_t strLen) const

		size_t len;

		// TODO optimize searching for UTF-8 strings taking into account
		// the cache fill to determine when to convert the searched-for
		// string key to UTF-8.

		if (mHeader->flags&ResStringPool_header::SORTED_FLAG) {
		// Do a binary search for the string...
		ssize_t l = 0;
		@@ -1043,6 +1106,7 @@ status_t ResXMLTree::getError() const
		void ResXMLTree::uninit()
		{
		mError = NO_INIT;
		mStrings.uninit();
		if (mOwnedData) {
		free(mOwnedData);
		mOwnedData = NULL;

libs/utils/String16.cpp

+34 −24

Original line number	Diff line number	Diff line
		@@ -172,10 +172,6 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
		: 0);
		}

		// ---------------------------------------------------------------------------

		namespace android {

		static inline size_t
		utf8_char_len(uint8_t ch)
		{
		@@ -215,8 +211,38 @@ utf8_to_utf32(const uint8_t *src, size_t length)
		//printf("Char at %p: len=%d, utf-16=%p\n", src, length, (void*)result);
		}

		// Convert UTF-8 to UTF-16 including surrogate pairs.
		//
		// Reads at most srcLen bytes from src and writes at most dstLen char16_t
		// units to dst. Conversion stops early when the next character would not
		// fit in dst or when the input ends in the middle of a multi-byte
		// sequence. A terminating 0 is stored only if a free slot remains after
		// the converted text.
		void
		utf8_to_utf16(const uint8_t *src, size_t srcLen,
		        char16_t* dst, const size_t dstLen)
		{
		    const uint8_t* const end = src + srcLen;
		    const char16_t* const dstEnd = dst + dstLen;
		    while (src < end && dst < dstEnd) {
		        const size_t len = utf8_char_len(*src);
		        if (len > (size_t)(end - src)) {
		            // Truncated trailing sequence: decoding it would read past srcLen.
		            break;
		        }
		        uint32_t codepoint = utf8_to_utf32(src, len);

		        // Convert the UTF32 codepoint to one or more UTF16 codepoints
		        if (codepoint <= 0xFFFF) {
		            // Single UTF16 character
		            *dst++ = (char16_t) codepoint;
		        } else {
		            // Multiple UTF16 characters with surrogates; both halves
		            // must fit, otherwise the low surrogate would land past dstEnd.
		            if (dstEnd - dst < 2) {
		                break;
		            }
		            codepoint = codepoint - 0x10000;
		            *dst++ = (char16_t) ((codepoint >> 10) + 0xD800);
		            *dst++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
		        }

		        src += len;
		    }
		    if (dst < dstEnd) {
		        *dst = 0;
		    }
		}

		// ---------------------------------------------------------------------------

		namespace android {

		static SharedBuffer* gEmptyStringBuf = NULL;
		static char16_t* gEmptyString = NULL;

		@@ -260,29 +286,13 @@ static char16_t* allocFromUTF8(const char* in, size_t len)
		p += utf8len;
		}

		SharedBuffer* buf = SharedBuffer::alloc((chars+1)*sizeof(char16_t));
		size_t bufSize = (chars+1)*sizeof(char16_t);
		SharedBuffer* buf = SharedBuffer::alloc(bufSize);
		if (buf) {
		p = in;
		char16_t* str = (char16_t*)buf->data();
		char16_t* d = str;
		while (p < end) {
		size_t len = utf8_char_len(*p);
		uint32_t codepoint = utf8_to_utf32((const uint8_t*)p, len);

		// Convert the UTF32 codepoint to one or more UTF16 codepoints
		if (codepoint <= 0xFFFF) {
		// Single UTF16 character
		*d++ = (char16_t) codepoint;
		} else {
		// Multiple UTF16 characters with surrogates
		codepoint = codepoint - 0x10000;
		*d++ = (char16_t) ((codepoint >> 10) + 0xD800);
		*d++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
		}

		p += len;
		}
		*d = 0;
		// The last argument is a count of char16_t units, not bytes: the buffer
		// holds "chars" units plus the terminator, whereas bufSize is its size
		// in bytes and would overstate the capacity.
		utf8_to_utf16((const uint8_t*)p, len, str, chars+1);

		//printf("Created UTF-16 string from UTF-8 \"%s\":", in);
		//printHexData(1, str, buf->size(), 16, 1);