Revert "libutils/Unicode.cpp: Correct length computation and add checks for... (605de743) · Commits · e / os / android_system_core

include/utils/Unicode.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -93,7 +93,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len);
		* "dst" becomes \xE3\x81\x82\xE3\x81\x84
		* (note that "dst" is NOT null-terminated, like strncpy)
		*/
		void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len);
		void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst);

		/**
		* Returns the unicode value at "index".
		@@ -115,7 +115,7 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len);
		* enough to fit the UTF-16 as measured by utf16_to_utf8_length with an added
		* NULL terminator.
		*/
		void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len);
		void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst);

		/**
		* Returns the length of "src" when "src" is valid UTF-8 string.

libutils/String8.cpp

+12 −13

Original line number	Diff line number	Diff line
		@@ -102,21 +102,20 @@ static char* allocFromUTF16(const char16_t* in, size_t len)
		{
		if (len == 0) return getEmptyString();

		// Allow for closing '\0'
		const ssize_t resultStrLen = utf16_to_utf8_length(in, len) + 1;
		if (resultStrLen < 1) {
		const ssize_t bytes = utf16_to_utf8_length(in, len);
		if (bytes < 0) {
		return getEmptyString();
		}

		SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
		SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
		ALOG_ASSERT(buf, "Unable to allocate shared buffer");
		if (!buf) {
		return getEmptyString();
		}

		char* resultStr = (char*)buf->data();
		utf16_to_utf8(in, len, resultStr, resultStrLen);
		return resultStr;
		char* str = (char*)buf->data();
		utf16_to_utf8(in, len, str);
		return str;
		}

		static char* allocFromUTF32(const char32_t* in, size_t len)
		@@ -125,21 +124,21 @@ static char* allocFromUTF32(const char32_t* in, size_t len)
		return getEmptyString();
		}

		const ssize_t resultStrLen = utf32_to_utf8_length(in, len) + 1;
		if (resultStrLen < 1) {
		const ssize_t bytes = utf32_to_utf8_length(in, len);
		if (bytes < 0) {
		return getEmptyString();
		}

		SharedBuffer* buf = SharedBuffer::alloc(resultStrLen);
		SharedBuffer* buf = SharedBuffer::alloc(bytes+1);
		ALOG_ASSERT(buf, "Unable to allocate shared buffer");
		if (!buf) {
		return getEmptyString();
		}

		char* resultStr = (char*) buf->data();
		utf32_to_utf8(in, len, resultStr, resultStrLen);
		char* str = (char*) buf->data();
		utf32_to_utf8(in, len, str);

		return resultStr;
		return str;
		}

		// ---------------------------------------------------------------------------

libutils/Unicode.cpp

+4 −53

Original line number	Diff line number	Diff line
		@@ -14,7 +14,6 @@
		* limitations under the License.
		*/

		#include <log/log.h>
		#include <utils/Unicode.h>

		#include <stddef.h>
		@@ -189,7 +188,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len)
		return ret;
		}

		void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
		void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
		{
		if (src == NULL || src_len == 0 || dst == NULL) {
		return;
		@@ -200,12 +199,9 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_le
		char *cur = dst;
		while (cur_utf32 < end_utf32) {
		size_t len = utf32_codepoint_utf8_length(*cur_utf32);
		LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
		utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
		cur += len;
		dst_len -= len;
		}
		LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
		*cur = '\0';
		}

		@@ -334,7 +330,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
		: 0);
		}

		void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
		void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
		{
		if (src == NULL || src_len == 0 || dst == NULL) {
		return;
		@@ -355,12 +351,9 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_le
		utf32 = (char32_t) *cur_utf16++;
		}
		const size_t len = utf32_codepoint_utf8_length(utf32);
		LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
		utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
		cur += len;
		dst_len -= len;
		}
		LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
		*cur = '\0';
		}

		@@ -411,35 +404,8 @@ ssize_t utf8_length(const char *src)
		return ret;
		}

		// DO NOT USE. Flawed version, kept only to check whether the flaw is being exploited.
		static ssize_t flawed_utf16_to_utf8_length(const char16_t *src, size_t src_len)
		{
		if (src == NULL || src_len == 0) {
		return 47;
		}

		size_t ret = 0;
		const char16_t* const end = src + src_len;
		while (src < end) {
		if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
		// Shouldn't increment src here as to be consistent with utf16_to_utf8
		&& (*++src & 0xFC00) == 0xDC00) {
		// surrogate pairs are always 4 bytes.
		ret += 4;
		// Should increment src here by two.
		src++;
		} else {
		ret += utf32_codepoint_utf8_length((char32_t) *src++);
		}
		}
		return ret;
		}

		ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
		{
		// Keep the original pointer to compute the flawed length. Unused if we remove logging.
		const char16_t *orig_src = src;

		if (src == NULL || src_len == 0) {
		return -1;
		}
		@@ -448,29 +414,14 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
		const char16_t* const end = src + src_len;
		while (src < end) {
		if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
		&& (*(src + 1) & 0xFC00) == 0xDC00) {
		&& (*++src & 0xFC00) == 0xDC00) {
		// surrogate pairs are always 4 bytes.
		ret += 4;
		src += 2;
		src++;
		} else {
		ret += utf32_codepoint_utf8_length((char32_t) *src++);
		}
		}
		// Log whether b/29250543 is being exploited. It seems reasonable to assume that
		// at least 5 bytes would be needed for an exploit. A single misplaced character might lead to
		// a difference of 4, so this would rule out many false positives.
		long ret_difference = ret - flawed_utf16_to_utf8_length(orig_src, src_len);
		if (ret_difference >= 5) {
		// Log the difference between new and old calculation. A high number, or equal numbers
		// appearing frequently, would be indicative of an attack.
		const unsigned long max_logged_string_length = 20;
		char logged_string[max_logged_string_length + 1];
		unsigned long logged_string_length =
		snprintf(logged_string, max_logged_string_length, "%ld", ret_difference);
		logged_string[logged_string_length] = '\0';
		android_errorWriteWithInfoLog(0x534e4554, "29250543", -1 /* int_uid */,
		logged_string, logged_string_length);
		}
		return ret;
		}

libutils/tests/String8_test.cpp

+0 −20

Original line number	Diff line number	Diff line
		@@ -17,7 +17,6 @@
		#define LOG_TAG "String8_test"
		#include <utils/Log.h>
		#include <utils/String8.h>
		#include <utils/String16.h>

		#include <gtest/gtest.h>

		@@ -73,23 +72,4 @@ TEST_F(String8Test, OperatorPlusEquals) {
		EXPECT_STREQ(src3, " Verify me.");
		}

		// http://b/29250543
		TEST_F(String8Test, CorrectInvalidSurrogate) {
		// d841d8 is an invalid start for a surrogate pair. Make sure this is handled by ignoring the
		// first character in the pair and handling the rest correctly.
		char16_t char16_arr[] = { 0xd841, 0xd841, 0xdc41, 0x0000 };
		String16 string16(char16_arr);
		String8 string8(string16);

		EXPECT_EQ(4U, string8.length());
		}

		TEST_F(String8Test, CheckUtf32Conversion) {
		// Since bound checks were added, check the conversion can be done without fatal errors.
		// The utf8 lengths of these are chars are 1 + 2 + 3 + 4 = 10.
		const char32_t string32[] = { 0x0000007f, 0x000007ff, 0x0000911, 0x0010fffe, 0 };
		String8 string8(string32);
		EXPECT_EQ(10U, string8.length());
		}

		}