Loading libutils/Unicode.cpp +0 −43 Original line number Diff line number Diff line Loading @@ -359,49 +359,6 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_le // UTF-8 // -------------------------------------------------------------------------- ssize_t utf8_length(const char *src) { const char *cur = src; size_t ret = 0; while (*cur != '\0') { const char first_char = *cur++; if ((first_char & 0x80) == 0) { // ASCII ret += 1; continue; } // (UTF-8's character must not be like 10xxxxxx, // but 110xxxxx, 1110xxxx, ... or 1111110x) if ((first_char & 0x40) == 0) { return -1; } int32_t mask, to_ignore_mask; size_t num_to_read = 0; char32_t utf32 = 0; for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; num_to_read < 5 && (first_char & mask); num_to_read++, to_ignore_mask |= mask, mask >>= 1) { if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx return -1; } // 0x3F == 00111111 utf32 = (utf32 << 6) + (*cur++ & 0x3F); } // "first_char" must be (110xxxxx - 11110xxx) if (num_to_read == 5) { return -1; } to_ignore_mask |= mask; utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); if (utf32 > kUnicodeMaxCodepoint) { return -1; } ret += num_to_read; } return ret; } ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) { if (src == nullptr || src_len == 0) { Loading libutils/include/utils/Unicode.h +0 −18 Original line number Diff line number Diff line Loading @@ -110,24 +110,6 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len); */ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len); /** * Returns the length of "src" when "src" is valid UTF-8 string. * Returns 0 if src is NULL or 0-length string. Returns -1 when the source * is an invalid string. * * This function should be used to determine whether "src" is valid UTF-8 * characters with valid unicode codepoints. "src" must be nul-terminated. * * If you are going to use other utf8_to_... 
functions defined in this header * with string which may not be valid UTF-8 with valid codepoint (form 0 to * 0x10FFFF), you should use this function before calling others, since the * other functions do not check whether the string is valid UTF-8 or not. * * If you do not care whether "src" is valid UTF-8 or not, you should use * strlen() as usual, which should be much faster. */ ssize_t utf8_length(const char *src); /** * Returns the UTF-16 length of UTF-8 string "src". Returns -1 in case * it's invalid utf8. No buffer over-read occurs because of bound checks. Using overreadIsFatal you Loading Loading
libutils/Unicode.cpp +0 −43 Original line number Diff line number Diff line Loading @@ -359,49 +359,6 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_le // UTF-8 // -------------------------------------------------------------------------- ssize_t utf8_length(const char *src) { const char *cur = src; size_t ret = 0; while (*cur != '\0') { const char first_char = *cur++; if ((first_char & 0x80) == 0) { // ASCII ret += 1; continue; } // (UTF-8's character must not be like 10xxxxxx, // but 110xxxxx, 1110xxxx, ... or 1111110x) if ((first_char & 0x40) == 0) { return -1; } int32_t mask, to_ignore_mask; size_t num_to_read = 0; char32_t utf32 = 0; for (num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80; num_to_read < 5 && (first_char & mask); num_to_read++, to_ignore_mask |= mask, mask >>= 1) { if ((*cur & 0xC0) != 0x80) { // must be 10xxxxxx return -1; } // 0x3F == 00111111 utf32 = (utf32 << 6) + (*cur++ & 0x3F); } // "first_char" must be (110xxxxx - 11110xxx) if (num_to_read == 5) { return -1; } to_ignore_mask |= mask; utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1)); if (utf32 > kUnicodeMaxCodepoint) { return -1; } ret += num_to_read; } return ret; } ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) { if (src == nullptr || src_len == 0) { Loading
libutils/include/utils/Unicode.h +0 −18 Original line number Diff line number Diff line Loading @@ -110,24 +110,6 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len); */ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len); /** * Returns the length of "src" when "src" is a valid UTF-8 string. * Returns 0 if src is NULL or a 0-length string. Returns -1 when the source * is an invalid string. * * This function should be used to determine whether "src" is valid UTF-8 * characters with valid Unicode codepoints. "src" must be nul-terminated. * * If you are going to use other utf8_to_... functions defined in this header * with a string which may not be valid UTF-8 with valid codepoints (from 0 to * 0x10FFFF), you should use this function before calling others, since the * other functions do not check whether the string is valid UTF-8 or not. * * If you do not care whether "src" is valid UTF-8 or not, you should use * strlen() as usual, which should be much faster. */ ssize_t utf8_length(const char *src); /** * Returns the UTF-16 length of UTF-8 string "src". Returns -1 in case * it's invalid utf8. No buffer over-read occurs because of bound checks. Using overreadIsFatal you Loading