Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0f10d0ab authored by Dianne Hackborn's avatar Dianne Hackborn
Browse files

Add a new utf8-to-utf16 conversion function.

Change-Id: I957c22fb219596ca4239db7a169473d3894b09eb
parent d43eae5e
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -163,6 +163,13 @@ char16_t* utf8_to_utf16_no_null_terminator(const uint8_t* src, size_t srcLen, ch
 */
void utf8_to_utf16(const uint8_t* src, size_t srcLen, char16_t* dst);

/**
 * Like utf8_to_utf16_no_null_terminator, but you can supply a maximum length of the
 * decoded string.  The decoded string will fill up to that length; if it is longer
 * the returned pointer will be to the character after dstLen.
 */
char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen);

}

#endif
+30 −0
Original line number Diff line number Diff line
@@ -573,4 +573,34 @@ void utf8_to_utf16(const uint8_t* u8str, size_t u8len, char16_t* u16str) {
    *end = 0;
}

char16_t* utf8_to_utf16_n(const uint8_t* src, size_t srcLen, char16_t* dst, size_t dstLen) {
    const uint8_t* const u8end = src + srcLen;
    const uint8_t* u8cur = src;
    const uint16_t* const u16end = dst + dstLen;
    char16_t* u16cur = dst;

    while (u8cur < u8end && u16cur < u16end) {
        size_t u8len = utf8_codepoint_len(*u8cur);
        uint32_t codepoint = utf8_to_utf32_codepoint(u8cur, u8len);

        // Convert the UTF32 codepoint to one or more UTF16 codepoints
        if (codepoint <= 0xFFFF) {
            // Single UTF16 character
            *u16cur++ = (char16_t) codepoint;
        } else {
            // Multiple UTF16 characters with surrogates
            codepoint = codepoint - 0x10000;
            *u16cur++ = (char16_t) ((codepoint >> 10) + 0xD800);
            if (u16cur >= u16end) {
                // Ooops...  not enough room for this surrogate pair.
                return u16cur-1;
            }
            *u16cur++ = (char16_t) ((codepoint & 0x3FF) + 0xDC00);
        }

        u8cur += u8len;
    }
    return u16cur;
}

}