Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0720a06a authored by Alan Stern, committed by Greg Kroah-Hartman
Browse files

NLS: improve UTF8 -> UTF16 string conversion routine



The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called.  A
follow-on patch will add a third caller that does utilize the new
capabilities.

The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations.  But that's a subject for a
different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
parent b7463c71
Loading
Loading
Loading
Loading
+6 −4
Original line number Original line Diff line number Diff line
@@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *value, int error)
	 * The windows host expects the key/value pair to be encoded
	 * The windows host expects the key/value pair to be encoded
	 * in utf16.
	 * in utf16.
	 */
	 */
	keylen = utf8s_to_utf16s(key_name, strlen(key_name),
	keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN,
				(wchar_t *)kvp_data->data.key);
				(wchar_t *) kvp_data->data.key,
				HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2);
	kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
	kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
	valuelen = utf8s_to_utf16s(value, strlen(value),
	valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN,
				(wchar_t *)kvp_data->data.value);
				(wchar_t *) kvp_data->data.value,
				HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2);
	kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
	kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */


	kvp_data->data.value_type = REG_SZ; /* all our values are strings */
	kvp_data->data.value_type = REG_SZ; /* all our values are strings */
+2 −1
Original line number Original line Diff line number Diff line
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
	int charlen;
	int charlen;


	if (utf8) {
	if (utf8) {
		*outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
		*outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
				(wchar_t *) outname, FAT_LFN_LEN + 2);
		if (*outlen < 0)
		if (*outlen < 0)
			return *outlen;
			return *outlen;
		else if (*outlen > FAT_LFN_LEN)
		else if (*outlen > FAT_LFN_LEN)
+33 −10
Original line number Original line Diff line number Diff line
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
}
}
EXPORT_SYMBOL(utf32_to_utf8);
EXPORT_SYMBOL(utf32_to_utf8);


int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
/*
 * Store one 16-bit code unit at *s using the requested byte order.
 *
 * @s:      destination for the code unit
 * @c:      value to store
 * @endian: UTF16_LITTLE_ENDIAN stores __cpu_to_le16(c),
 *          UTF16_BIG_ENDIAN stores __cpu_to_be16(c); any other
 *          value stores c cast to wchar_t with no conversion.
 */
static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
{
	if (endian == UTF16_LITTLE_ENDIAN)
		*s = __cpu_to_le16(c);
	else if (endian == UTF16_BIG_ENDIAN)
		*s = __cpu_to_be16(c);
	else
		*s = (wchar_t) c;	/* no explicit byte order requested */
}

int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
		wchar_t *pwcs, int maxlen)
{
{
	u16 *op;
	u16 *op;
	int size;
	int size;
	unicode_t u;
	unicode_t u;


	op = pwcs;
	op = pwcs;
	while (*s && len > 0) {
	while (len > 0 && maxlen > 0 && *s) {
		if (*s & 0x80) {
		if (*s & 0x80) {
			size = utf8_to_utf32(s, len, &u);
			size = utf8_to_utf32(s, len, &u);
			if (size < 0)
			if (size < 0)
				return -EINVAL;
				return -EINVAL;
			s += size;
			len -= size;


			if (u >= PLANE_SIZE) {
			if (u >= PLANE_SIZE) {
				if (maxlen < 2)
					break;
				u -= PLANE_SIZE;
				u -= PLANE_SIZE;
				*op++ = (wchar_t) (SURROGATE_PAIR |
				put_utf16(op++, SURROGATE_PAIR |
						((u >> 10) & SURROGATE_BITS));
						((u >> 10) & SURROGATE_BITS),
				*op++ = (wchar_t) (SURROGATE_PAIR |
						endian);
				put_utf16(op++, SURROGATE_PAIR |
						SURROGATE_LOW |
						SURROGATE_LOW |
						(u & SURROGATE_BITS));
						(u & SURROGATE_BITS),
						endian);
				maxlen -= 2;
			} else {
			} else {
				*op++ = (wchar_t) u;
				put_utf16(op++, u, endian);
				maxlen--;
			}
			}
			s += size;
			len -= size;
		} else {
		} else {
			*op++ = *s++;
			put_utf16(op++, *s++, endian);
			len--;
			len--;
			maxlen--;
		}
		}
	}
	}
	return op - pwcs;
	return op - pwcs;
+3 −2
Original line number Original line Diff line number Diff line
@@ -43,7 +43,7 @@ enum utf16_endian {
	UTF16_BIG_ENDIAN
	UTF16_BIG_ENDIAN
};
};


/* nls.c */
/* nls_base.c */
extern int register_nls(struct nls_table *);
extern int register_nls(struct nls_table *);
extern int unregister_nls(struct nls_table *);
extern int unregister_nls(struct nls_table *);
extern struct nls_table *load_nls(char *);
extern struct nls_table *load_nls(char *);
@@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void);


extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
extern int utf8s_to_utf16s(const u8 *s, int len,
		enum utf16_endian endian, wchar_t *pwcs, int maxlen);
extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
		enum utf16_endian endian, u8 *s, int maxlen);
		enum utf16_endian endian, u8 *s, int maxlen);