Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 74675a58 authored by Alan Stern's avatar Alan Stern Committed by Greg Kroah-Hartman
Browse files

NLS: update handling of Unicode



This patch (as1239) updates the kernel's treatment of Unicode.  The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.

The old wchar_t 16-bit type is retained because it's still used in
lots of places.  This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code then before it must
have yielded an undefined code.

Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediate after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.

Signed-off-by: default avatarAlan Stern <stern@rowland.harvard.edu>
Acked-by: default avatarClemens Ladisch <clemens@ladisch.de>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@suse.de>
parent a853a3d4
Loading
Loading
Loading
Loading
+3 −7
Original line number Diff line number Diff line
@@ -780,14 +780,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
{
	unsigned char *tbuf;
	int err;
	unsigned int u;

	if (dev->state == USB_STATE_SUSPENDED)
		return -EHOSTUNREACH;
	if (size <= 0 || !buf || !index)
		return -EINVAL;
	buf[0] = 0;
	tbuf = kmalloc(256 + 2, GFP_NOIO);
	tbuf = kmalloc(256, GFP_NOIO);
	if (!tbuf)
		return -ENOMEM;

@@ -814,12 +813,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size)
	if (err < 0)
		goto errout;

	for (u = 2; u < err; u += 2)
		le16_to_cpus((u16 *)&tbuf[u]);
	tbuf[u] = 0;
	tbuf[u + 1] = 0;
	size--;		/* leave room for trailing NULL char in output buffer */
	err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size);
	err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2,
			UTF16_LITTLE_ENDIAN, buf, size);
	buf[err] = 0;

	if (tbuf[1] != USB_DT_STRING)
+9 −11
Original line number Diff line number Diff line
@@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in,
{
	struct nls_table *nls = BEFS_SB(sb)->nls;
	int i, o;
	wchar_t uni;
	unicode_t uni;
	int unilen, utflen;
	char *result;
	/* The utf8->nls conversion won't make the final nls string bigger
@@ -539,17 +539,17 @@ befs_utf2nls(struct super_block *sb, const char *in,
	for (i = o = 0; i < in_len; i += utflen, o += unilen) {

		/* convert from UTF-8 to Unicode */
		utflen = utf8_mbtowc(&uni, &in[i], in_len - i);
		if (utflen < 0) {
		utflen = utf8_to_utf32(&in[i], in_len - i, &uni);
		if (utflen < 0)
			goto conv_err;
		}

		/* convert from Unicode to nls */
		if (uni > MAX_WCHAR_T)
			goto conv_err;
		unilen = nls->uni2char(uni, &result[o], in_len - o);
		if (unilen < 0) {
		if (unilen < 0)
			goto conv_err;
	}
	}
	result[o] = '\0';
	*out_len = o;

@@ -619,16 +619,14 @@ befs_nls2utf(struct super_block *sb, const char *in,

		/* convert from nls to unicode */
		unilen = nls->char2uni(&in[i], in_len - i, &uni);
		if (unilen < 0) {
		if (unilen < 0)
			goto conv_err;
		}

		/* convert from unicode to UTF-8 */
		utflen = utf8_wctomb(&result[o], uni, 3);
		if (utflen <= 0) {
		utflen = utf32_to_utf8(uni, &result[o], 3);
		if (utflen <= 0)
			goto conv_err;
	}
	}

	result[o] = '\0';
	*out_len = o;
+15 −14
Original line number Diff line number Diff line
@@ -22,6 +22,19 @@
#include <asm/uaccess.h>
#include "fat.h"

/*
 * Maximum buffer size of short name.
 * [(MSDOS_NAME + '.') * max one char + nul]
 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
 */
#define FAT_MAX_SHORT_SIZE	((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
/*
 * Maximum buffer size of unicode chars from slots.
 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
 */
#define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
#define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * sizeof(wchar_t))

static inline loff_t fat_make_i_pos(struct super_block *sb,
				    struct buffer_head *bh,
				    struct msdos_dir_entry *de)
@@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
				unsigned char *buf, int size)
{
	if (sbi->options.utf8)
		return utf8_wcstombs(buf, uni, size);
		return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
				UTF16_HOST_ENDIAN, buf, size);
	else
		return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
				   sbi->nls_io);
@@ -324,19 +338,6 @@ static int fat_parse_long(struct inode *dir, loff_t *pos,
	return 0;
}

/*
 * Maximum buffer size of short name.
 * [(MSDOS_NAME + '.') * max one char + nul]
 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
 */
#define FAT_MAX_SHORT_SIZE	((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
/*
 * Maximum buffer size of unicode chars from slots.
 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
 */
#define FAT_MAX_UNI_CHARS	((MSDOS_SLOTS - 1) * 13 + 1)
#define FAT_MAX_UNI_SIZE	(FAT_MAX_UNI_CHARS * sizeof(wchar_t))

/*
 * Return values: negative -> error, 0 -> not found, positive -> found,
 * value is the total amount of slots, including the shortname entry.
+2 −2
Original line number Diff line number Diff line
@@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
	if (utf8) {
		int name_len = strlen(name);

		*outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX);
		*outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname);

		/*
		 * We stripped '.'s before and set len appropriately,
		 * but utf8_mbstowcs doesn't care about len
		 * but utf8s_to_utf16s doesn't care about len
		 */
		*outlen -= (name_len - len);

+3 −33
Original line number Diff line number Diff line
@@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
	return (op - ascii);
}

/* Convert big endian wide character string to utf8 */
static int
wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen)
{
	const __u8 *ip;
	__u8 *op;
	int size;
	__u16 c;

	op = s;
	ip = pwcs;
	while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) {
		c = (*ip << 8) | ip[1];
		if (c > 0x7f) {
			size = utf8_wctomb(op, c, maxlen);
			if (size == -1) {
				/* Ignore character and move on */
				maxlen--;
			} else {
				op += size;
				maxlen -= size;
			}
		} else {
			*op++ = (__u8) c;
		}
		ip += 2;
		inlen--;
	}
	return (op - s);
}

int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
@@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st
	nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;

	if (utf8) {
		len = wcsntombs_be(outname, de->name,
				de->name_len[0] >> 1, PAGE_SIZE);
		len = utf16s_to_utf8s((const wchar_t *) de->name,
				de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
				outname, PAGE_SIZE);
	} else {
		len = uni16_to_x8(outname, (__be16 *) de->name,
				de->name_len[0] >> 1, nls);
Loading