From a853a3d4eb2edb066248a39f0634f6f5858816a0 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Fri, 24 Apr 2009 10:12:18 +0200 Subject: usb: return device strings in UTF-8 Change the encoding of strings returned by usb_string() from ISO 8859-1 to UTF-8. Signed-off-by: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) (limited to 'drivers/usb/core/message.c') diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index b62628377654..e98f928c08ea 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -759,7 +760,7 @@ static int usb_string_sub(struct usb_device *dev, unsigned int langid, } /** - * usb_string - returns ISO 8859-1 version of a string descriptor + * usb_string - returns UTF-8 version of a string descriptor * @dev: the device whose string descriptor is being retrieved * @index: the number of the descriptor * @buf: where to put the string @@ -767,17 +768,10 @@ static int usb_string_sub(struct usb_device *dev, unsigned int langid, * Context: !in_interrupt () * * This converts the UTF-16LE encoded strings returned by devices, from - * usb_get_string_descriptor(), to null-terminated ISO-8859-1 encoded ones - * that are more usable in most kernel contexts. Note that all characters - * in the chosen descriptor that can't be encoded using ISO-8859-1 - * are converted to the question mark ("?") character, and this function + * usb_get_string_descriptor(), to null-terminated UTF-8 encoded ones + * that are more usable in most kernel contexts. Note that this function * chooses strings in the first language supported by the device. * - * The ASCII (or, redundantly, "US-ASCII") character set is the seven-bit - * subset of ISO 8859-1. ISO-8859-1 is the eight-bit subset of Unicode, - * and is appropriate for use many uses of English and several other - * Western European languages. (But it doesn't include the "Euro" symbol.) - * * This call is synchronous, and may not be used in an interrupt context. * * Returns length of the string (>= 0) or usb_control_msg status (< 0). @@ -786,14 +780,14 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) { unsigned char *tbuf; int err; - unsigned int u, idx; + unsigned int u; if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; if (size <= 0 || !buf || !index) return -EINVAL; buf[0] = 0; - tbuf = kmalloc(256, GFP_NOIO); + tbuf = kmalloc(256 + 2, GFP_NOIO); if (!tbuf) return -ENOMEM; @@ -820,17 +814,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (err < 0) goto errout; + for (u = 2; u < err; u += 2) + le16_to_cpus((u16 *)&tbuf[u]); + tbuf[u] = 0; + tbuf[u + 1] = 0; size--; /* leave room for trailing NULL char in output buffer */ - for (idx = 0, u = 2; u < err; u += 2) { - if (idx >= size) - break; - if (tbuf[u+1]) /* high byte */ - buf[idx++] = '?'; /* non ISO-8859-1 character */ - else - buf[idx++] = tbuf[u]; - } - buf[idx] = 0; - err = idx; + err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size); + buf[err] = 0; if (tbuf[1] != USB_DT_STRING) dev_dbg(&dev->dev, @@ -843,6 +833,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) } EXPORT_SYMBOL_GPL(usb_string); +/* one UTF-8-encoded 16-bit character has at most three bytes */ +#define MAX_USB_STRING_SIZE (127 * 3 + 1) + /** * usb_cache_string - read a string descriptor and cache it for later use * @udev: the device whose string descriptor is being read @@ -860,9 +853,9 @@ char *usb_cache_string(struct usb_device *udev, int index) if (index <= 0) return NULL; - buf = kmalloc(256, GFP_KERNEL); + buf = kmalloc(MAX_USB_STRING_SIZE, GFP_KERNEL); if (buf) { - len = usb_string(udev, index, buf, 256); + len = usb_string(udev, index, buf, MAX_USB_STRING_SIZE); if (len > 0) { smallbuf = kmalloc(++len, GFP_KERNEL); if (!smallbuf) -- cgit v1.2.3 From 74675a58507e769beee7d949dbed788af3c4139d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 30 Apr 2009 10:08:18 -0400 Subject: NLS: update handling of Unicode This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediate after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern Acked-by: Clemens Ladisch Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 10 +-- fs/befs/linuxvfs.c | 20 +++--- fs/fat/dir.c | 29 ++++---- fs/fat/namei_vfat.c | 4 +- fs/isofs/joliet.c | 36 +--------- fs/ncpfs/ncplib_kernel.c | 8 ++- fs/nls/nls_base.c | 164 +++++++++++++++++++++++++++++---------------- fs/nls/nls_utf8.c | 13 +++- include/linux/nls.h | 35 ++++++++-- 9 files changed, 182 insertions(+), 137 deletions(-) (limited to 'drivers/usb/core/message.c') diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index e98f928c08ea..9bd26dec7599 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -780,14 +780,13 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) { unsigned char *tbuf; int err; - unsigned int u; if (dev->state == USB_STATE_SUSPENDED) return -EHOSTUNREACH; if (size <= 0 || !buf || !index) return -EINVAL; buf[0] = 0; - tbuf = kmalloc(256 + 2, GFP_NOIO); + tbuf = kmalloc(256, GFP_NOIO); if (!tbuf) return -ENOMEM; @@ -814,12 +813,9 @@ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) if (err < 0) goto errout; - for (u = 2; u < err; u += 2) - le16_to_cpus((u16 *)&tbuf[u]); - tbuf[u] = 0; - tbuf[u + 1] = 0; size--; /* leave room for trailing NULL char in output buffer */ - err = utf8_wcstombs(buf, (u16 *)&tbuf[2], size); + err = utf16s_to_utf8s((wchar_t *) &tbuf[2], (err - 2) / 2, + UTF16_LITTLE_ENDIAN, buf, size); buf[err] = 0; if (tbuf[1] != USB_DT_STRING) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9367b6297d84..89cd2deeb4af 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -513,7 +513,7 @@ befs_utf2nls(struct super_block *sb, const char *in, { struct nls_table *nls = BEFS_SB(sb)->nls; int i, o; - wchar_t uni; + unicode_t uni; int unilen, utflen; char *result; /* The utf8->nls conversion won't make the final nls string bigger @@ -539,16 +539,16 @@ befs_utf2nls(struct super_block *sb, const char *in, for (i = o = 0; i < in_len; i += utflen, o += unilen) { /* convert from UTF-8 to Unicode */ - utflen = utf8_mbtowc(&uni, &in[i], in_len - i); - if (utflen < 0) { + utflen = utf8_to_utf32(&in[i], in_len - i, &uni); + if (utflen < 0) goto conv_err; - } /* convert from Unicode to nls */ + if (uni > MAX_WCHAR_T) + goto conv_err; unilen = nls->uni2char(uni, &result[o], in_len - o); - if (unilen < 0) { + if (unilen < 0) goto conv_err; - } } result[o] = '\0'; *out_len = o; @@ -619,15 +619,13 @@ befs_nls2utf(struct super_block *sb, const char *in, /* convert from nls to unicode */ unilen = nls->char2uni(&in[i], in_len - i, &uni); - if (unilen < 0) { + if (unilen < 0) goto conv_err; - } /* convert from unicode to UTF-8 */ - utflen = utf8_wctomb(&result[o], uni, 3); - if (utflen <= 0) { + utflen = utf32_to_utf8(uni, &result[o], 3); + if (utflen <= 0) goto conv_err; - } } result[o] = '\0'; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index f3500294eec5..7c14c8cbbaba 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -22,6 +22,19 @@ #include #include "fat.h" +/* + * Maximum buffer size of short name. + * [(MSDOS_NAME + '.') * max one char + nul] + * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] + */ +#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) +/* + * Maximum buffer size of unicode chars from slots. + * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] + */ +#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) +#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) + static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, struct msdos_dir_entry *de) @@ -171,7 +184,8 @@ static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, unsigned char *buf, int size) { if (sbi->options.utf8) - return utf8_wcstombs(buf, uni, size); + return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, + UTF16_HOST_ENDIAN, buf, size); else return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, sbi->nls_io); @@ -324,19 +338,6 @@ parse_long: return 0; } -/* - * Maximum buffer size of short name. - * [(MSDOS_NAME + '.') * max one char + nul] - * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul] - */ -#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1) -/* - * Maximum buffer size of unicode chars from slots. - * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)] - */ -#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1) -#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t)) - /* * Return values: negative -> error, 0 -> not found, positive -> found, * value is the total amount of slots, including the shortname entry. diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index b50ecbe97f83..f92ad9995356 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -502,11 +502,11 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, if (utf8) { int name_len = strlen(name); - *outlen = utf8_mbstowcs((wchar_t *)outname, name, PATH_MAX); + *outlen = utf8s_to_utf16s(name, PATH_MAX, (wchar_t *) outname); /* * We stripped '.'s before and set len appropriately, - * but utf8_mbstowcs doesn't care about len + * but utf8s_to_utf16s doesn't care about len */ *outlen -= (name_len - len); diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c index 92c14b850e9c..a048de81c093 100644 --- a/fs/isofs/joliet.c +++ b/fs/isofs/joliet.c @@ -37,37 +37,6 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls) return (op - ascii); } -/* Convert big endian wide character string to utf8 */ -static int -wcsntombs_be(__u8 *s, const __u8 *pwcs, int inlen, int maxlen) -{ - const __u8 *ip; - __u8 *op; - int size; - __u16 c; - - op = s; - ip = pwcs; - while ((*ip || ip[1]) && (maxlen > 0) && (inlen > 0)) { - c = (*ip << 8) | ip[1]; - if (c > 0x7f) { - size = utf8_wctomb(op, c, maxlen); - if (size == -1) { - /* Ignore character and move on */ - maxlen--; - } else { - op += size; - maxlen -= size; - } - } else { - *op++ = (__u8) c; - } - ip += 2; - inlen--; - } - return (op - s); -} - int get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode) { @@ -79,8 +48,9 @@ get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, st nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset; if (utf8) { - len = wcsntombs_be(outname, de->name, - de->name_len[0] >> 1, PAGE_SIZE); + len = utf16s_to_utf8s((const wchar_t *) de->name, + de->name_len[0] >> 1, UTF16_BIG_ENDIAN, + outname, PAGE_SIZE); } else { len = uni16_to_x8(outname, (__be16 *) de->name, de->name_len[0] >> 1, nls); diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 97645f112114..0ec6237a5970 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -1113,11 +1113,13 @@ ncp__io2vol(struct ncp_server *server, unsigned char *vname, unsigned int *vlen, if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { int k; + unicode_t u; - k = utf8_mbtowc(&ec, iname, iname_end - iname); - if (k < 0) + k = utf8_to_utf32(iname, iname_end - iname, &u); + if (k < 0 || u > MAX_WCHAR_T) return -EINVAL; iname += k; + ec = u; } else { if (*iname == NCP_ESC) { int k; @@ -1214,7 +1216,7 @@ ncp__vol2io(struct ncp_server *server, unsigned char *iname, unsigned int *ilen, if (NCP_IS_FLAG(server, NCP_FLAG_UTF8)) { int k; - k = utf8_wctomb(iname, ec, iname_end - iname); + k = utf32_to_utf8(ec, iname, iname_end - iname); if (k < 0) { err = -ENAMETOOLONG; goto quit; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 750abf211e26..477d37d83b31 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -15,6 +15,7 @@ #include #include #include +#include static struct nls_table default_table; static struct nls_table *tables = &default_table; @@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] = {0, /* end of table */} }; -int -utf8_mbtowc(wchar_t *p, const __u8 *s, int n) +#define UNICODE_MAX 0x0010ffff +#define PLANE_SIZE 0x00010000 + +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff + +int utf8_to_utf32(const u8 *s, int len, unicode_t *pu) { - long l; + unsigned long l; int c0, c, nc; const struct utf8_table *t; @@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) nc++; if ((c0 & t->cmask) == t->cval) { l &= t->lmask; - if (l < t->lval) + if (l < t->lval || l > UNICODE_MAX || + (l & SURROGATE_MASK) == SURROGATE_PAIR) return -1; - *p = l; + *pu = (unicode_t) l; return nc; } - if (n <= nc) + if (len <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; @@ -72,76 +81,119 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) } return -1; } +EXPORT_SYMBOL(utf8_to_utf32); -int -utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n) +int utf32_to_utf8(unicode_t u, u8 *s, int maxlen) { - __u16 *op; - const __u8 *ip; - int size; - - op = pwcs; - ip = s; - while (*ip && n > 0) { - if (*ip & 0x80) { - size = utf8_mbtowc(op, ip, n); - if (size == -1) { - /* Ignore character and move on */ - ip++; - n--; - } else { - op++; - ip += size; - n -= size; - } - } else { - *op++ = *ip++; - n--; - } - } - return (op - pwcs); -} - -int -utf8_wctomb(__u8 *s, wchar_t wc, int maxlen) -{ - long l; + unsigned long l; int c, nc; const struct utf8_table *t; - + if (!s) return 0; - - l = wc; + + l = u; + if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) + return -1; + nc = 0; for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { nc++; if (l <= t->lmask) { c = t->shift; - *s = t->cval | (l >> c); + *s = (u8) (t->cval | (l >> c)); while (c > 0) { c -= 6; s++; - *s = 0x80 | ((l >> c) & 0x3F); + *s = (u8) (0x80 | ((l >> c) & 0x3F)); } return nc; } } return -1; } +EXPORT_SYMBOL(utf32_to_utf8); -int -utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen) +int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs) { - const __u16 *ip; - __u8 *op; + u16 *op; int size; + unicode_t u; + + op = pwcs; + while (*s && len > 0) { + if (*s & 0x80) { + size = utf8_to_utf32(s, len, &u); + if (size < 0) { + /* Ignore character and move on */ + size = 1; + } else if (u >= PLANE_SIZE) { + u -= PLANE_SIZE; + *op++ = (wchar_t) (SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS)); + *op++ = (wchar_t) (SURROGATE_PAIR | + SURROGATE_LOW | + (u & SURROGATE_BITS)); + } else { + *op++ = (wchar_t) u; + } + s += size; + len -= size; + } else { + *op++ = *s++; + len--; + } + } + return op - pwcs; +} +EXPORT_SYMBOL(utf8s_to_utf16s); + +static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + return c; + case UTF16_LITTLE_ENDIAN: + return __le16_to_cpu(c); + case UTF16_BIG_ENDIAN: + return __be16_to_cpu(c); + } +} + +int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, + u8 *s, int maxlen) +{ + u8 *op; + int size; + unsigned long u, v; op = s; - ip = pwcs; - while (*ip && maxlen > 0) { - if (*ip > 0x7f) { - size = utf8_wctomb(op, *ip, maxlen); + while (len > 0 && maxlen > 0) { + u = get_utf16(*pwcs, endian); + if (!u) + break; + pwcs++; + len--; + if (u > 0x7f) { + if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { + if (u & SURROGATE_LOW) { + /* Ignore character and move on */ + continue; + } + if (len <= 0) + break; + v = get_utf16(*pwcs, endian); + if ((v & SURROGATE_MASK) != SURROGATE_PAIR || + !(v & SURROGATE_LOW)) { + /* Ignore character and move on */ + continue; + } + u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) + + (v & SURROGATE_BITS); + pwcs++; + len--; + } + size = utf32_to_utf8(u, op, maxlen); if (size == -1) { /* Ignore character and move on */ } else { @@ -149,13 +201,13 @@ utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen) maxlen -= size; } } else { - *op++ = (__u8) *ip; + *op++ = (u8) u; maxlen--; } - ip++; } - return (op - s); + return op - s; } +EXPORT_SYMBOL(utf16s_to_utf8s); int register_nls(struct nls_table * nls) { @@ -467,9 +519,5 @@ EXPORT_SYMBOL(unregister_nls); EXPORT_SYMBOL(unload_nls); EXPORT_SYMBOL(load_nls); EXPORT_SYMBOL(load_nls_default); -EXPORT_SYMBOL(utf8_mbtowc); -EXPORT_SYMBOL(utf8_mbstowcs); -EXPORT_SYMBOL(utf8_wctomb); -EXPORT_SYMBOL(utf8_wcstombs); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_utf8.c b/fs/nls/nls_utf8.c index aa2c42fdd977..0d60a44acacd 100644 --- a/fs/nls/nls_utf8.c +++ b/fs/nls/nls_utf8.c @@ -15,7 +15,11 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { int n; - if ( (n = utf8_wctomb(out, uni, boundlen)) == -1) { + if (boundlen <= 0) + return -ENAMETOOLONG; + + n = utf32_to_utf8(uni, out, boundlen); + if (n < 0) { *out = '?'; return -EINVAL; } @@ -25,11 +29,14 @@ static int uni2char(wchar_t uni, unsigned char *out, int boundlen) static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni) { int n; + unicode_t u; - if ( (n = utf8_mbtowc(uni, rawstring, boundlen)) == -1) { + n = utf8_to_utf32(rawstring, boundlen, &u); + if (n < 0 || u > MAX_WCHAR_T) { *uni = 0x003f; /* ? */ - n = -EINVAL; + return -EINVAL; } + *uni = (wchar_t) u; return n; } diff --git a/include/linux/nls.h b/include/linux/nls.h index 52b1a76c1b43..d47beef08dfd 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -3,8 +3,23 @@ #include -/* unicode character */ -typedef __u16 wchar_t; +/* Unicode has changed over the years. Unicode code points no longer + * fit into 16 bits; as of Unicode 5 valid code points range from 0 + * to 0x10ffff (17 planes, where each plane holds 65536 code points). + * + * The original decision to represent Unicode characters as 16-bit + * wchar_t values is now outdated. But plane 0 still includes the + * most commonly used characters, so we will retain it. The newer + * 32-bit unicode_t type can be used when it is necessary to + * represent the full Unicode character set. + */ + +/* Plane-0 Unicode character */ +typedef u16 wchar_t; +#define MAX_WCHAR_T 0xffff + +/* Arbitrary Unicode character */ +typedef u32 unicode_t; struct nls_table { const char *charset; @@ -21,6 +36,13 @@ struct nls_table { /* this value hold the maximum octet of charset */ #define NLS_MAX_CHARSET_SIZE 6 /* for UTF-8 */ +/* Byte order for UTF-16 strings */ +enum utf16_endian { + UTF16_HOST_ENDIAN, + UTF16_LITTLE_ENDIAN, + UTF16_BIG_ENDIAN +}; + /* nls.c */ extern int register_nls(struct nls_table *); extern int unregister_nls(struct nls_table *); @@ -28,10 +50,11 @@ extern struct nls_table *load_nls(char *); extern void unload_nls(struct nls_table *); extern struct nls_table *load_nls_default(void); -extern int utf8_mbtowc(wchar_t *, const __u8 *, int); -extern int utf8_mbstowcs(wchar_t *, const __u8 *, int); -extern int utf8_wctomb(__u8 *, wchar_t, int); -extern int utf8_wcstombs(__u8 *, const wchar_t *, int); +extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu); +extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen); +extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs); +extern int utf16s_to_utf8s(const wchar_t *pwcs, int len, + enum utf16_endian endian, u8 *s, int maxlen); static inline unsigned char nls_tolower(struct nls_table *t, unsigned char c) { -- cgit v1.2.3 From 79abb1ab13cee5ba488210798b6e7bbae0b391ac Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Mon, 27 Apr 2009 19:58:26 -0700 Subject: USB: Support for bandwidth allocation. Originally, the USB core had no support for allocating bandwidth when a particular configuration or alternate setting for an interface was selected. Instead, the device driver's URB submission would fail if there was not enough bandwidth for a periodic endpoint. Drivers could work around this, by using the scatter-gather list API to guarantee bandwidth. This patch adds host controller API to allow the USB core to allocate or deallocate bandwidth for an endpoint. Endpoints are added to or dropped from a copy of the current schedule by calling add_endpoint() or drop_endpoint(), and then the schedule is atomically evaluated with a call to check_bandwidth(). This allows all the endpoints for a new configuration or alternate setting to be added at the same time that the endpoints from the old configuration or alt setting are dropped. Endpoints must be added to the schedule before any URBs are submitted to them. The HCD must be allowed to reject a new configuration or alt setting before the control transfer is sent to the device requesting the change. It may reject the change because there is not enough bandwidth, not enough internal resources (such as memory on an embedded host controller), or perhaps even for security reasons in a virtualized environment. If the call to check_bandwidth() fails, the USB core must call reset_bandwidth(). This causes the schedule to be reverted back to the state it was in just after the last successful check_bandwidth() call. If the call succeeds, the host controller driver (and hardware) will have changed its internal state to match the new configuration or alternate setting. The USB core can then issue a control transfer to the device to change the configuration or alt setting. This allows the core to test new configurations or alternate settings before unbinding drivers bound to interfaces in the old configuration. WIP: The USB core must add endpoints from all interfaces in a configuration to the schedule, because a driver may claim that interface at any time. A slight optimization might be to add the endpoints to the schedule once a driver claims that interface. FIXME This patch does not cover changing alternate settings, but it does handle a configuration change or de-configuration. FIXME The code for managing the schedule is currently HCD specific. A generic scheduling algorithm could be added for host controllers without built-in scheduling support. For now, if a host controller does not define the check_bandwidth() function, the call to usb_hcd_check_bandwidth() will always succeed. Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/usb/core/hcd.h | 32 +++++++++++++++++ drivers/usb/core/message.c | 20 +++++++++++ drivers/usb/core/urb.c | 6 ++++ 4 files changed, 144 insertions(+) (limited to 'drivers/usb/core/message.c') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 823744d80cb9..b2da4753b12e 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1560,6 +1560,92 @@ rescan: } } +/* Check whether a new configuration or alt setting for an interface + * will exceed the bandwidth for the bus (or the host controller resources). + * Only pass in a non-NULL config or interface, not both! + * Passing NULL for both new_config and new_intf means the device will be + * de-configured by issuing a set configuration 0 command. + */ +int usb_hcd_check_bandwidth(struct usb_device *udev, + struct usb_host_config *new_config, + struct usb_interface *new_intf) +{ + int num_intfs, i, j; + struct usb_interface_cache *intf_cache; + struct usb_host_interface *alt = 0; + int ret = 0; + struct usb_hcd *hcd; + struct usb_host_endpoint *ep; + + hcd = bus_to_hcd(udev->bus); + if (!hcd->driver->check_bandwidth) + return 0; + + /* Configuration is being removed - set configuration 0 */ + if (!new_config && !new_intf) { + for (i = 1; i < 16; ++i) { + ep = udev->ep_out[i]; + if (ep) + hcd->driver->drop_endpoint(hcd, udev, ep); + ep = udev->ep_in[i]; + if (ep) + hcd->driver->drop_endpoint(hcd, udev, ep); + } + hcd->driver->check_bandwidth(hcd, udev); + return 0; + } + /* Check if the HCD says there's enough bandwidth. Enable all endpoints + * each interface's alt setting 0 and ask the HCD to check the bandwidth + * of the bus. There will always be bandwidth for endpoint 0, so it's + * ok to exclude it. + */ + if (new_config) { + num_intfs = new_config->desc.bNumInterfaces; + /* Remove endpoints (except endpoint 0, which is always on the + * schedule) from the old config from the schedule + */ + for (i = 1; i < 16; ++i) { + ep = udev->ep_out[i]; + if (ep) { + ret = hcd->driver->drop_endpoint(hcd, udev, ep); + if (ret < 0) + goto reset; + } + ep = udev->ep_in[i]; + if (ep) { + ret = hcd->driver->drop_endpoint(hcd, udev, ep); + if (ret < 0) + goto reset; + } + } + for (i = 0; i < num_intfs; ++i) { + + /* Dig the endpoints for alt setting 0 out of the + * interface cache for this interface + */ + intf_cache = new_config->intf_cache[i]; + for (j = 0; j < intf_cache->num_altsetting; j++) { + if (intf_cache->altsetting[j].desc.bAlternateSetting == 0) + alt = &intf_cache->altsetting[j]; + } + if (!alt) { + printk(KERN_DEBUG "Did not find alt setting 0 for intf %d\n", i); + continue; + } + for (j = 0; j < alt->desc.bNumEndpoints; j++) { + ret = hcd->driver->add_endpoint(hcd, udev, &alt->endpoint[j]); + if (ret < 0) + goto reset; + } + } + } + ret = hcd->driver->check_bandwidth(hcd, udev); +reset: + if (ret < 0) + hcd->driver->reset_bandwidth(hcd, udev); + return ret; +} + /* Disables the endpoint: synchronizes with the hcd to make sure all * endpoint state is gone from hardware. usb_hcd_flush_endpoint() must * have been called previously. Use for set_configuration, set_interface, diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h index ae6d9db41ca9..d397ecfd5b17 100644 --- a/drivers/usb/core/hcd.h +++ b/drivers/usb/core/hcd.h @@ -232,6 +232,35 @@ struct hc_driver { int (*alloc_dev)(struct usb_hcd *, struct usb_device *); /* Called by usb_release_dev to free HC device structures */ void (*free_dev)(struct usb_hcd *, struct usb_device *); + + /* Bandwidth computation functions */ + /* Note that add_endpoint() can only be called once per endpoint before + * check_bandwidth() or reset_bandwidth() must be called. + * drop_endpoint() can only be called once per endpoint also. + * A call to xhci_drop_endpoint() followed by a call to xhci_add_endpoint() will + * add the endpoint to the schedule with possibly new parameters denoted by a + * different endpoint descriptor in usb_host_endpoint. + * A call to xhci_add_endpoint() followed by a call to xhci_drop_endpoint() is + * not allowed. + */ + /* Allocate endpoint resources and add them to a new schedule */ + int (*add_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *); + /* Drop an endpoint from a new schedule */ + int (*drop_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *); + /* Check that a new hardware configuration, set using + * endpoint_enable and endpoint_disable, does not exceed bus + * bandwidth. This must be called before any set configuration + * or set interface requests are sent to the device. + */ + int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); + /* Reset the device schedule to the last known good schedule, + * which was set from a previous successful call to + * check_bandwidth(). This reverts any add_endpoint() and + * drop_endpoint() calls since that last successful call. + * Used for when a check_bandwidth() call fails due to resource + * or bandwidth constraints. + */ + void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); /* Returns the hardware-chosen device address */ int (*address_device)(struct usb_hcd *, struct usb_device *udev); }; @@ -252,6 +281,9 @@ extern void usb_hcd_disable_endpoint(struct usb_device *udev, extern void usb_hcd_reset_endpoint(struct usb_device *udev, struct usb_host_endpoint *ep); extern void usb_hcd_synchronize_unlinks(struct usb_device *udev); +extern int usb_hcd_check_bandwidth(struct usb_device *udev, + struct usb_host_config *new_config, + struct usb_interface *new_intf); extern int usb_hcd_get_frame_number(struct usb_device *udev); extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 9bd26dec7599..3a2e69ec2f29 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -510,6 +510,10 @@ EXPORT_SYMBOL_GPL(usb_sg_init); * could be transferred. That capability is less useful for low or full * speed interrupt endpoints, which allow at most one packet per millisecond, * of at most 8 or 64 bytes (respectively). + * + * It is not necessary to call this function to reserve bandwidth for devices + * under an xHCI host controller, as the bandwidth is reserved when the + * configuration or interface alt setting is selected. */ void usb_sg_wait(struct usb_sg_request *io) { @@ -1653,6 +1657,21 @@ free_interfaces: if (ret) goto free_interfaces; + /* Make sure we have bandwidth (and available HCD resources) for this + * configuration. Remove endpoints from the schedule if we're dropping + * this configuration to set configuration 0. After this point, the + * host controller will not allow submissions to dropped endpoints. If + * this call fails, the device state is unchanged. + */ + if (cp) + ret = usb_hcd_check_bandwidth(dev, cp, NULL); + else + ret = usb_hcd_check_bandwidth(dev, NULL, NULL); + if (ret < 0) { + usb_autosuspend_device(dev); + goto free_interfaces; + } + /* if it's already configured, clear out old state first. * getting rid of old interfaces means unbinding their drivers. */ @@ -1675,6 +1694,7 @@ free_interfaces: dev->actconfig = cp; if (!cp) { usb_set_device_state(dev, USB_STATE_ADDRESS); + usb_hcd_check_bandwidth(dev, NULL, NULL); usb_autosuspend_device(dev); goto free_interfaces; } diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 02eb0ef7a4c3..0885d4abdc62 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -241,6 +241,12 @@ EXPORT_SYMBOL_GPL(usb_unanchor_urb); * If the USB subsystem can't allocate sufficient bandwidth to perform * the periodic request, submitting such a periodic request should fail. * + * For devices under xHCI, the bandwidth is reserved at configuration time, or + * when the alt setting is selected. If there is not enough bus bandwidth, the + * configuration/alt setting request will fail. Therefore, submissions to + * periodic endpoints on devices under xHCI should never fail due to bandwidth + * constraints. + * * Device drivers must explicitly request that repetition, by ensuring that * some URB is always on the endpoint's queue (except possibly for short * periods during completion callacks). When there is no longer an urb -- cgit v1.2.3 From e04748e3a87271fcf30d383e3780c5d3ee1c1618 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Mon, 27 Apr 2009 19:59:01 -0700 Subject: USB: Push scatter gather lists down to host controller drivers. This is the original patch I created before David Vrabel posted a better patch (http://marc.info/?l=linux-usb&m=123377477209109&w=2) that does basically the same thing. This patch will get replaced with his (modified) patch later. Allow USB device drivers that use usb_sg_init() and usb_sg_wait() to push bulk endpoint scatter gather lists down to the host controller drivers. This allows host controller drivers to more efficiently enqueue these transfers, and allows the xHCI host controller to better take advantage of USB 3.0 "bursts" for bulk endpoints. This patch currently only enables scatter gather lists for bulk endpoints. Other endpoint types that use the usb_sg_* functions will not have their scatter gather lists pushed down to the host controller. For periodic endpoints, we want each scatterlist entry to be a separate transfer. Eventually, HCDs could parse these scatter-gather lists for periodic endpoints also. For now, we use the old code and call usb_submit_urb() for each scatterlist entry. The caller of usb_sg_init() can request that all bytes in the scatter gather list be transferred by passing in a length of zero. Handle that request for a bulk endpoint under xHCI by walking the scatter gather list and calculating the length. We could let the HCD handle a zero length in this case, but I'm not sure if the core layers in between will get confused by this. Signed-off-by: Sarah Sharp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 3 +- drivers/usb/core/message.c | 139 +++++++++++++++++++++++++++++---------------- include/linux/usb.h | 2 + 3 files changed, 95 insertions(+), 49 deletions(-) (limited to 'drivers/usb/core/message.c') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index b2da4753b12e..1609623ec829 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1239,7 +1239,8 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, /* Map the URB's buffers for DMA access. * Lower level HCD code should use *_dma exclusively, - * unless it uses pio or talks to another transport. + * unless it uses pio or talks to another transport, + * or uses the provided scatter gather list for bulk. */ if (is_root_hub(urb->dev)) return 0; diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 3a2e69ec2f29..2bed83caacb1 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -365,6 +365,7 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev, int i; int urb_flags; int dma; + int use_sg; if (!io || !dev || !sg || usb_pipecontrol(pipe) @@ -392,7 +393,19 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev, if (io->entries <= 0) return io->entries; - io->urbs = kmalloc(io->entries * sizeof *io->urbs, mem_flags); + /* If we're running on an xHCI host controller, queue the whole scatter + * gather list with one call to urb_enqueue(). This is only for bulk, + * as that endpoint type does not care how the data gets broken up + * across frames. + */ + if (usb_pipebulk(pipe) && + bus_to_hcd(dev->bus)->driver->flags & HCD_USB3) { + io->urbs = kmalloc(sizeof *io->urbs, mem_flags); + use_sg = true; + } else { + io->urbs = kmalloc(io->entries * sizeof *io->urbs, mem_flags); + use_sg = false; + } if (!io->urbs) goto nomem; @@ -402,62 +415,92 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev, if (usb_pipein(pipe)) urb_flags |= URB_SHORT_NOT_OK; - for_each_sg(sg, sg, io->entries, i) { - unsigned len; - - io->urbs[i] = usb_alloc_urb(0, mem_flags); - if (!io->urbs[i]) { - io->entries = i; + if (use_sg) { + io->urbs[0] = usb_alloc_urb(0, mem_flags); + if (!io->urbs[0]) { + io->entries = 0; goto nomem; } - io->urbs[i]->dev = NULL; - io->urbs[i]->pipe = pipe; - io->urbs[i]->interval = period; - io->urbs[i]->transfer_flags = urb_flags; - - io->urbs[i]->complete = sg_complete; - io->urbs[i]->context = io; - - /* - * Some systems need to revert to PIO when DMA is temporarily - * unavailable. For their sakes, both transfer_buffer and - * transfer_dma are set when possible. However this can only - * work on systems without: - * - * - HIGHMEM, since DMA buffers located in high memory are - * not directly addressable by the CPU for PIO; - * - * - IOMMU, since dma_map_sg() is allowed to use an IOMMU to - * make virtually discontiguous buffers be "dma-contiguous" - * so that PIO and DMA need diferent numbers of URBs. - * - * So when HIGHMEM or IOMMU are in use, transfer_buffer is NULL - * to prevent stale pointers and to help spot bugs. - */ - if (dma) { - io->urbs[i]->transfer_dma = sg_dma_address(sg); - len = sg_dma_len(sg); + io->urbs[0]->dev = NULL; + io->urbs[0]->pipe = pipe; + io->urbs[0]->interval = period; + io->urbs[0]->transfer_flags = urb_flags; + + io->urbs[0]->complete = sg_complete; + io->urbs[0]->context = io; + /* A length of zero means transfer the whole sg list */ + io->urbs[0]->transfer_buffer_length = length; + if (length == 0) { + for_each_sg(sg, sg, io->entries, i) { + io->urbs[0]->transfer_buffer_length += + sg_dma_len(sg); + } + } + io->urbs[0]->sg = io; + io->urbs[0]->num_sgs = io->entries; + io->entries = 1; + } else { + for_each_sg(sg, sg, io->entries, i) { + unsigned len; + + io->urbs[i] = usb_alloc_urb(0, mem_flags); + if (!io->urbs[i]) { + io->entries = i; + goto nomem; + } + + io->urbs[i]->dev = NULL; + io->urbs[i]->pipe = pipe; + io->urbs[i]->interval = period; + io->urbs[i]->transfer_flags = urb_flags; + + io->urbs[i]->complete = sg_complete; + io->urbs[i]->context = io; + + /* + * Some systems need to revert to PIO when DMA is + * temporarily unavailable. For their sakes, both + * transfer_buffer and transfer_dma are set when + * possible. However this can only work on systems + * without: + * + * - HIGHMEM, since DMA buffers located in high memory + * are not directly addressable by the CPU for PIO; + * + * - IOMMU, since dma_map_sg() is allowed to use an + * IOMMU to make virtually discontiguous buffers be + * "dma-contiguous" so that PIO and DMA need diferent + * numbers of URBs. + * + * So when HIGHMEM or IOMMU are in use, transfer_buffer + * is NULL to prevent stale pointers and to help spot + * bugs. + */ + if (dma) { + io->urbs[i]->transfer_dma = sg_dma_address(sg); + len = sg_dma_len(sg); #if defined(CONFIG_HIGHMEM) || defined(CONFIG_GART_IOMMU) - io->urbs[i]->transfer_buffer = NULL; + io->urbs[i]->transfer_buffer = NULL; #else - io->urbs[i]->transfer_buffer = sg_virt(sg); + io->urbs[i]->transfer_buffer = sg_virt(sg); #endif - } else { - /* hc may use _only_ transfer_buffer */ - io->urbs[i]->transfer_buffer = sg_virt(sg); - len = sg->length; - } + } else { + /* hc may use _only_ transfer_buffer */ + io->urbs[i]->transfer_buffer = sg_virt(sg); + len = sg->length; + } - if (length) { - len = min_t(unsigned, len, length); - length -= len; - if (length == 0) - io->entries = i + 1; + if (length) { + len = min_t(unsigned, len, length); + length -= len; + if (length == 0) + io->entries = i + 1; + } + io->urbs[i]->transfer_buffer_length = len; } - io->urbs[i]->transfer_buffer_length = len; + io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT; } - io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT; /* transaction state */ io->count = io->entries; diff --git a/include/linux/usb.h b/include/linux/usb.h index 13bced521b83..0a1819a64974 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1198,6 +1198,8 @@ struct urb { unsigned int transfer_flags; /* (in) URB_SHORT_NOT_OK | ...*/ void *transfer_buffer; /* (in) associated data buffer */ dma_addr_t transfer_dma; /* (in) dma addr for transfer_buffer */ + struct usb_sg_request *sg; /* (in) scatter gather buffer list */ + int num_sgs; /* (in) number of entries in the sg list */ u32 transfer_buffer_length; /* (in) data buffer length */ u32 actual_length; /* (return) actual transfer length */ unsigned char *setup_packet; /* (in) setup packet (control only) */ -- cgit v1.2.3