From 525e2c56c341cb8b31bbe1694f0582077f454969 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Fri, 15 Jan 2016 02:44:19 -0600 Subject: udf: Parameterize output length in udf_put_filename Make the desired output length a parameter rather than have it hard-coded to UDF_NAME_LEN. Although all call sites still have this length the same, this parameterization will make the function more universal and also consistent with udf_get_filename. Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara --- fs/udf/unicode.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/udf/unicode.c') diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index e788a05aab83..47e61883275d 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -395,22 +395,22 @@ out1: return ret; } -int udf_put_filename(struct super_block *sb, const uint8_t *sname, - uint8_t *dname, int flen) +int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, + uint8_t *dname, int dlen) { struct ustr unifilename; int namelen; - if (!udf_char_to_ustr(&unifilename, sname, flen)) + if (!udf_char_to_ustr(&unifilename, sname, slen)) return 0; if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { - namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN); + namelen = udf_UTF8toCS0(dname, &unifilename, dlen); if (!namelen) return 0; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, - &unifilename, UDF_NAME_LEN); + &unifilename, dlen); if (!namelen) return 0; } else -- cgit v1.2.3 From 3e7fc2055c931b1c27a9834a753611c879492a34 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Fri, 15 Jan 2016 02:44:20 -0600 Subject: udf: Join functions for UTF8 and NLS conversions There is not much sense in having separate functions for UTF8 and NLS conversions, since UTF8 encoding is actually a special case of NLS. 
However, although UTF8 is also supported by general NLS framework, it would be good to have separate UTF8 character conversion functions (char2uni and uni2char) locally in UDF code, so that they could be used even if NLS support is not enabled in the kernel configuration. Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara --- fs/udf/unicode.c | 278 ++++++++++++++++++------------------------------------- 1 file changed, 90 insertions(+), 188 deletions(-) (limited to 'fs/udf/unicode.c') diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 47e61883275d..4d7a674ebce5 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) memcpy(dest->u_name, ptr + 1, exactsize - 1); } -/* - * udf_CS0toUTF8 - * - * PURPOSE - * Convert OSTA Compressed Unicode to the UTF-8 equivalent. - * - * PRE-CONDITIONS - * utf Pointer to UTF-8 output buffer. - * ocu Pointer to OSTA Compressed Unicode input buffer - * of size UDF_NAME_LEN bytes. - * both of type "struct ustr *" - * - * POST-CONDITIONS - * >= 0 on success. - * - * HISTORY - * November 12, 1997 - Andrew E. Mileski - * Written, tested, and released. 
- */ -int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) +static int udf_uni2char_utf8(wchar_t uni, + unsigned char *out, + int boundlen) { - const uint8_t *ocu; - uint8_t cmp_id, ocu_len; - int i; - - ocu_len = ocu_i->u_len; - if (ocu_len == 0) { - memset(utf_o, 0, sizeof(struct ustr)); - return 0; - } - - cmp_id = ocu_i->u_cmpID; - if (cmp_id != 8 && cmp_id != 16) { - memset(utf_o, 0, sizeof(struct ustr)); - pr_err("unknown compression code (%d) stri=%s\n", - cmp_id, ocu_i->u_name); - return -EINVAL; - } - - ocu = ocu_i->u_name; - utf_o->u_len = 0; - for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { - - /* Expand OSTA compressed Unicode to Unicode */ - uint32_t c = ocu[i++]; - if (cmp_id == 16) - c = (c << 8) | ocu[i++]; - - /* Compress Unicode to UTF-8 */ - if (c < 0x80U) - utf_o->u_name[utf_o->u_len++] = (uint8_t)c; - else if (c < 0x800U) { - if (utf_o->u_len > (UDF_NAME_LEN - 4)) - break; - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0xc0 | (c >> 6)); - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0x80 | (c & 0x3f)); - } else { - if (utf_o->u_len > (UDF_NAME_LEN - 5)) - break; - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0xe0 | (c >> 12)); - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0x80 | - ((c >> 6) & 0x3f)); - utf_o->u_name[utf_o->u_len++] = - (uint8_t)(0x80 | (c & 0x3f)); - } + int u_len = 0; + + if (boundlen <= 0) + return -ENAMETOOLONG; + + if (uni < 0x80) { + out[u_len++] = (unsigned char)uni; + } else if (uni < 0x800) { + if (boundlen < 2) + return -ENAMETOOLONG; + out[u_len++] = (unsigned char)(0xc0 | (uni >> 6)); + out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); + } else { + if (boundlen < 3) + return -ENAMETOOLONG; + out[u_len++] = (unsigned char)(0xe0 | (uni >> 12)); + out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f)); + out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); } - utf_o->u_cmpID = 8; - - return utf_o->u_len; + return u_len; } -/* - * - * udf_UTF8toCS0 - * - * PURPOSE - * Convert UTF-8 to 
the OSTA Compressed Unicode equivalent. - * - * DESCRIPTION - * This routine is only called by udf_lookup(). - * - * PRE-CONDITIONS - * ocu Pointer to OSTA Compressed Unicode output - * buffer of size UDF_NAME_LEN bytes. - * utf Pointer to UTF-8 input buffer. - * utf_len Length of UTF-8 input buffer in bytes. - * - * POST-CONDITIONS - * Zero on success. - * - * HISTORY - * November 12, 1997 - Andrew E. Mileski - * Written, tested, and released. - */ -static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length) +static int udf_char2uni_utf8(const unsigned char *in, + int boundlen, + wchar_t *uni) { - unsigned c, i, max_val, utf_char; - int utf_cnt, u_len, u_ch; + unsigned int utf_char; + unsigned char c; + int utf_cnt, u_len; - memset(ocu, 0, sizeof(dstring) * length); - ocu[0] = 8; - max_val = 0xffU; - u_ch = 1; - -try_again: - u_len = 0U; - utf_char = 0U; - utf_cnt = 0U; - for (i = 0U; i < utf->u_len; i++) { - /* Name didn't fit? */ - if (u_len + 1 + u_ch >= length) - return 0; - - c = (uint8_t)utf->u_name[i]; + utf_char = 0; + utf_cnt = 0; + for (u_len = 0; u_len < boundlen;) { + c = in[u_len++]; /* Complete a multi-byte UTF-8 character */ if (utf_cnt) { - utf_char = (utf_char << 6) | (c & 0x3fU); + utf_char = (utf_char << 6) | (c & 0x3f); if (--utf_cnt) continue; } else { /* Check for a multi-byte UTF-8 character */ - if (c & 0x80U) { + if (c & 0x80) { /* Start a multi-byte UTF-8 character */ - if ((c & 0xe0U) == 0xc0U) { - utf_char = c & 0x1fU; + if ((c & 0xe0) == 0xc0) { + utf_char = c & 0x1f; utf_cnt = 1; - } else if ((c & 0xf0U) == 0xe0U) { - utf_char = c & 0x0fU; + } else if ((c & 0xf0) == 0xe0) { + utf_char = c & 0x0f; utf_cnt = 2; - } else if ((c & 0xf8U) == 0xf0U) { - utf_char = c & 0x07U; + } else if ((c & 0xf8) == 0xf0) { + utf_char = c & 0x07; utf_cnt = 3; - } else if ((c & 0xfcU) == 0xf8U) { - utf_char = c & 0x03U; + } else if ((c & 0xfc) == 0xf8) { + utf_char = c & 0x03; utf_cnt = 4; - } else if ((c & 0xfeU) == 0xfcU) { - utf_char = c & 
0x01U; + } else if ((c & 0xfe) == 0xfc) { + utf_char = c & 0x01; utf_cnt = 5; } else { - goto error_out; + utf_cnt = -1; + break; } continue; } else { @@ -228,36 +149,19 @@ try_again: utf_char = c; } } - - /* Choose no compression if necessary */ - if (utf_char > max_val) { - if (max_val == 0xffU) { - max_val = 0xffffU; - ocu[0] = (uint8_t)0x10U; - u_ch = 2; - goto try_again; - } - goto error_out; - } - - if (max_val == 0xffffU) - ocu[++u_len] = (uint8_t)(utf_char >> 8); - ocu[++u_len] = (uint8_t)(utf_char & 0xffU); + *uni = utf_char; + break; } - if (utf_cnt) { -error_out: - ocu[++u_len] = '?'; - printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n")); + *uni = '?'; + return -EINVAL; } - - ocu[length - 1] = (uint8_t)u_len + 1; - - return u_len + 1; + return u_len; } -static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, - const struct ustr *ocu_i) +static int udf_name_from_CS0(struct ustr *utf_o, + const struct ustr *ocu_i, + int (*conv_f)(wchar_t, unsigned char *, int)) { const uint8_t *ocu; uint8_t cmp_id, ocu_len; @@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, if (cmp_id == 16) c = (c << 8) | ocu[i++]; - len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - 2 - utf_o->u_len); + len = conv_f(c, &utf_o->u_name[utf_o->u_len], + UDF_NAME_LEN - 2 - utf_o->u_len); /* Valid character? 
*/ if (len >= 0) utf_o->u_len += len; + else if (len == -ENAMETOOLONG) + break; else utf_o->u_name[utf_o->u_len++] = '?'; } @@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, return utf_o->u_len; } -static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, - int length) +static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length, + int (*conv_f)(const unsigned char *, int, wchar_t *)) { - int len; - unsigned i, max_val; - uint16_t uni_char; + int i, len; + unsigned int max_val; + wchar_t uni_char; int u_len, u_ch; memset(ocu, 0, sizeof(dstring) * length); ocu[0] = 8; - max_val = 0xffU; + max_val = 0xff; u_ch = 1; try_again: - u_len = 0U; - for (i = 0U; i < uni->u_len; i++) { + u_len = 0; + for (i = 0; i < uni->u_len; i++) { /* Name didn't fit? */ if (u_len + 1 + u_ch >= length) return 0; - len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); + len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char); if (!len) continue; /* Invalid character, deal with it */ @@ -328,15 +234,15 @@ try_again: } if (uni_char > max_val) { - max_val = 0xffffU; - ocu[0] = (uint8_t)0x10U; + max_val = 0xffff; + ocu[0] = 0x10; u_ch = 2; goto try_again; } - if (max_val == 0xffffU) + if (max_val == 0xffff) ocu[++u_len] = (uint8_t)(uni_char >> 8); - ocu[++u_len] = (uint8_t)(uni_char & 0xffU); + ocu[++u_len] = (uint8_t)(uni_char & 0xff); i += len - 1; } @@ -344,10 +250,16 @@ try_again: return u_len + 1; } +int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) +{ + return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8); +} + int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, uint8_t *dname, int dlen) { struct ustr *filename, *unifilename; + int (*conv_f)(wchar_t, unsigned char *, int); int ret; if (!slen) @@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, udf_build_ustr_exact(unifilename, sname, slen); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 
- ret = udf_CS0toUTF8(filename, unifilename); - if (ret < 0) { - udf_debug("Failed in udf_get_filename: sname = %s\n", - sname); - goto out2; - } + conv_f = udf_uni2char_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { - ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename, - unifilename); - if (ret < 0) { - udf_debug("Failed in udf_get_filename: sname = %s\n", - sname); - goto out2; - } + conv_f = UDF_SB(sb)->s_nls_map->uni2char; } else BUG(); + ret = udf_name_from_CS0(filename, unifilename, conv_f); + if (ret < 0) { + udf_debug("Failed in udf_get_filename: sname = %s\n", sname); + goto out2; + } + ret = udf_translate_to_linux(dname, dlen, filename->u_name, filename->u_len, unifilename->u_name, unifilename->u_len); @@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, uint8_t *dname, int dlen) { struct ustr unifilename; - int namelen; + int (*conv_f)(const unsigned char *, int, wchar_t *); if (!udf_char_to_ustr(&unifilename, sname, slen)) return 0; if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { - namelen = udf_UTF8toCS0(dname, &unifilename, dlen); - if (!namelen) - return 0; + conv_f = udf_char2uni_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { - namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, - &unifilename, dlen); - if (!namelen) - return 0; + conv_f = UDF_SB(sb)->s_nls_map->char2uni; } else - return 0; + BUG(); - return namelen; + return udf_name_to_CS0(dname, &unifilename, dlen, conv_f); } #define ILLEGAL_CHAR_MARK '_' -- cgit v1.2.3 From 9fba70569d9c3c253dba10ebbe3359f2157e504c Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Fri, 15 Jan 2016 02:44:21 -0600 Subject: udf: Adjust UDF_NAME_LEN to better reflect actual restrictions Actual name length restriction is 254 bytes, this is used in 'ustr' structure, and this is what fits into UDF File Ident structures. And in most cases the constant is used as UDF_NAME_LEN-2. So, it's better to just modify the constant to make it closer to reality. 
Also, in some cases it's useful to have a separate constant for the maximum length of file name field in CS0 encoding in UDF File Ident structures. Also, remove the unused UDF_PATH_LEN constant. Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara --- fs/udf/namei.c | 10 +++++----- fs/udf/super.c | 2 +- fs/udf/udfdecl.h | 6 +++--- fs/udf/unicode.c | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/udf/unicode.c') diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f82c70d73aba..9eb9c6440270 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -291,7 +291,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, struct udf_fileident_bh fibh; struct fileIdentDesc *fi; - if (dentry->d_name.len > UDF_NAME_LEN - 2) + if (dentry->d_name.len > UDF_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); #ifdef UDF_RECOVERY @@ -351,7 +351,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, struct udf_inode_info *dinfo; fibh->sbh = fibh->ebh = NULL; - name = kmalloc(UDF_NAME_LEN, GFP_NOFS); + name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS); if (!name) { *err = -ENOMEM; goto out_err; @@ -364,7 +364,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, } namelen = udf_put_filename(sb, dentry->d_name.name, dentry->d_name.len, - name, UDF_NAME_LEN); + name, UDF_NAME_LEN_CS0); if (!namelen) { *err = -ENAMETOOLONG; goto out_err; @@ -915,7 +915,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, iinfo = UDF_I(inode); down_write(&iinfo->i_data_sem); - name = kmalloc(UDF_NAME_LEN, GFP_NOFS); + name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS); if (!name) { err = -ENOMEM; goto out_no_entry; @@ -1000,7 +1000,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, if (pc->componentType == 5) { namelen = udf_put_filename(sb, compstart, symname - compstart, - name, UDF_NAME_LEN); + name, UDF_NAME_LEN_CS0); if (!namelen) goto out_no_entry; diff --git a/fs/udf/super.c b/fs/udf/super.c index 
a522c15a0bfd..ffb35f7eab38 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2358,7 +2358,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) le32_to_cpu(lvidiu->numDirs)) : 0) + buf->f_bfree; buf->f_ffree = buf->f_bfree; - buf->f_namelen = UDF_NAME_LEN - 2; + buf->f_namelen = UDF_NAME_LEN; buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 4a47c7267614..47a228248c5b 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -49,8 +49,8 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb, #define UDF_EXTENT_FLAG_MASK 0xC0000000 #define UDF_NAME_PAD 4 -#define UDF_NAME_LEN 256 -#define UDF_PATH_LEN 1023 +#define UDF_NAME_LEN 254 +#define UDF_NAME_LEN_CS0 255 static inline size_t udf_file_entry_alloc_offset(struct inode *inode) { @@ -108,7 +108,7 @@ struct generic_desc { struct ustr { uint8_t u_cmpID; - uint8_t u_name[UDF_NAME_LEN - 2]; + uint8_t u_name[UDF_NAME_LEN]; uint8_t u_len; }; diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 4d7a674ebce5..5599e7535401 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -33,7 +33,7 @@ static int udf_translate_to_linux(uint8_t *, int, uint8_t *, int, uint8_t *, static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) { - if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN - 2)) + if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN)) return 0; memset(dest, 0, sizeof(struct ustr)); @@ -184,14 +184,14 @@ static int udf_name_from_CS0(struct ustr *utf_o, ocu = ocu_i->u_name; utf_o->u_len = 0; - for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) { + for (i = 0; (i < ocu_len) && (utf_o->u_len < UDF_NAME_LEN);) { /* Expand OSTA compressed Unicode to Unicode */ uint32_t c = ocu[i++]; if (cmp_id == 16) c = (c << 8) | ocu[i++]; len = conv_f(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - 2 - utf_o->u_len); + UDF_NAME_LEN - utf_o->u_len); /* Valid character? 
*/ if (len >= 0) utf_o->u_len += len; -- cgit v1.2.3 From 9293fcfbc1812a22ad5ce1b542eb90c1bbe01be1 Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Fri, 15 Jan 2016 02:44:22 -0600 Subject: udf: Remove struct ustr as non-needed intermediate storage Although 'struct ustr' tries to structure the data by combining the string and its length, it doesn't actually provide much benefit: it saves only one parameter, but introduces an extra copy of the whole buffer, which serves only as intermediate storage. This is inefficient and not actually needed. This commit gets rid of struct ustr by changing the parameters of some functions appropriately. Also, it removes the use of the 'dstring' type, since it doesn't make much sense either. While at it, add a 'const' qualifier to udf_get_filename to make the parameter sets consistent. Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara --- fs/udf/super.c | 36 +++++--------- fs/udf/udfdecl.h | 13 ++--- fs/udf/unicode.c | 146 +++++++++++++++++-------------------------------------- 3 files changed, 61 insertions(+), 134 deletions(-) (limited to 'fs/udf/unicode.c') diff --git a/fs/udf/super.c b/fs/udf/super.c index ffb35f7eab38..fa92fe839fda 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -887,18 +887,14 @@ static int udf_find_fileset(struct super_block *sb, static int udf_load_pvoldesc(struct super_block *sb, sector_t block) { struct primaryVolDesc *pvoldesc; - struct ustr *instr, *outstr; + uint8_t *outstr; struct buffer_head *bh; uint16_t ident; int ret = -ENOMEM; - instr = kmalloc(sizeof(struct ustr), GFP_NOFS); - if (!instr) - return -ENOMEM; - - outstr = kmalloc(sizeof(struct ustr), GFP_NOFS); + outstr = kmalloc(128, GFP_NOFS); if (!outstr) - goto out1; + return -ENOMEM; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) { @@ -923,31 +919,25 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) #endif } - if (!udf_build_ustr(instr, pvoldesc->volIdent, 32)) { - ret = udf_CS0toUTF8(outstr, 
instr); - if (ret < 0) - goto out_bh; + ret = udf_CS0toUTF8(outstr, 31, pvoldesc->volIdent, 32); + if (ret < 0) + goto out_bh; - strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name, - outstr->u_len > 31 ? 31 : outstr->u_len); - udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); - } + strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret); + udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident); - if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) { - ret = udf_CS0toUTF8(outstr, instr); - if (ret < 0) - goto out_bh; + ret = udf_CS0toUTF8(outstr, 127, pvoldesc->volSetIdent, 128); + if (ret < 0) + goto out_bh; - udf_debug("volSetIdent[] = '%s'\n", outstr->u_name); - } + outstr[ret] = 0; + udf_debug("volSetIdent[] = '%s'\n", outstr); ret = 0; out_bh: brelse(bh); out2: kfree(outstr); -out1: - kfree(instr); return ret; } diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index 47a228248c5b..972b70625614 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -106,12 +106,6 @@ struct generic_desc { __le32 volDescSeqNum; }; -struct ustr { - uint8_t u_cmpID; - uint8_t u_name[UDF_NAME_LEN]; - uint8_t u_len; -}; - /* super.c */ @@ -214,12 +208,11 @@ udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc, } /* unicode.c */ -extern int udf_get_filename(struct super_block *, uint8_t *, int, uint8_t *, - int); +extern int udf_get_filename(struct super_block *, const uint8_t *, int, + uint8_t *, int); extern int udf_put_filename(struct super_block *, const uint8_t *, int, uint8_t *, int); -extern int udf_build_ustr(struct ustr *, dstring *, int); -extern int udf_CS0toUTF8(struct ustr *, const struct ustr *); +extern int udf_CS0toUTF8(uint8_t *, int, const uint8_t *, int); /* ialloc.c */ extern void udf_free_inode(struct inode *); diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 5599e7535401..dc5990f4c952 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,53 +28,8 @@ #include "udf_sb.h" -static int udf_translate_to_linux(uint8_t *, int, 
uint8_t *, int, uint8_t *, - int); - -static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen) -{ - if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN)) - return 0; - - memset(dest, 0, sizeof(struct ustr)); - memcpy(dest->u_name, src, strlen); - dest->u_cmpID = 0x08; - dest->u_len = strlen; - - return strlen; -} - -/* - * udf_build_ustr - */ -int udf_build_ustr(struct ustr *dest, dstring *ptr, int size) -{ - int usesize; - - if (!dest || !ptr || !size) - return -1; - BUG_ON(size < 2); - - usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name)); - usesize = min(usesize, size - 2); - dest->u_cmpID = ptr[0]; - dest->u_len = usesize; - memcpy(dest->u_name, ptr + 1, usesize); - memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize); - - return 0; -} - -/* - * udf_build_ustr_exact - */ -static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize) -{ - memset(dest, 0, sizeof(struct ustr)); - dest->u_cmpID = ptr[0]; - dest->u_len = exactsize - 1; - memcpy(dest->u_name, ptr + 1, exactsize - 1); -} +static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int, + const uint8_t *, int); static int udf_uni2char_utf8(wchar_t uni, unsigned char *out, @@ -159,53 +114,50 @@ static int udf_char2uni_utf8(const unsigned char *in, return u_len; } -static int udf_name_from_CS0(struct ustr *utf_o, - const struct ustr *ocu_i, +static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, + const uint8_t *ocu, int ocu_len, int (*conv_f)(wchar_t, unsigned char *, int)) { - const uint8_t *ocu; - uint8_t cmp_id, ocu_len; + uint8_t cmp_id; int i, len; + int str_o_len = 0; + if (str_max_len <= 0) + return 0; - ocu_len = ocu_i->u_len; if (ocu_len == 0) { - memset(utf_o, 0, sizeof(struct ustr)); + memset(str_o, 0, str_max_len); return 0; } - cmp_id = ocu_i->u_cmpID; + cmp_id = ocu[0]; if (cmp_id != 8 && cmp_id != 16) { - memset(utf_o, 0, sizeof(struct ustr)); - pr_err("unknown compression code (%d) stri=%s\n", - cmp_id, 
ocu_i->u_name); + memset(str_o, 0, str_max_len); + pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu); return -EINVAL; } - ocu = ocu_i->u_name; - utf_o->u_len = 0; - for (i = 0; (i < ocu_len) && (utf_o->u_len < UDF_NAME_LEN);) { + for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) { /* Expand OSTA compressed Unicode to Unicode */ uint32_t c = ocu[i++]; if (cmp_id == 16) c = (c << 8) | ocu[i++]; - len = conv_f(c, &utf_o->u_name[utf_o->u_len], - UDF_NAME_LEN - utf_o->u_len); + len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len); /* Valid character? */ if (len >= 0) - utf_o->u_len += len; + str_o_len += len; else if (len == -ENAMETOOLONG) break; else - utf_o->u_name[utf_o->u_len++] = '?'; + str_o[str_o_len++] = '?'; } - utf_o->u_cmpID = 8; - return utf_o->u_len; + return str_o_len; } -static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length, +static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len, + const uint8_t *str_i, int str_len, int (*conv_f)(const unsigned char *, int, wchar_t *)) { int i, len; @@ -213,18 +165,21 @@ static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length, wchar_t uni_char; int u_len, u_ch; - memset(ocu, 0, sizeof(dstring) * length); + if (ocu_max_len <= 0) + return 0; + + memset(ocu, 0, ocu_max_len); ocu[0] = 8; max_val = 0xff; u_ch = 1; try_again: - u_len = 0; - for (i = 0; i < uni->u_len; i++) { + u_len = 1; + for (i = 0; i < str_len; i++) { /* Name didn't fit? 
*/ - if (u_len + 1 + u_ch >= length) + if (u_len + u_ch > ocu_max_len) return 0; - len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char); + len = conv_f(&str_i[i], str_len - i, &uni_char); if (!len) continue; /* Invalid character, deal with it */ @@ -241,41 +196,37 @@ try_again: } if (max_val == 0xffff) - ocu[++u_len] = (uint8_t)(uni_char >> 8); - ocu[++u_len] = (uint8_t)(uni_char & 0xff); + ocu[u_len++] = (uint8_t)(uni_char >> 8); + ocu[u_len++] = (uint8_t)(uni_char & 0xff); i += len - 1; } - ocu[length - 1] = (uint8_t)u_len + 1; - return u_len + 1; + return u_len; } -int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i) +int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) { - return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8); + return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, + udf_uni2char_utf8); } -int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, +int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, uint8_t *dname, int dlen) { - struct ustr *filename, *unifilename; + uint8_t *filename; int (*conv_f)(wchar_t, unsigned char *, int); int ret; if (!slen) return -EIO; - filename = kmalloc(sizeof(struct ustr), GFP_NOFS); + if (dlen <= 0) + return 0; + + filename = kmalloc(dlen, GFP_NOFS); if (!filename) return -ENOMEM; - unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS); - if (!unifilename) { - ret = -ENOMEM; - goto out1; - } - - udf_build_ustr_exact(unifilename, sname, slen); if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { conv_f = udf_uni2char_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { @@ -283,21 +234,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, } else BUG(); - ret = udf_name_from_CS0(filename, unifilename, conv_f); + ret = udf_name_from_CS0(filename, dlen, sname, slen, conv_f); if (ret < 0) { udf_debug("Failed in udf_get_filename: sname = %s\n", sname); goto out2; } - ret = udf_translate_to_linux(dname, dlen, - 
filename->u_name, filename->u_len, - unifilename->u_name, unifilename->u_len); + ret = udf_translate_to_linux(dname, dlen, filename, dlen, + sname + 1, slen - 1); /* Zero length filename isn't valid... */ if (ret == 0) ret = -EINVAL; out2: - kfree(unifilename); -out1: kfree(filename); return ret; } @@ -305,12 +253,8 @@ out1: int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, uint8_t *dname, int dlen) { - struct ustr unifilename; int (*conv_f)(const unsigned char *, int, wchar_t *); - if (!udf_char_to_ustr(&unifilename, sname, slen)) - return 0; - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { conv_f = udf_char2uni_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { @@ -318,7 +262,7 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, } else BUG(); - return udf_name_to_CS0(dname, &unifilename, dlen, conv_f); + return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); } #define ILLEGAL_CHAR_MARK '_' @@ -329,8 +273,8 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, #define CRC_LEN 5 static int udf_translate_to_linux(uint8_t *newName, int newLen, - uint8_t *udfName, int udfLen, - uint8_t *fidName, int fidNameLen) + const uint8_t *udfName, int udfLen, + const uint8_t *fidName, int fidNameLen) { int index, newIndex = 0, needsCRC = 0; int extIndex = 0, newExtIndex = 0, hasExt = 0; -- cgit v1.2.3 From 484a10f49387e4386bf2708532e75bf78ffea2cb Mon Sep 17 00:00:00 2001 From: Andrew Gabbasov Date: Fri, 15 Jan 2016 02:44:23 -0600 Subject: udf: Merge linux specific translation into CS0 conversion function The current implementation of the udf_translate_to_linux function does not support multi-byte characters at all: it counts bytes when calculating the extension length, and when inserting the CRC inside the name it doesn't take character boundaries into account and can split a character in the middle. 
The most efficient way to properly support multi-byte characters is to merge the translation operations directly into the conversion function. This helps to avoid extra passes along the string or parsing a multi-byte character back into Unicode to find out its length. Signed-off-by: Andrew Gabbasov Signed-off-by: Jan Kara --- fs/udf/unicode.c | 280 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 152 insertions(+), 128 deletions(-) (limited to 'fs/udf/unicode.c') diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index dc5990f4c952..3ff42f4437f3 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,9 +28,6 @@ #include "udf_sb.h" -static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int, - const uint8_t *, int); - static int udf_uni2char_utf8(wchar_t uni, unsigned char *out, int boundlen) @@ -114,13 +111,83 @@ static int udf_char2uni_utf8(const unsigned char *in, return u_len; } +#define ILLEGAL_CHAR_MARK '_' +#define EXT_MARK '.' +#define CRC_MARK '#' +#define EXT_SIZE 5 +/* Number of chars we need to store generated CRC to make filename unique */ +#define CRC_LEN 5 + +static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, + int *str_o_idx, + const uint8_t *str_i, int str_i_max_len, + int *str_i_idx, + int u_ch, int *needsCRC, + int (*conv_f)(wchar_t, unsigned char *, int), + int translate) +{ + uint32_t c; + int illChar = 0; + int len, gotch = 0; + + for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) { + if (*str_o_idx >= str_o_max_len) { + *needsCRC = 1; + return gotch; + } + + /* Expand OSTA compressed Unicode to Unicode */ + c = str_i[*str_i_idx]; + if (u_ch > 1) + c = (c << 8) | str_i[*str_i_idx + 1]; + + if (translate && (c == '/' || c == 0)) + illChar = 1; + else if (illChar) + break; + else + gotch = 1; + } + if (illChar) { + *needsCRC = 1; + c = ILLEGAL_CHAR_MARK; + gotch = 1; + } + if (gotch) { + len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx); + /* Valid character? 
*/ + if (len >= 0) + *str_o_idx += len; + else if (len == -ENAMETOOLONG) { + *needsCRC = 1; + gotch = 0; + } else { + str_o[(*str_o_idx)++] = '?'; + *needsCRC = 1; + } + } + return gotch; +} + static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, const uint8_t *ocu, int ocu_len, - int (*conv_f)(wchar_t, unsigned char *, int)) + int (*conv_f)(wchar_t, unsigned char *, int), + int translate) { + uint32_t c; uint8_t cmp_id; - int i, len; - int str_o_len = 0; + int idx, len; + int u_ch; + int needsCRC = 0; + int ext_i_len, ext_max_len; + int str_o_len = 0; /* Length of resulting output */ + int ext_o_len = 0; /* Extension output length */ + int ext_crc_len = 0; /* Extension output length if used with CRC */ + int i_ext = -1; /* Extension position in input buffer */ + int o_crc = 0; /* Rightmost possible output pos for CRC+ext */ + unsigned short valueCRC; + uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; + uint8_t crc[CRC_LEN]; if (str_max_len <= 0) return 0; @@ -133,24 +200,88 @@ static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, cmp_id = ocu[0]; if (cmp_id != 8 && cmp_id != 16) { memset(str_o, 0, str_max_len); - pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu); + pr_err("unknown compression code (%d)\n", cmp_id); return -EINVAL; } + u_ch = cmp_id >> 3; - for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) { - /* Expand OSTA compressed Unicode to Unicode */ - uint32_t c = ocu[i++]; - if (cmp_id == 16) - c = (c << 8) | ocu[i++]; + ocu++; + ocu_len--; - len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len); - /* Valid character? 
*/ - if (len >= 0) - str_o_len += len; - else if (len == -ENAMETOOLONG) + if (ocu_len % u_ch) { + pr_err("incorrect filename length (%d)\n", ocu_len + 1); + return -EINVAL; + } + + if (translate) { + /* Look for extension */ + for (idx = ocu_len - u_ch, ext_i_len = 0; + (idx >= 0) && (ext_i_len < EXT_SIZE); + idx -= u_ch, ext_i_len++) { + c = ocu[idx]; + if (u_ch > 1) + c = (c << 8) | ocu[idx + 1]; + + if (c == EXT_MARK) { + if (ext_i_len) + i_ext = idx; + break; + } + } + if (i_ext >= 0) { + /* Convert extension */ + ext_max_len = min_t(int, sizeof(ext), str_max_len); + ext[ext_o_len++] = EXT_MARK; + idx = i_ext + u_ch; + while (udf_name_conv_char(ext, ext_max_len, &ext_o_len, + ocu, ocu_len, &idx, + u_ch, &needsCRC, + conv_f, translate)) { + if ((ext_o_len + CRC_LEN) < str_max_len) + ext_crc_len = ext_o_len; + } + } + } + + idx = 0; + while (1) { + if (translate && (idx == i_ext)) { + if (str_o_len > (str_max_len - ext_o_len)) + needsCRC = 1; break; - else - str_o[str_o_len++] = '?'; + } + + if (!udf_name_conv_char(str_o, str_max_len, &str_o_len, + ocu, ocu_len, &idx, + u_ch, &needsCRC, conv_f, translate)) + break; + + if (translate && + (str_o_len <= (str_max_len - ext_o_len - CRC_LEN))) + o_crc = str_o_len; + } + + if (translate) { + if (str_o_len <= 2 && str_o[0] == '.' 
&& + (str_o_len == 1 || str_o[1] == '.')) + needsCRC = 1; + if (needsCRC) { + str_o_len = o_crc; + valueCRC = crc_itu_t(0, ocu, ocu_len); + crc[0] = CRC_MARK; + crc[1] = hex_asc_upper_hi(valueCRC >> 8); + crc[2] = hex_asc_upper_lo(valueCRC >> 8); + crc[3] = hex_asc_upper_hi(valueCRC); + crc[4] = hex_asc_upper_lo(valueCRC); + len = min_t(int, CRC_LEN, str_max_len - str_o_len); + memcpy(&str_o[str_o_len], crc, len); + str_o_len += len; + ext_o_len = ext_crc_len; + } + if (ext_o_len > 0) { + memcpy(&str_o[str_o_len], ext, ext_o_len); + str_o_len += ext_o_len; + } } return str_o_len; @@ -207,13 +338,12 @@ try_again: int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) { return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, - udf_uni2char_utf8); + udf_uni2char_utf8, 0); } int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, uint8_t *dname, int dlen) { - uint8_t *filename; int (*conv_f)(wchar_t, unsigned char *, int); int ret; @@ -223,10 +353,6 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, if (dlen <= 0) return 0; - filename = kmalloc(dlen, GFP_NOFS); - if (!filename) - return -ENOMEM; - if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { conv_f = udf_uni2char_utf8; } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { @@ -234,19 +360,10 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, } else BUG(); - ret = udf_name_from_CS0(filename, dlen, sname, slen, conv_f); - if (ret < 0) { - udf_debug("Failed in udf_get_filename: sname = %s\n", sname); - goto out2; - } - - ret = udf_translate_to_linux(dname, dlen, filename, dlen, - sname + 1, slen - 1); + ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1); /* Zero length filename isn't valid... 
*/ if (ret == 0) ret = -EINVAL; -out2: - kfree(filename); return ret; } @@ -265,96 +382,3 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); } -#define ILLEGAL_CHAR_MARK '_' -#define EXT_MARK '.' -#define CRC_MARK '#' -#define EXT_SIZE 5 -/* Number of chars we need to store generated CRC to make filename unique */ -#define CRC_LEN 5 - -static int udf_translate_to_linux(uint8_t *newName, int newLen, - const uint8_t *udfName, int udfLen, - const uint8_t *fidName, int fidNameLen) -{ - int index, newIndex = 0, needsCRC = 0; - int extIndex = 0, newExtIndex = 0, hasExt = 0; - unsigned short valueCRC; - uint8_t curr; - - if (udfName[0] == '.' && - (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) { - needsCRC = 1; - newIndex = udfLen; - memcpy(newName, udfName, udfLen); - } else { - for (index = 0; index < udfLen; index++) { - curr = udfName[index]; - if (curr == '/' || curr == 0) { - needsCRC = 1; - curr = ILLEGAL_CHAR_MARK; - while (index + 1 < udfLen && - (udfName[index + 1] == '/' || - udfName[index + 1] == 0)) - index++; - } - if (curr == EXT_MARK && - (udfLen - index - 1) <= EXT_SIZE) { - if (udfLen == index + 1) - hasExt = 0; - else { - hasExt = 1; - extIndex = index; - newExtIndex = newIndex; - } - } - if (newIndex < newLen) - newName[newIndex++] = curr; - else - needsCRC = 1; - } - } - if (needsCRC) { - uint8_t ext[EXT_SIZE]; - int localExtIndex = 0; - - if (hasExt) { - int maxFilenameLen; - for (index = 0; - index < EXT_SIZE && extIndex + index + 1 < udfLen; - index++) { - curr = udfName[extIndex + index + 1]; - - if (curr == '/' || curr == 0) { - needsCRC = 1; - curr = ILLEGAL_CHAR_MARK; - while (extIndex + index + 2 < udfLen && - (index + 1 < EXT_SIZE && - (udfName[extIndex + index + 2] == '/' || - udfName[extIndex + index + 2] == 0))) - index++; - } - ext[localExtIndex++] = curr; - } - maxFilenameLen = newLen - CRC_LEN - localExtIndex; - if (newIndex > maxFilenameLen) - 
newIndex = maxFilenameLen; - else - newIndex = newExtIndex; - } else if (newIndex > newLen - CRC_LEN) - newIndex = newLen - CRC_LEN; - newName[newIndex++] = CRC_MARK; - valueCRC = crc_itu_t(0, fidName, fidNameLen); - newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8); - newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8); - newName[newIndex++] = hex_asc_upper_hi(valueCRC); - newName[newIndex++] = hex_asc_upper_lo(valueCRC); - - if (hasExt) { - newName[newIndex++] = EXT_MARK; - for (index = 0; index < localExtIndex; index++) - newName[newIndex++] = ext[index]; - } - } - - return newIndex; -} -- cgit v1.2.3