0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021 #include "udfdecl.h"
0022
0023 #include <linux/kernel.h>
0024 #include <linux/string.h> /* for memset */
0025 #include <linux/nls.h>
0026 #include <linux/crc-itu-t.h>
0027 #include <linux/slab.h>
0028
0029 #include "udf_sb.h"
0030
/* Unicode / UTF-16 surrogate encoding parameters */
#define PLANE_SIZE		0x10000		/* code points per plane */
#define UNICODE_MAX		0x10ffff	/* highest valid code point */
#define SURROGATE_MASK		0xfffff800	/* bits that identify a surrogate */
#define SURROGATE_PAIR		0x0000d800	/* value of those bits for a surrogate */
#define SURROGATE_LOW		0x00000400	/* set in a low (trailing) surrogate */
#define SURROGATE_CHAR_BITS	10		/* payload bits per surrogate */
#define SURROGATE_CHAR_MASK	((1 << SURROGATE_CHAR_BITS) - 1)

/* Characters used when building a translated (mangled) file name */
#define ILLEGAL_CHAR_MARK	'_'	/* stands in for a run of illegal characters */
#define EXT_MARK		'.'	/* separates the name from its extension */
#define CRC_MARK		'#'	/* introduces the CRC suffix */
#define EXT_SIZE		5	/* max extension characters that are preserved */

/* Bytes taken by the CRC suffix: CRC_MARK plus four hex digits */
#define CRC_LEN			5
0045
0046 static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len,
0047 int str_i_idx, int u_ch, unicode_t *ret)
0048 {
0049 unicode_t c;
0050 int start_idx = str_i_idx;
0051
0052
0053 c = str_i[str_i_idx++];
0054 if (u_ch > 1)
0055 c = (c << 8) | str_i[str_i_idx++];
0056 if ((c & SURROGATE_MASK) == SURROGATE_PAIR) {
0057 unicode_t next;
0058
0059
0060 if (str_i_idx >= str_i_max_len) {
0061 c = UNICODE_MAX + 1;
0062 goto out;
0063 }
0064
0065
0066 if (c & SURROGATE_LOW) {
0067 c = UNICODE_MAX + 1;
0068 goto out;
0069 }
0070
0071 WARN_ON_ONCE(u_ch != 2);
0072 next = str_i[str_i_idx++] << 8;
0073 next |= str_i[str_i_idx++];
0074 if ((next & SURROGATE_MASK) != SURROGATE_PAIR ||
0075 !(next & SURROGATE_LOW)) {
0076 c = UNICODE_MAX + 1;
0077 goto out;
0078 }
0079
0080 c = PLANE_SIZE +
0081 ((c & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) +
0082 (next & SURROGATE_CHAR_MASK);
0083 }
0084 out:
0085 *ret = c;
0086 return str_i_idx - start_idx;
0087 }
0088
0089
0090 static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
0091 int *str_o_idx,
0092 const uint8_t *str_i, int str_i_max_len,
0093 int *str_i_idx,
0094 int u_ch, int *needsCRC,
0095 int (*conv_f)(wchar_t, unsigned char *, int),
0096 int translate)
0097 {
0098 unicode_t c;
0099 int illChar = 0;
0100 int len, gotch = 0;
0101
0102 while (!gotch && *str_i_idx < str_i_max_len) {
0103 if (*str_o_idx >= str_o_max_len) {
0104 *needsCRC = 1;
0105 return gotch;
0106 }
0107
0108 len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch,
0109 &c);
0110
0111 if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) ||
0112 (translate && c == '/')) {
0113 illChar = 1;
0114 if (!translate)
0115 gotch = 1;
0116 } else if (illChar)
0117 break;
0118 else
0119 gotch = 1;
0120 *str_i_idx += len;
0121 }
0122 if (illChar) {
0123 *needsCRC = 1;
0124 c = ILLEGAL_CHAR_MARK;
0125 gotch = 1;
0126 }
0127 if (gotch) {
0128 if (conv_f) {
0129 len = conv_f(c, &str_o[*str_o_idx],
0130 str_o_max_len - *str_o_idx);
0131 } else {
0132 len = utf32_to_utf8(c, &str_o[*str_o_idx],
0133 str_o_max_len - *str_o_idx);
0134 if (len < 0)
0135 len = -ENAMETOOLONG;
0136 }
0137
0138 if (len >= 0)
0139 *str_o_idx += len;
0140 else if (len == -ENAMETOOLONG) {
0141 *needsCRC = 1;
0142 gotch = 0;
0143 } else {
0144 str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK;
0145 *needsCRC = 1;
0146 }
0147 }
0148 return gotch;
0149 }
0150
0151 static int udf_name_from_CS0(struct super_block *sb,
0152 uint8_t *str_o, int str_max_len,
0153 const uint8_t *ocu, int ocu_len,
0154 int translate)
0155 {
0156 uint32_t c;
0157 uint8_t cmp_id;
0158 int idx, len;
0159 int u_ch;
0160 int needsCRC = 0;
0161 int ext_i_len, ext_max_len;
0162 int str_o_len = 0;
0163 int ext_o_len = 0;
0164 int ext_crc_len = 0;
0165 int i_ext = -1;
0166 int o_crc = 0;
0167 unsigned short valueCRC;
0168 uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
0169 uint8_t crc[CRC_LEN];
0170 int (*conv_f)(wchar_t, unsigned char *, int);
0171
0172 if (str_max_len <= 0)
0173 return 0;
0174
0175 if (ocu_len == 0) {
0176 memset(str_o, 0, str_max_len);
0177 return 0;
0178 }
0179
0180 if (UDF_SB(sb)->s_nls_map)
0181 conv_f = UDF_SB(sb)->s_nls_map->uni2char;
0182 else
0183 conv_f = NULL;
0184
0185 cmp_id = ocu[0];
0186 if (cmp_id != 8 && cmp_id != 16) {
0187 memset(str_o, 0, str_max_len);
0188 pr_err("unknown compression code (%u)\n", cmp_id);
0189 return -EINVAL;
0190 }
0191 u_ch = cmp_id >> 3;
0192
0193 ocu++;
0194 ocu_len--;
0195
0196 if (ocu_len % u_ch) {
0197 pr_err("incorrect filename length (%d)\n", ocu_len + 1);
0198 return -EINVAL;
0199 }
0200
0201 if (translate) {
0202
0203 for (idx = ocu_len - u_ch, ext_i_len = 0;
0204 (idx >= 0) && (ext_i_len < EXT_SIZE);
0205 idx -= u_ch, ext_i_len++) {
0206 c = ocu[idx];
0207 if (u_ch > 1)
0208 c = (c << 8) | ocu[idx + 1];
0209
0210 if (c == EXT_MARK) {
0211 if (ext_i_len)
0212 i_ext = idx;
0213 break;
0214 }
0215 }
0216 if (i_ext >= 0) {
0217
0218 ext_max_len = min_t(int, sizeof(ext), str_max_len);
0219 ext[ext_o_len++] = EXT_MARK;
0220 idx = i_ext + u_ch;
0221 while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
0222 ocu, ocu_len, &idx,
0223 u_ch, &needsCRC,
0224 conv_f, translate)) {
0225 if ((ext_o_len + CRC_LEN) < str_max_len)
0226 ext_crc_len = ext_o_len;
0227 }
0228 }
0229 }
0230
0231 idx = 0;
0232 while (1) {
0233 if (translate && (idx == i_ext)) {
0234 if (str_o_len > (str_max_len - ext_o_len))
0235 needsCRC = 1;
0236 break;
0237 }
0238
0239 if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
0240 ocu, ocu_len, &idx,
0241 u_ch, &needsCRC, conv_f, translate))
0242 break;
0243
0244 if (translate &&
0245 (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
0246 o_crc = str_o_len;
0247 }
0248
0249 if (translate) {
0250 if (str_o_len <= 2 && str_o[0] == '.' &&
0251 (str_o_len == 1 || str_o[1] == '.'))
0252 needsCRC = 1;
0253 if (needsCRC) {
0254 str_o_len = o_crc;
0255 valueCRC = crc_itu_t(0, ocu, ocu_len);
0256 crc[0] = CRC_MARK;
0257 crc[1] = hex_asc_upper_hi(valueCRC >> 8);
0258 crc[2] = hex_asc_upper_lo(valueCRC >> 8);
0259 crc[3] = hex_asc_upper_hi(valueCRC);
0260 crc[4] = hex_asc_upper_lo(valueCRC);
0261 len = min_t(int, CRC_LEN, str_max_len - str_o_len);
0262 memcpy(&str_o[str_o_len], crc, len);
0263 str_o_len += len;
0264 ext_o_len = ext_crc_len;
0265 }
0266 if (ext_o_len > 0) {
0267 memcpy(&str_o[str_o_len], ext, ext_o_len);
0268 str_o_len += ext_o_len;
0269 }
0270 }
0271
0272 return str_o_len;
0273 }
0274
0275 static int udf_name_to_CS0(struct super_block *sb,
0276 uint8_t *ocu, int ocu_max_len,
0277 const uint8_t *str_i, int str_len)
0278 {
0279 int i, len;
0280 unsigned int max_val;
0281 int u_len, u_ch;
0282 unicode_t uni_char;
0283 int (*conv_f)(const unsigned char *, int, wchar_t *);
0284
0285 if (ocu_max_len <= 0)
0286 return 0;
0287
0288 if (UDF_SB(sb)->s_nls_map)
0289 conv_f = UDF_SB(sb)->s_nls_map->char2uni;
0290 else
0291 conv_f = NULL;
0292
0293 memset(ocu, 0, ocu_max_len);
0294 ocu[0] = 8;
0295 max_val = 0xff;
0296 u_ch = 1;
0297
0298 try_again:
0299 u_len = 1;
0300 for (i = 0; i < str_len; i += len) {
0301
0302 if (u_len + u_ch > ocu_max_len)
0303 return 0;
0304 if (conv_f) {
0305 wchar_t wchar;
0306
0307 len = conv_f(&str_i[i], str_len - i, &wchar);
0308 if (len > 0)
0309 uni_char = wchar;
0310 } else {
0311 len = utf8_to_utf32(&str_i[i], str_len - i,
0312 &uni_char);
0313 }
0314
0315 if (len <= 0 || uni_char > UNICODE_MAX) {
0316 len = 1;
0317 uni_char = '?';
0318 }
0319
0320 if (uni_char > max_val) {
0321 unicode_t c;
0322
0323 if (max_val == 0xff) {
0324 max_val = 0xffff;
0325 ocu[0] = 0x10;
0326 u_ch = 2;
0327 goto try_again;
0328 }
0329
0330
0331
0332
0333 if (u_len + 2 * u_ch > ocu_max_len)
0334 return 0;
0335
0336 uni_char -= PLANE_SIZE;
0337 c = SURROGATE_PAIR |
0338 ((uni_char >> SURROGATE_CHAR_BITS) &
0339 SURROGATE_CHAR_MASK);
0340 ocu[u_len++] = (uint8_t)(c >> 8);
0341 ocu[u_len++] = (uint8_t)(c & 0xff);
0342 uni_char = SURROGATE_PAIR | SURROGATE_LOW |
0343 (uni_char & SURROGATE_CHAR_MASK);
0344 }
0345
0346 if (max_val == 0xffff)
0347 ocu[u_len++] = (uint8_t)(uni_char >> 8);
0348 ocu[u_len++] = (uint8_t)(uni_char & 0xff);
0349 }
0350
0351 return u_len;
0352 }
0353
0354
0355
0356
0357
0358
/*
 * Convert a CS0 dstring to the mount's output encoding.
 *
 * @sb:    superblock, passed through to udf_name_from_CS0()
 * @utf_o: output buffer
 * @o_len: size of @utf_o in bytes
 * @ocu_i: dstring field; its last byte holds the length of the string
 *         (compression ID included)
 * @i_len: size of the dstring field in bytes
 *
 * A recorded length that does not fit in the field is reported with a
 * warning and truncated to the field size instead of being rejected.
 *
 * Returns whatever udf_name_from_CS0() returns for the resulting length:
 * the number of bytes written to @utf_o, or a negative errno.
 */
int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
		      const uint8_t *ocu_i, int i_len)
{
	int s_len = 0;

	if (i_len > 0) {
		s_len = ocu_i[i_len - 1];
		if (s_len >= i_len) {
			pr_warn("incorrect dstring lengths (%d/%d),"
				" truncating\n", s_len, i_len);
			s_len = i_len - 1;
			/*
			 * 2-byte encoding: after the compression ID the
			 * payload (s_len - 1 bytes) must be a whole number of
			 * 16-bit units, so drop a trailing odd byte. Skip this
			 * when nothing is left, so the length cannot go
			 * negative.
			 */
			if (ocu_i[0] == 16 && s_len > 0)
				s_len -= (s_len - 1) & 1;
		}
	}

	return udf_name_from_CS0(sb, utf_o, o_len, ocu_i, s_len, 0);
}
0378
/*
 * Translate an on-disk CS0 file name into a name in the mount's charset.
 *
 * @sname/@slen: CS0 name and its length in bytes
 * @dname/@dlen: output buffer and its size in bytes
 *
 * Returns the length of the translated name, 0 if @dlen is not positive,
 * -EIO if @slen is zero, -EINVAL if the translation yields an empty name,
 * or the negative errno reported by udf_name_from_CS0().
 */
int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
		     uint8_t *dname, int dlen)
{
	int converted;

	if (slen == 0)
		return -EIO;
	if (dlen < 1)
		return 0;

	converted = udf_name_from_CS0(sb, dname, dlen, sname, slen, 1);

	/* A zero-length result is reported as an error */
	return converted ? converted : -EINVAL;
}
0396
/*
 * Encode a file name (@sname, @slen bytes) as CS0 into @dname, which can
 * hold @dlen bytes.
 *
 * Returns the value of udf_name_to_CS0(): the encoded length in bytes, or 0
 * when the name could not be stored.
 */
int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
		     uint8_t *dname, int dlen)
{
	int cs0_len = udf_name_to_CS0(sb, dname, dlen, sname, slen);

	return cs0_len;
}
0402