Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  linux/fs/hfsplus/unicode.c
0004  *
0005  * Copyright (C) 2001
0006  * Brad Boyer (flar@allandria.com)
0007  * (C) 2003 Ardis Technologies <roman@ardistech.com>
0008  *
0009  * Handler routines for unicode strings
0010  */
0011 
0012 #include <linux/types.h>
0013 #include <linux/nls.h>
0014 #include "hfsplus_fs.h"
0015 #include "hfsplus_raw.h"
0016 
0017 /* Fold the case of a unicode char, given the 16 bit value */
0018 /* Returns folded char, or 0 if ignorable */
0019 static inline u16 case_fold(u16 c)
0020 {
0021     u16 tmp;
0022 
0023     tmp = hfsplus_case_fold_table[c >> 8];
0024     if (tmp)
0025         tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
0026     else
0027         tmp = c;
0028     return tmp;
0029 }
0030 
0031 /* Compare unicode strings, return values like normal strcmp */
0032 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
0033                const struct hfsplus_unistr *s2)
0034 {
0035     u16 len1, len2, c1, c2;
0036     const hfsplus_unichr *p1, *p2;
0037 
0038     len1 = be16_to_cpu(s1->length);
0039     len2 = be16_to_cpu(s2->length);
0040     p1 = s1->unicode;
0041     p2 = s2->unicode;
0042 
0043     while (1) {
0044         c1 = c2 = 0;
0045 
0046         while (len1 && !c1) {
0047             c1 = case_fold(be16_to_cpu(*p1));
0048             p1++;
0049             len1--;
0050         }
0051         while (len2 && !c2) {
0052             c2 = case_fold(be16_to_cpu(*p2));
0053             p2++;
0054             len2--;
0055         }
0056 
0057         if (c1 != c2)
0058             return (c1 < c2) ? -1 : 1;
0059         if (!c1 && !c2)
0060             return 0;
0061     }
0062 }
0063 
0064 /* Compare names as a sequence of 16-bit unsigned integers */
0065 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
0066            const struct hfsplus_unistr *s2)
0067 {
0068     u16 len1, len2, c1, c2;
0069     const hfsplus_unichr *p1, *p2;
0070     int len;
0071 
0072     len1 = be16_to_cpu(s1->length);
0073     len2 = be16_to_cpu(s2->length);
0074     p1 = s1->unicode;
0075     p2 = s2->unicode;
0076 
0077     for (len = min(len1, len2); len > 0; len--) {
0078         c1 = be16_to_cpu(*p1);
0079         c2 = be16_to_cpu(*p2);
0080         if (c1 != c2)
0081             return c1 < c2 ? -1 : 1;
0082         p1++;
0083         p2++;
0084     }
0085 
0086     return len1 < len2 ? -1 :
0087            len1 > len2 ? 1 : 0;
0088 }
0089 
0090 
/*
 * Constants for arithmetic Hangul (de)composition. A precomposed syllable
 * lies in [Hangul_SBase, Hangul_SBase + Hangul_SCount) and is split into an
 * L, a V and an optional T component, each offset from its own base.
 */
#define Hangul_LBase    0x1100
#define Hangul_VBase    0x1161
#define Hangul_TBase    0x11a7  /* T index 0 means "no T component" */
#define Hangul_SBase    0xac00
#define Hangul_LCount   19
#define Hangul_VCount   21
#define Hangul_TCount   28
#define Hangul_NCount   (Hangul_VCount * Hangul_TCount)
#define Hangul_SCount   (Hangul_LCount * Hangul_NCount) /* 11172 */
0100 
0101 
0102 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
0103 {
0104     int i, s, e;
0105 
0106     s = 1;
0107     e = p[1];
0108     if (!e || cc < p[s * 2] || cc > p[e * 2])
0109         return NULL;
0110     do {
0111         i = (s + e) / 2;
0112         if (cc > p[i * 2])
0113             s = i + 1;
0114         else if (cc < p[i * 2])
0115             e = i - 1;
0116         else
0117             return hfsplus_compose_table + p[i * 2 + 1];
0118     } while (s <= e);
0119     return NULL;
0120 }
0121 
0122 int hfsplus_uni2asc(struct super_block *sb,
0123         const struct hfsplus_unistr *ustr,
0124         char *astr, int *len_p)
0125 {
0126     const hfsplus_unichr *ip;
0127     struct nls_table *nls = HFSPLUS_SB(sb)->nls;
0128     u8 *op;
0129     u16 cc, c0, c1;
0130     u16 *ce1, *ce2;
0131     int i, len, ustrlen, res, compose;
0132 
0133     op = astr;
0134     ip = ustr->unicode;
0135     ustrlen = be16_to_cpu(ustr->length);
0136     len = *len_p;
0137     ce1 = NULL;
0138     compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
0139 
0140     while (ustrlen > 0) {
0141         c0 = be16_to_cpu(*ip++);
0142         ustrlen--;
0143         /* search for single decomposed char */
0144         if (likely(compose))
0145             ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
0146         if (ce1)
0147             cc = ce1[0];
0148         else
0149             cc = 0;
0150         if (cc) {
0151             /* start of a possibly decomposed Hangul char */
0152             if (cc != 0xffff)
0153                 goto done;
0154             if (!ustrlen)
0155                 goto same;
0156             c1 = be16_to_cpu(*ip) - Hangul_VBase;
0157             if (c1 < Hangul_VCount) {
0158                 /* compose the Hangul char */
0159                 cc = (c0 - Hangul_LBase) * Hangul_VCount;
0160                 cc = (cc + c1) * Hangul_TCount;
0161                 cc += Hangul_SBase;
0162                 ip++;
0163                 ustrlen--;
0164                 if (!ustrlen)
0165                     goto done;
0166                 c1 = be16_to_cpu(*ip) - Hangul_TBase;
0167                 if (c1 > 0 && c1 < Hangul_TCount) {
0168                     cc += c1;
0169                     ip++;
0170                     ustrlen--;
0171                 }
0172                 goto done;
0173             }
0174         }
0175         while (1) {
0176             /* main loop for common case of not composed chars */
0177             if (!ustrlen)
0178                 goto same;
0179             c1 = be16_to_cpu(*ip);
0180             if (likely(compose))
0181                 ce1 = hfsplus_compose_lookup(
0182                     hfsplus_compose_table, c1);
0183             if (ce1)
0184                 break;
0185             switch (c0) {
0186             case 0:
0187                 c0 = 0x2400;
0188                 break;
0189             case '/':
0190                 c0 = ':';
0191                 break;
0192             }
0193             res = nls->uni2char(c0, op, len);
0194             if (res < 0) {
0195                 if (res == -ENAMETOOLONG)
0196                     goto out;
0197                 *op = '?';
0198                 res = 1;
0199             }
0200             op += res;
0201             len -= res;
0202             c0 = c1;
0203             ip++;
0204             ustrlen--;
0205         }
0206         ce2 = hfsplus_compose_lookup(ce1, c0);
0207         if (ce2) {
0208             i = 1;
0209             while (i < ustrlen) {
0210                 ce1 = hfsplus_compose_lookup(ce2,
0211                     be16_to_cpu(ip[i]));
0212                 if (!ce1)
0213                     break;
0214                 i++;
0215                 ce2 = ce1;
0216             }
0217             cc = ce2[0];
0218             if (cc) {
0219                 ip += i;
0220                 ustrlen -= i;
0221                 goto done;
0222             }
0223         }
0224 same:
0225         switch (c0) {
0226         case 0:
0227             cc = 0x2400;
0228             break;
0229         case '/':
0230             cc = ':';
0231             break;
0232         default:
0233             cc = c0;
0234         }
0235 done:
0236         res = nls->uni2char(cc, op, len);
0237         if (res < 0) {
0238             if (res == -ENAMETOOLONG)
0239                 goto out;
0240             *op = '?';
0241             res = 1;
0242         }
0243         op += res;
0244         len -= res;
0245     }
0246     res = 0;
0247 out:
0248     *len_p = (char *)op - astr;
0249     return res;
0250 }
0251 
0252 /*
0253  * Convert one or more ASCII characters into a single unicode character.
0254  * Returns the number of ASCII characters corresponding to the unicode char.
0255  */
0256 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
0257                   wchar_t *uc)
0258 {
0259     int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
0260     if (size <= 0) {
0261         *uc = '?';
0262         size = 1;
0263     }
0264     switch (*uc) {
0265     case 0x2400:
0266         *uc = 0;
0267         break;
0268     case ':':
0269         *uc = '/';
0270         break;
0271     }
0272     return size;
0273 }
0274 
0275 /* Decomposes a non-Hangul unicode character. */
0276 static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
0277 {
0278     int off;
0279 
0280     off = hfsplus_decompose_table[(uc >> 12) & 0xf];
0281     if (off == 0 || off == 0xffff)
0282         return NULL;
0283 
0284     off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
0285     if (!off)
0286         return NULL;
0287 
0288     off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
0289     if (!off)
0290         return NULL;
0291 
0292     off = hfsplus_decompose_table[off + (uc & 0xf)];
0293     *size = off & 3;
0294     if (*size == 0)
0295         return NULL;
0296     return hfsplus_decompose_table + (off / 4);
0297 }
0298 
0299 /*
0300  * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
0301  * precomposed Hangul, otherwise return the length of the decomposition.
0302  *
0303  * This function was adapted from sample code from the Unicode Standard
0304  * Annex #15: Unicode Normalization Forms, version 3.2.0.
0305  *
0306  * Copyright (C) 1991-2018 Unicode, Inc.  All rights reserved.  Distributed
0307  * under the Terms of Use in http://www.unicode.org/copyright.html.
0308  */
0309 static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
0310 {
0311     int index;
0312     int l, v, t;
0313 
0314     index = uc - Hangul_SBase;
0315     if (index < 0 || index >= Hangul_SCount)
0316         return 0;
0317 
0318     l = Hangul_LBase + index / Hangul_NCount;
0319     v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
0320     t = Hangul_TBase + index % Hangul_TCount;
0321 
0322     result[0] = l;
0323     result[1] = v;
0324     if (t != Hangul_TBase) {
0325         result[2] = t;
0326         return 3;
0327     }
0328     return 2;
0329 }
0330 
0331 /* Decomposes a single unicode character. */
0332 static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
0333 {
0334     u16 *result;
0335 
0336     /* Hangul is handled separately */
0337     result = hangul_buffer;
0338     *size = hfsplus_try_decompose_hangul(uc, result);
0339     if (*size == 0)
0340         result = hfsplus_decompose_nonhangul(uc, size);
0341     return result;
0342 }
0343 
0344 int hfsplus_asc2uni(struct super_block *sb,
0345             struct hfsplus_unistr *ustr, int max_unistr_len,
0346             const char *astr, int len)
0347 {
0348     int size, dsize, decompose;
0349     u16 *dstr, outlen = 0;
0350     wchar_t c;
0351     u16 dhangul[3];
0352 
0353     decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
0354     while (outlen < max_unistr_len && len > 0) {
0355         size = asc2unichar(sb, astr, len, &c);
0356 
0357         if (decompose)
0358             dstr = decompose_unichar(c, &dsize, dhangul);
0359         else
0360             dstr = NULL;
0361         if (dstr) {
0362             if (outlen + dsize > max_unistr_len)
0363                 break;
0364             do {
0365                 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
0366             } while (--dsize > 0);
0367         } else
0368             ustr->unicode[outlen++] = cpu_to_be16(c);
0369 
0370         astr += size;
0371         len -= size;
0372     }
0373     ustr->length = cpu_to_be16(outlen);
0374     if (len > 0)
0375         return -ENAMETOOLONG;
0376     return 0;
0377 }
0378 
0379 /*
0380  * Hash a string to an integer as appropriate for the HFS+ filesystem.
0381  * Composed unicode characters are decomposed and case-folding is performed
0382  * if the appropriate bits are (un)set on the superblock.
0383  */
0384 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
0385 {
0386     struct super_block *sb = dentry->d_sb;
0387     const char *astr;
0388     const u16 *dstr;
0389     int casefold, decompose, size, len;
0390     unsigned long hash;
0391     wchar_t c;
0392     u16 c2;
0393     u16 dhangul[3];
0394 
0395     casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
0396     decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
0397     hash = init_name_hash(dentry);
0398     astr = str->name;
0399     len = str->len;
0400     while (len > 0) {
0401         int dsize;
0402         size = asc2unichar(sb, astr, len, &c);
0403         astr += size;
0404         len -= size;
0405 
0406         if (decompose)
0407             dstr = decompose_unichar(c, &dsize, dhangul);
0408         else
0409             dstr = NULL;
0410         if (dstr) {
0411             do {
0412                 c2 = *dstr++;
0413                 if (casefold)
0414                     c2 = case_fold(c2);
0415                 if (!casefold || c2)
0416                     hash = partial_name_hash(c2, hash);
0417             } while (--dsize > 0);
0418         } else {
0419             c2 = c;
0420             if (casefold)
0421                 c2 = case_fold(c2);
0422             if (!casefold || c2)
0423                 hash = partial_name_hash(c2, hash);
0424         }
0425     }
0426     str->hash = end_name_hash(hash);
0427 
0428     return 0;
0429 }
0430 
0431 /*
0432  * Compare strings with HFS+ filename ordering.
0433  * Composed unicode characters are decomposed and case-folding is performed
0434  * if the appropriate bits are (un)set on the superblock.
0435  */
0436 int hfsplus_compare_dentry(const struct dentry *dentry,
0437         unsigned int len, const char *str, const struct qstr *name)
0438 {
0439     struct super_block *sb = dentry->d_sb;
0440     int casefold, decompose, size;
0441     int dsize1, dsize2, len1, len2;
0442     const u16 *dstr1, *dstr2;
0443     const char *astr1, *astr2;
0444     u16 c1, c2;
0445     wchar_t c;
0446     u16 dhangul_1[3], dhangul_2[3];
0447 
0448     casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
0449     decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
0450     astr1 = str;
0451     len1 = len;
0452     astr2 = name->name;
0453     len2 = name->len;
0454     dsize1 = dsize2 = 0;
0455     dstr1 = dstr2 = NULL;
0456 
0457     while (len1 > 0 && len2 > 0) {
0458         if (!dsize1) {
0459             size = asc2unichar(sb, astr1, len1, &c);
0460             astr1 += size;
0461             len1 -= size;
0462 
0463             if (decompose)
0464                 dstr1 = decompose_unichar(c, &dsize1,
0465                               dhangul_1);
0466             if (!decompose || !dstr1) {
0467                 c1 = c;
0468                 dstr1 = &c1;
0469                 dsize1 = 1;
0470             }
0471         }
0472 
0473         if (!dsize2) {
0474             size = asc2unichar(sb, astr2, len2, &c);
0475             astr2 += size;
0476             len2 -= size;
0477 
0478             if (decompose)
0479                 dstr2 = decompose_unichar(c, &dsize2,
0480                               dhangul_2);
0481             if (!decompose || !dstr2) {
0482                 c2 = c;
0483                 dstr2 = &c2;
0484                 dsize2 = 1;
0485             }
0486         }
0487 
0488         c1 = *dstr1;
0489         c2 = *dstr2;
0490         if (casefold) {
0491             c1 = case_fold(c1);
0492             if (!c1) {
0493                 dstr1++;
0494                 dsize1--;
0495                 continue;
0496             }
0497             c2 = case_fold(c2);
0498             if (!c2) {
0499                 dstr2++;
0500                 dsize2--;
0501                 continue;
0502             }
0503         }
0504         if (c1 < c2)
0505             return -1;
0506         else if (c1 > c2)
0507             return 1;
0508 
0509         dstr1++;
0510         dsize1--;
0511         dstr2++;
0512         dsize2--;
0513     }
0514 
0515     if (len1 < len2)
0516         return -1;
0517     if (len1 > len2)
0518         return 1;
0519     return 0;
0520 }