fs/ntfs/unistr.c

0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
0004  *
0005  * Copyright (c) 2001-2006 Anton Altaparmakov
0006  */
0007
0008 #include <linux/slab.h>
0009
0010 #include "types.h"
0011 #include "debug.h"
0012 #include "ntfs.h"
0013
0014 /*
0015  * IMPORTANT
0016  * =========
0017  *
0018  * All these routines assume that the Unicode characters are in little endian
0019  * encoding inside the strings!!!
0020  */
0021
0022 /*
0023  * This is used by the name collation functions to quickly determine what
0024  * characters are (in)valid.
0025  */
0026 static const u8 legal_ansi_char_array[0x40] = {
0027     0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0028     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0029
0030     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0031     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0032
0033     0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
0034     0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
0035
0036     0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
0037     0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
0038 };
0039
0040 /**
0041  * ntfs_are_names_equal - compare two Unicode names for equality
0042  * @s1:         name to compare to @s2
0043  * @s1_len:     length in Unicode characters of @s1
0044  * @s2:         name to compare to @s1
0045  * @s2_len:     length in Unicode characters of @s2
0046  * @ic:         ignore case bool
0047  * @upcase:     upcase table (only if @ic == IGNORE_CASE)
0048  * @upcase_size:    length in Unicode characters of @upcase (if present)
0049  *
0050  * Compare the names @s1 and @s2 and return 'true' (1) if the names are
0051  * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE,
0052  * the @upcase table is used to performa a case insensitive comparison.
0053  */
0054 bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len,
0055         const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic,
0056         const ntfschar *upcase, const u32 upcase_size)
0057 {
0058     if (s1_len != s2_len)
0059         return false;
0060     if (ic == CASE_SENSITIVE)
0061         return !ntfs_ucsncmp(s1, s2, s1_len);
0062     return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size);
0063 }
0064
0065 /**
0066  * ntfs_collate_names - collate two Unicode names
0067  * @name1:  first Unicode name to compare
0068  * @name2:  second Unicode name to compare
0069  * @err_val:    if @name1 contains an invalid character return this value
0070  * @ic:     either CASE_SENSITIVE or IGNORE_CASE
0071  * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE)
0072  * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE)
0073  *
0074  * ntfs_collate_names collates two Unicode names and returns:
0075  *
0076  *  -1 if the first name collates before the second one,
0077  *   0 if the names match,
0078  *   1 if the second name collates before the first one, or
0079  * @err_val if an invalid character is found in @name1 during the comparison.
0080  *
0081  * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
0082  */
0083 int ntfs_collate_names(const ntfschar *name1, const u32 name1_len,
0084         const ntfschar *name2, const u32 name2_len,
0085         const int err_val, const IGNORE_CASE_BOOL ic,
0086         const ntfschar *upcase, const u32 upcase_len)
0087 {
0088     u32 cnt, min_len;
0089     u16 c1, c2;
0090
0091     min_len = name1_len;
0092     if (name1_len > name2_len)
0093         min_len = name2_len;
0094     for (cnt = 0; cnt < min_len; ++cnt) {
0095         c1 = le16_to_cpu(*name1++);
0096         c2 = le16_to_cpu(*name2++);
0097         if (ic) {
0098             if (c1 < upcase_len)
0099                 c1 = le16_to_cpu(upcase[c1]);
0100             if (c2 < upcase_len)
0101                 c2 = le16_to_cpu(upcase[c2]);
0102         }
0103         if (c1 < 64 && legal_ansi_char_array[c1] & 8)
0104             return err_val;
0105         if (c1 < c2)
0106             return -1;
0107         if (c1 > c2)
0108             return 1;
0109     }
0110     if (name1_len < name2_len)
0111         return -1;
0112     if (name1_len == name2_len)
0113         return 0;
0114     /* name1_len > name2_len */
0115     c1 = le16_to_cpu(*name1);
0116     if (c1 < 64 && legal_ansi_char_array[c1] & 8)
0117         return err_val;
0118     return 1;
0119 }
0120
0121 /**
0122  * ntfs_ucsncmp - compare two little endian Unicode strings
0123  * @s1:     first string
0124  * @s2:     second string
0125  * @n:      maximum unicode characters to compare
0126  *
0127  * Compare the first @n characters of the Unicode strings @s1 and @s2,
0128  * The strings in little endian format and appropriate le16_to_cpu()
0129  * conversion is performed on non-little endian machines.
0130  *
0131  * The function returns an integer less than, equal to, or greater than zero
0132  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
0133  * to be less than, to match, or be greater than @s2.
0134  */
0135 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
0136 {
0137     u16 c1, c2;
0138     size_t i;
0139
0140     for (i = 0; i < n; ++i) {
0141         c1 = le16_to_cpu(s1[i]);
0142         c2 = le16_to_cpu(s2[i]);
0143         if (c1 < c2)
0144             return -1;
0145         if (c1 > c2)
0146             return 1;
0147         if (!c1)
0148             break;
0149     }
0150     return 0;
0151 }
0152
0153 /**
0154  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
0155  * @s1:         first string
0156  * @s2:         second string
0157  * @n:          maximum unicode characters to compare
0158  * @upcase:     upcase table
0159  * @upcase_size:    upcase table size in Unicode characters
0160  *
0161  * Compare the first @n characters of the Unicode strings @s1 and @s2,
0162  * ignoring case. The strings in little endian format and appropriate
0163  * le16_to_cpu() conversion is performed on non-little endian machines.
0164  *
0165  * Each character is uppercased using the @upcase table before the comparison.
0166  *
0167  * The function returns an integer less than, equal to, or greater than zero
0168  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
0169  * to be less than, to match, or be greater than @s2.
0170  */
0171 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
0172         const ntfschar *upcase, const u32 upcase_size)
0173 {
0174     size_t i;
0175     u16 c1, c2;
0176
0177     for (i = 0; i < n; ++i) {
0178         if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
0179             c1 = le16_to_cpu(upcase[c1]);
0180         if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
0181             c2 = le16_to_cpu(upcase[c2]);
0182         if (c1 < c2)
0183             return -1;
0184         if (c1 > c2)
0185             return 1;
0186         if (!c1)
0187             break;
0188     }
0189     return 0;
0190 }
0191
0192 void ntfs_upcase_name(ntfschar *name, u32 name_len, const ntfschar *upcase,
0193         const u32 upcase_len)
0194 {
0195     u32 i;
0196     u16 u;
0197
0198     for (i = 0; i < name_len; i++)
0199         if ((u = le16_to_cpu(name[i])) < upcase_len)
0200             name[i] = upcase[u];
0201 }
0202
0203 void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr,
0204         const ntfschar *upcase, const u32 upcase_len)
0205 {
0206     ntfs_upcase_name((ntfschar*)&file_name_attr->file_name,
0207             file_name_attr->file_name_length, upcase, upcase_len);
0208 }
0209
0210 int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
0211         FILE_NAME_ATTR *file_name_attr2,
0212         const int err_val, const IGNORE_CASE_BOOL ic,
0213         const ntfschar *upcase, const u32 upcase_len)
0214 {
0215     return ntfs_collate_names((ntfschar*)&file_name_attr1->file_name,
0216             file_name_attr1->file_name_length,
0217             (ntfschar*)&file_name_attr2->file_name,
0218             file_name_attr2->file_name_length,
0219             err_val, ic, upcase, upcase_len);
0220 }
0221
0222 /**
0223  * ntfs_nlstoucs - convert NLS string to little endian Unicode string
0224  * @vol:    ntfs volume which we are working with
0225  * @ins:    input NLS string buffer
0226  * @ins_len:    length of input string in bytes
0227  * @outs:   on return contains the allocated output Unicode string buffer
0228  *
0229  * Convert the input string @ins, which is in whatever format the loaded NLS
0230  * map dictates, into a little endian, 2-byte Unicode string.
0231  *
0232  * This function allocates the string and the caller is responsible for
0233  * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it.
0234  *
0235  * On success the function returns the number of Unicode characters written to
0236  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
0237  * character. *@outs is set to the allocated output string buffer.
0238  *
0239  * On error, a negative number corresponding to the error code is returned. In
0240  * that case the output string is not allocated. Both *@outs and *@outs_len
0241  * are then undefined.
0242  *
0243  * This might look a bit odd due to fast path optimization...
0244  */
0245 int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
0246         const int ins_len, ntfschar **outs)
0247 {
0248     struct nls_table *nls = vol->nls_map;
0249     ntfschar *ucs;
0250     wchar_t wc;
0251     int i, o, wc_len;
0252
0253     /* We do not trust outside sources. */
0254     if (likely(ins)) {
0255         ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS);
0256         if (likely(ucs)) {
0257             for (i = o = 0; i < ins_len; i += wc_len) {
0258                 wc_len = nls->char2uni(ins + i, ins_len - i,
0259                         &wc);
0260                 if (likely(wc_len >= 0 &&
0261                         o < NTFS_MAX_NAME_LEN)) {
0262                     if (likely(wc)) {
0263                         ucs[o++] = cpu_to_le16(wc);
0264                         continue;
0265                     } /* else if (!wc) */
0266                     break;
0267                 } /* else if (wc_len < 0 ||
0268                         o >= NTFS_MAX_NAME_LEN) */
0269                 goto name_err;
0270             }
0271             ucs[o] = 0;
0272             *outs = ucs;
0273             return o;
0274         } /* else if (!ucs) */
0275         ntfs_error(vol->sb, "Failed to allocate buffer for converted "
0276                 "name from ntfs_name_cache.");
0277         return -ENOMEM;
0278     } /* else if (!ins) */
0279     ntfs_error(vol->sb, "Received NULL pointer.");
0280     return -EINVAL;
0281 name_err:
0282     kmem_cache_free(ntfs_name_cache, ucs);
0283     if (wc_len < 0) {
0284         ntfs_error(vol->sb, "Name using character set %s contains "
0285                 "characters that cannot be converted to "
0286                 "Unicode.", nls->charset);
0287         i = -EILSEQ;
0288     } else /* if (o >= NTFS_MAX_NAME_LEN) */ {
0289         ntfs_error(vol->sb, "Name is too long (maximum length for a "
0290                 "name on NTFS is %d Unicode characters.",
0291                 NTFS_MAX_NAME_LEN);
0292         i = -ENAMETOOLONG;
0293     }
0294     return i;
0295 }
0296
0297 /**
0298  * ntfs_ucstonls - convert little endian Unicode string to NLS string
0299  * @vol:    ntfs volume which we are working with
0300  * @ins:    input Unicode string buffer
0301  * @ins_len:    length of input string in Unicode characters
0302  * @outs:   on return contains the (allocated) output NLS string buffer
0303  * @outs_len:   length of output string buffer in bytes
0304  *
0305  * Convert the input little endian, 2-byte Unicode string @ins, of length
0306  * @ins_len into the string format dictated by the loaded NLS.
0307  *
0308  * If *@outs is NULL, this function allocates the string and the caller is
0309  * responsible for calling kfree(*@outs); when finished with it. In this case
0310  * @outs_len is ignored and can be 0.
0311  *
0312  * On success the function returns the number of bytes written to the output
0313  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
0314  * string buffer was allocated, *@outs is set to it.
0315  *
0316  * On error, a negative number corresponding to the error code is returned. In
0317  * that case the output string is not allocated. The contents of *@outs are
0318  * then undefined.
0319  *
0320  * This might look a bit odd due to fast path optimization...
0321  */
0322 int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins,
0323         const int ins_len, unsigned char **outs, int outs_len)
0324 {
0325     struct nls_table *nls = vol->nls_map;
0326     unsigned char *ns;
0327     int i, o, ns_len, wc;
0328
0329     /* We don't trust outside sources. */
0330     if (ins) {
0331         ns = *outs;
0332         ns_len = outs_len;
0333         if (ns && !ns_len) {
0334             wc = -ENAMETOOLONG;
0335             goto conversion_err;
0336         }
0337         if (!ns) {
0338             ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
0339             ns = kmalloc(ns_len + 1, GFP_NOFS);
0340             if (!ns)
0341                 goto mem_err_out;
0342         }
0343         for (i = o = 0; i < ins_len; i++) {
0344 retry:          wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
0345                     ns_len - o);
0346             if (wc > 0) {
0347                 o += wc;
0348                 continue;
0349             } else if (!wc)
0350                 break;
0351             else if (wc == -ENAMETOOLONG && ns != *outs) {
0352                 unsigned char *tc;
0353                 /* Grow in multiples of 64 bytes. */
0354                 tc = kmalloc((ns_len + 64) &
0355                         ~63, GFP_NOFS);
0356                 if (tc) {
0357                     memcpy(tc, ns, ns_len);
0358                     ns_len = ((ns_len + 64) & ~63) - 1;
0359                     kfree(ns);
0360                     ns = tc;
0361                     goto retry;
0362                 } /* No memory so goto conversion_error; */
0363             } /* wc < 0, real error. */
0364             goto conversion_err;
0365         }
0366         ns[o] = 0;
0367         *outs = ns;
0368         return o;
0369     } /* else (!ins) */
0370     ntfs_error(vol->sb, "Received NULL pointer.");
0371     return -EINVAL;
0372 conversion_err:
0373     ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
0374             "converted to character set %s.  You might want to "
0375             "try to use the mount option nls=utf8.", nls->charset);
0376     if (ns != *outs)
0377         kfree(ns);
0378     if (wc != -ENAMETOOLONG)
0379         wc = -EILSEQ;
0380     return wc;
0381 mem_err_out:
0382     ntfs_error(vol->sb, "Failed to allocate name!");
0383     return -ENOMEM;
0384 }