fs/ksmbd/unicode.c

0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *   Some of the source code in this file came from fs/cifs/cifs_unicode.c
0004  *
0005  *   Copyright (c) International Business Machines  Corp., 2000,2009
0006  *   Modified by Steve French (sfrench@us.ibm.com)
0007  *   Modified by Namjae Jeon (linkinjeon@kernel.org)
0008  */
0009 #include <linux/fs.h>
0010 #include <linux/slab.h>
0011 #include <asm/unaligned.h>
0012 #include "glob.h"
0013 #include "unicode.h"
0014 #include "uniupr.h"
0015 #include "smb_common.h"
0016
0017 /*
0018  * smb_utf16_bytes() - how long will a string be after conversion?
0019  * @from:   pointer to input string
0020  * @maxbytes:   don't go past this many bytes of input string
0021  * @codepage:   destination codepage
0022  *
0023  * Walk a utf16le string and return the number of bytes that the string will
0024  * be after being converted to the given charset, not including any null
0025  * termination required. Don't walk past maxbytes in the source buffer.
0026  *
0027  * Return:  string length after conversion
0028  */
0029 static int smb_utf16_bytes(const __le16 *from, int maxbytes,
0030                const struct nls_table *codepage)
0031 {
0032     int i;
0033     int charlen, outlen = 0;
0034     int maxwords = maxbytes / 2;
0035     char tmp[NLS_MAX_CHARSET_SIZE];
0036     __u16 ftmp;
0037
0038     for (i = 0; i < maxwords; i++) {
0039         ftmp = get_unaligned_le16(&from[i]);
0040         if (ftmp == 0)
0041             break;
0042
0043         charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
0044         if (charlen > 0)
0045             outlen += charlen;
0046         else
0047             outlen++;
0048     }
0049
0050     return outlen;
0051 }
0052
0053 /*
0054  * cifs_mapchar() - convert a host-endian char to proper char in codepage
0055  * @target: where converted character should be copied
0056  * @src_char:   2 byte host-endian source character
0057  * @cp:     codepage to which character should be converted
0058  * @mapchar:    should character be mapped according to mapchars mount option?
0059  *
0060  * This function handles the conversion of a single character. It is the
0061  * responsibility of the caller to ensure that the target buffer is large
0062  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
0063  *
0064  * Return:  string length after conversion
0065  */
0066 static int
0067 cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
0068          bool mapchar)
0069 {
0070     int len = 1;
0071
0072     if (!mapchar)
0073         goto cp_convert;
0074
0075     /*
0076      * BB: Cannot handle remapping UNI_SLASH until all the calls to
0077      *     build_path_from_dentry are modified, as they use slash as
0078      *     separator.
0079      */
0080     switch (src_char) {
0081     case UNI_COLON:
0082         *target = ':';
0083         break;
0084     case UNI_ASTERISK:
0085         *target = '*';
0086         break;
0087     case UNI_QUESTION:
0088         *target = '?';
0089         break;
0090     case UNI_PIPE:
0091         *target = '|';
0092         break;
0093     case UNI_GRTRTHAN:
0094         *target = '>';
0095         break;
0096     case UNI_LESSTHAN:
0097         *target = '<';
0098         break;
0099     default:
0100         goto cp_convert;
0101     }
0102
0103 out:
0104     return len;
0105
0106 cp_convert:
0107     len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
0108     if (len <= 0) {
0109         *target = '?';
0110         len = 1;
0111     }
0112
0113     goto out;
0114 }
0115
/*
 * is_char_allowed() - check for valid character
 * @ch:		pointer to the byte to be checked
 *
 * A byte with bit 0x80 set is always accepted. Otherwise the byte is
 * rejected when it is a control character (<= 0x1f) or one of
 * '?', '"', '<', '>' and '|'.
 *
 * Return:	1 if char is allowed, otherwise 0
 */
static inline int is_char_allowed(char *ch)
{
	const char c = *ch;

	/* bytes with the top bit set are never filtered */
	if (c & 0x80)
		return 1;

	/* control characters */
	if (c <= 0x1f)
		return 0;

	switch (c) {
	case '?':
	case '"':
	case '<':
	case '>':
	case '|':
		return 0;
	default:
		return 1;
	}
}
0133
0134 /*
0135  * smb_from_utf16() - convert utf16le string to local charset
0136  * @to:     destination buffer
0137  * @from:   source buffer
0138  * @tolen:  destination buffer size (in bytes)
0139  * @fromlen:    source buffer size (in bytes)
0140  * @codepage:   codepage to which characters should be converted
0141  * @mapchar:    should characters be remapped according to the mapchars option?
0142  *
0143  * Convert a little-endian utf16le string (as sent by the server) to a string
0144  * in the provided codepage. The tolen and fromlen parameters are to ensure
0145  * that the code doesn't walk off of the end of the buffer (which is always
0146  * a danger if the alignment of the source buffer is off). The destination
0147  * string is always properly null terminated and fits in the destination
0148  * buffer. Returns the length of the destination string in bytes (including
0149  * null terminator).
0150  *
0151  * Note that some windows versions actually send multiword UTF-16 characters
0152  * instead of straight UTF16-2. The linux nls routines however aren't able to
0153  * deal with those characters properly. In the event that we get some of
0154  * those characters, they won't be translated properly.
0155  *
0156  * Return:  string length after conversion
0157  */
0158 static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
0159               const struct nls_table *codepage, bool mapchar)
0160 {
0161     int i, charlen, safelen;
0162     int outlen = 0;
0163     int nullsize = nls_nullsize(codepage);
0164     int fromwords = fromlen / 2;
0165     char tmp[NLS_MAX_CHARSET_SIZE];
0166     __u16 ftmp;
0167
0168     /*
0169      * because the chars can be of varying widths, we need to take care
0170      * not to overflow the destination buffer when we get close to the
0171      * end of it. Until we get to this offset, we don't need to check
0172      * for overflow however.
0173      */
0174     safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
0175
0176     for (i = 0; i < fromwords; i++) {
0177         ftmp = get_unaligned_le16(&from[i]);
0178         if (ftmp == 0)
0179             break;
0180
0181         /*
0182          * check to see if converting this character might make the
0183          * conversion bleed into the null terminator
0184          */
0185         if (outlen >= safelen) {
0186             charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
0187             if ((outlen + charlen) > (tolen - nullsize))
0188                 break;
0189         }
0190
0191         /* put converted char into 'to' buffer */
0192         charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
0193         outlen += charlen;
0194     }
0195
0196     /* properly null-terminate string */
0197     for (i = 0; i < nullsize; i++)
0198         to[outlen++] = 0;
0199
0200     return outlen;
0201 }
0202
0203 /*
0204  * smb_strtoUTF16() - Convert character string to unicode string
0205  * @to:     destination buffer
0206  * @from:   source buffer
0207  * @len:    destination buffer size (in bytes)
0208  * @codepage:   codepage to which characters should be converted
0209  *
0210  * Return:  string length after conversion
0211  */
0212 int smb_strtoUTF16(__le16 *to, const char *from, int len,
0213            const struct nls_table *codepage)
0214 {
0215     int charlen;
0216     int i;
0217     wchar_t wchar_to; /* needed to quiet sparse */
0218
0219     /* special case for utf8 to handle no plane0 chars */
0220     if (!strcmp(codepage->charset, "utf8")) {
0221         /*
0222          * convert utf8 -> utf16, we assume we have enough space
0223          * as caller should have assumed conversion does not overflow
0224          * in destination len is length in wchar_t units (16bits)
0225          */
0226         i  = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
0227                      (wchar_t *)to, len);
0228
0229         /* if success terminate and exit */
0230         if (i >= 0)
0231             goto success;
0232         /*
0233          * if fails fall back to UCS encoding as this
0234          * function should not return negative values
0235          * currently can fail only if source contains
0236          * invalid encoded characters
0237          */
0238     }
0239
0240     for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
0241         charlen = codepage->char2uni(from, len, &wchar_to);
0242         if (charlen < 1) {
0243             /* A question mark */
0244             wchar_to = 0x003f;
0245             charlen = 1;
0246         }
0247         put_unaligned_le16(wchar_to, &to[i]);
0248     }
0249
0250 success:
0251     put_unaligned_le16(0, &to[i]);
0252     return i;
0253 }
0254
0255 /*
0256  * smb_strndup_from_utf16() - copy a string from wire format to the local
0257  *      codepage
0258  * @src:    source string
0259  * @maxlen: don't walk past this many bytes in the source string
0260  * @is_unicode: is this a unicode string?
0261  * @codepage:   destination codepage
0262  *
0263  * Take a string given by the server, convert it to the local codepage and
0264  * put it in a new buffer. Returns a pointer to the new string or NULL on
0265  * error.
0266  *
0267  * Return:  destination string buffer or error ptr
0268  */
0269 char *smb_strndup_from_utf16(const char *src, const int maxlen,
0270                  const bool is_unicode,
0271                  const struct nls_table *codepage)
0272 {
0273     int len, ret;
0274     char *dst;
0275
0276     if (is_unicode) {
0277         len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
0278         len += nls_nullsize(codepage);
0279         dst = kmalloc(len, GFP_KERNEL);
0280         if (!dst)
0281             return ERR_PTR(-ENOMEM);
0282         ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
0283                      false);
0284         if (ret < 0) {
0285             kfree(dst);
0286             return ERR_PTR(-EINVAL);
0287         }
0288     } else {
0289         len = strnlen(src, maxlen);
0290         len++;
0291         dst = kmalloc(len, GFP_KERNEL);
0292         if (!dst)
0293             return ERR_PTR(-ENOMEM);
0294         strscpy(dst, src, len);
0295     }
0296
0297     return dst;
0298 }
0299
0300 /*
0301  * Convert 16 bit Unicode pathname to wire format from string in current code
0302  * page. Conversion may involve remapping up the six characters that are
0303  * only legal in POSIX-like OS (if they are present in the string). Path
0304  * names are little endian 16 bit Unicode on the wire
0305  */
0306 /*
0307  * smbConvertToUTF16() - convert string from local charset to utf16
0308  * @target: destination buffer
0309  * @source: source buffer
0310  * @srclen: source buffer size (in bytes)
0311  * @cp:     codepage to which characters should be converted
0312  * @mapchar:    should characters be remapped according to the mapchars option?
0313  *
0314  * Convert 16 bit Unicode pathname to wire format from string in current code
0315  * page. Conversion may involve remapping up the six characters that are
0316  * only legal in POSIX-like OS (if they are present in the string). Path
0317  * names are little endian 16 bit Unicode on the wire
0318  *
0319  * Return:  char length after conversion
0320  */
0321 int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
0322               const struct nls_table *cp, int mapchars)
0323 {
0324     int i, j, charlen;
0325     char src_char;
0326     __le16 dst_char;
0327     wchar_t tmp;
0328
0329     if (!mapchars)
0330         return smb_strtoUTF16(target, source, srclen, cp);
0331
0332     for (i = 0, j = 0; i < srclen; j++) {
0333         src_char = source[i];
0334         charlen = 1;
0335         switch (src_char) {
0336         case 0:
0337             put_unaligned(0, &target[j]);
0338             return j;
0339         case ':':
0340             dst_char = cpu_to_le16(UNI_COLON);
0341             break;
0342         case '*':
0343             dst_char = cpu_to_le16(UNI_ASTERISK);
0344             break;
0345         case '?':
0346             dst_char = cpu_to_le16(UNI_QUESTION);
0347             break;
0348         case '<':
0349             dst_char = cpu_to_le16(UNI_LESSTHAN);
0350             break;
0351         case '>':
0352             dst_char = cpu_to_le16(UNI_GRTRTHAN);
0353             break;
0354         case '|':
0355             dst_char = cpu_to_le16(UNI_PIPE);
0356             break;
0357         /*
0358          * FIXME: We can not handle remapping backslash (UNI_SLASH)
0359          * until all the calls to build_path_from_dentry are modified,
0360          * as they use backslash as separator.
0361          */
0362         default:
0363             charlen = cp->char2uni(source + i, srclen - i, &tmp);
0364             dst_char = cpu_to_le16(tmp);
0365
0366             /*
0367              * if no match, use question mark, which at least in
0368              * some cases serves as wild card
0369              */
0370             if (charlen < 1) {
0371                 dst_char = cpu_to_le16(0x003f);
0372                 charlen = 1;
0373             }
0374         }
0375         /*
0376          * character may take more than one byte in the source string,
0377          * but will take exactly two bytes in the target string
0378          */
0379         i += charlen;
0380         put_unaligned(dst_char, &target[j]);
0381     }
0382
0383     return j;
0384 }