fs/cifs/cifs_unicode.h

0001 /* SPDX-License-Identifier: GPL-2.0-or-later */
0002 /*
0003  * cifs_unicode:  Unicode kernel case support
0004  *
0005  * Function:
0006  *     Convert a unicode character to upper or lower case using
0007  *     compressed tables.
0008  *
0009  *   Copyright (c) International Business Machines  Corp., 2000,2009
0010  *
0011  * Notes:
0012  *     These APIs are based on the C library functions.  The semantics
0013  *     should match the C functions but with expanded size operands.
0014  *
0015  *     The upper/lower functions are based on a table created by mkupr.
0016  *     This is a compressed table of upper and lower case conversion.
0017  */
0018 #ifndef _CIFS_UNICODE_H
0019 #define _CIFS_UNICODE_H
0020
0021 #include <asm/byteorder.h>
0022 #include <linux/types.h>
0023 #include <linux/nls.h>
0024
0025 #define  UNIUPR_NOLOWER     /* Example to not expand lower case tables */
0026
/*
 * Windows maps these to the user defined 16 bit Unicode range since they are
 * reserved symbols (along with \ and /), otherwise illegal to store
 * in filenames in NTFS.
 *
 * Each value is the ASCII code of the reserved character plus 0xF000.  The
 * complete expansion is parenthesized so that the cast cannot be split from
 * its operand when a macro is used inside a larger expression (for example
 * as the operand of sizeof).
 *
 * Note: despite its name, UNI_SLASH is built from the backslash character.
 */
#define UNI_ASTERISK	((__u16) ('*' + 0xF000))
#define UNI_QUESTION	((__u16) ('?' + 0xF000))
#define UNI_COLON	((__u16) (':' + 0xF000))
#define UNI_GRTRTHAN	((__u16) ('>' + 0xF000))
#define UNI_LESSTHAN	((__u16) ('<' + 0xF000))
#define UNI_PIPE	((__u16) ('|' + 0xF000))
#define UNI_SLASH	((__u16) ('\\' + 0xF000))
0039
/*
 * Macs use an older "SFM" mapping of the reserved symbols.  Its code points
 * (0xF020 - 0xF029) sit close to, but do not collide with, the 0xF000 + ASCII
 * values used by the UNI_* mapping.  Listed in code point order.
 */
#define SFM_DOUBLEQUOTE	((__u16) 0xF020)
#define SFM_ASTERISK	((__u16) 0xF021)
#define SFM_COLON	((__u16) 0xF022)
#define SFM_LESSTHAN	((__u16) 0xF023)
#define SFM_GRTRTHAN	((__u16) 0xF024)
#define SFM_QUESTION	((__u16) 0xF025)
#define SFM_SLASH	((__u16) 0xF026)
#define SFM_PIPE	((__u16) 0xF027)
#define SFM_SPACE	((__u16) 0xF028)
#define SFM_PERIOD	((__u16) 0xF029)
0055
/*
 * Selects how a reserved character is remapped when one is encountered.
 * Only one mechanism can be active at a time; otherwise readdir could
 * return directory entries which we would then be unable to open.
 *
 *   NO_MAP_UNI_RSVD  - leave the character alone, no remapping
 *   SFM_MAP_UNI_RSVD - remap using the SFM scheme (MAC compatible)
 *   SFU_MAP_UNI_RSVD - remap ala SFU (the "mapchars" option)
 */
#define NO_MAP_UNI_RSVD		0
#define SFM_MAP_UNI_RSVD	1
#define SFU_MAP_UNI_RSVD	2
0070
/*
 * Just declare what we want from uniupr.h.  We don't want to define the
 * tables in each source file.
 */
#ifndef UNICASERANGE_DEFINED
/* Record that the struct now exists so the guard above actually guards. */
#define UNICASERANGE_DEFINED
/*
 * One contiguous range of characters that share a case conversion table.
 * The conversion routines in this header walk an array of these until they
 * reach an entry whose start is 0.
 */
struct UniCaseRange {
	wchar_t start;		/* first character covered by the range */
	wchar_t end;		/* last character covered (inclusive) */
	signed char *table;	/* signed offsets, indexed by (char - start) */
};
#endif				/* UNICASERANGE_DEFINED */

#ifndef UNIUPR_NOUPPER
/* Direct-lookup offsets for the first 512 characters, then the range list. */
extern signed char CifsUniUpperTable[512];
extern const struct UniCaseRange CifsUniUpperRange[];
#endif				/* UNIUPR_NOUPPER */

#ifndef UNIUPR_NOLOWER
/* Lower case counterparts; compiled out when UNIUPR_NOLOWER is defined. */
extern signed char CifsUniLowerTable[512];
extern const struct UniCaseRange CifsUniLowerRange[];
#endif				/* UNIUPR_NOLOWER */
0091
#ifdef __KERNEL__
/*
 * Forward declaration: cifs_remap() below only takes a pointer to this
 * struct, so declare the tag here at file scope.  Without it the tag would be
 * scoped to the prototype itself whenever this header is included before the
 * header that defines the struct.
 */
struct cifs_sb_info;

/* UTF-16 conversion helpers; the definitions live outside this header. */
int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
		    const struct nls_table *cp, int map_type);
int cifs_utf16_bytes(const __le16 *from, int maxbytes,
		     const struct nls_table *codepage);
int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *);
char *cifs_strndup_from_utf16(const char *src, const int maxlen,
			      const bool is_unicode,
			      const struct nls_table *codepage);
extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen,
			      const struct nls_table *cp, int mapChars);
extern int cifs_remap(struct cifs_sb_info *cifs_sb);
extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
				     int *utf16_len, const struct nls_table *cp,
				     int remap);
#endif

/* Declared for all users of this header; defined outside it. */
wchar_t cifs_toupper(wchar_t in);
0110
/*
 * UniStrcat:  Append the second string to the end of the first
 *
 * The destination must already be null terminated and large enough to hold
 * both strings plus the terminator; no bounds checking is done here.
 *
 * Returns:
 *     Address of the first string
 */
static inline __le16 *
UniStrcat(__le16 *ucs1, const __le16 *ucs2)
{
	__le16 *dst = ucs1;
	__le16 ch;

	/* Walk to the terminator of the first string */
	while (*dst)
		dst++;

	/* Copy the second string, terminator included */
	do {
		ch = *ucs2++;
		*dst++ = ch;
	} while (ch);

	return ucs1;
}
0127
/*
 * UniStrchr:  Locate a character within a string
 *
 * Searching for the null character yields the address of the terminator.
 *
 * Returns:
 *     Address of the first occurrence of uc in ucs,
 *     or NULL when uc does not occur in the string
 */
static inline wchar_t *
UniStrchr(const wchar_t *ucs, wchar_t uc)
{
	for (;; ucs++) {
		if (*ucs == uc)
			return (wchar_t *) ucs;
		if (!*ucs)	/* hit the terminator without a match */
			return NULL;
	}
}
0145
/*
 * UniStrcmp:  Compare two null terminated strings
 *
 * The result is the difference between the first pair of characters that
 * differ (or 0 when both strings end together).
 *
 * Returns:
 *     < 0:  First string is less than second
 *     = 0:  Strings are equal
 *     > 0:  First string is greater than second
 */
static inline int
UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2)
{
	/* Skip the common prefix */
	for (; *ucs1 && *ucs1 == *ucs2; ucs1++, ucs2++)
		;

	return (int) *ucs1 - (int) *ucs2;
}
0163
/*
 * UniStrcpy:  Copy a null terminated string, terminator included
 *
 * The destination must be large enough; no bounds checking is done here.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2)
{
	wchar_t *dst = ucs1;
	wchar_t ch;

	do {
		ch = *ucs2++;
		*dst++ = ch;
	} while (ch);		/* stop once the terminator is copied */

	return ucs1;
}
0175
/*
 * UniStrlen:  Return the length of a string (in 16 bit Unicode chars not bytes)
 *
 * The terminator is not counted.  The count is kept in a size_t, matching
 * the return type, so there is no signed counter to overflow or to convert.
 */
static inline size_t
UniStrlen(const wchar_t *ucs1)
{
	size_t len = 0;

	while (ucs1[len])
		len++;
	return len;
}
0188
/*
 * UniStrnlen:  Return the length (in 16 bit Unicode chars not bytes) of a
 *		string (length limited)
 *
 * At most maxlen characters are examined, and the result never exceeds
 * maxlen.  A zero or negative maxlen yields 0 without touching the string.
 */
static inline size_t
UniStrnlen(const wchar_t *ucs1, int maxlen)
{
	size_t limit;
	size_t len = 0;

	if (maxlen <= 0)
		return 0;	/* nothing may be examined */

	limit = (size_t) maxlen;
	while (len < limit && ucs1[len])
		len++;
	return len;
}
0205
/*
 * UniStrncat:  Append at most n characters of the second string to the first
 *
 * The result is always null terminated, so the destination needs room for
 * up to n characters beyond its current contents plus the terminator.
 *
 * Returns:
 *     Address of the first string
 */
static inline wchar_t *
UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	wchar_t *dst = ucs1;

	/* Find the terminator of the first string */
	while (*dst)
		dst++;

	/* Append until the budget runs out or the source ends */
	for (; n != 0; n--) {
		wchar_t ch = *ucs2;

		if (!ch)
			break;
		*dst++ = ch;
		ucs2++;
	}

	*dst = 0;		/* terminate the result */
	return ucs1;
}
0223
/*
 * UniStrncmp:  Compare at most n characters of two strings
 *
 * Returns:
 *     0 when n is 0 or the first n characters (or both whole strings) match,
 *     otherwise the difference between the first pair that differs
 */
static inline int
UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	while (n--) {
		if (*ucs1 != *ucs2)
			return (int) *ucs1 - (int) *ucs2;
		if (!*ucs1)	/* both strings ended together */
			return 0;
		ucs1++;
		ucs2++;
	}
	return 0;		/* compared n characters, all equal */
}
0238
/*
 * UniStrncmp_le:  Compare length limited string - native to little-endian
 *
 * ucs1 is compared as-is; every character read from ucs2 is passed through
 * __le16_to_cpu() before it is compared.
 *
 * Returns:
 *     0 when n is 0 or the first n characters (or both whole strings) match,
 *     otherwise the difference between the first pair that differs
 */
static inline int
UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	while (n--) {
		if (*ucs1 != __le16_to_cpu(*ucs2))
			return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
		if (!*ucs1)	/* both strings ended together */
			return 0;
		ucs1++;
		ucs2++;
	}
	return 0;		/* compared n characters, all equal */
}
0253
/*
 * UniStrncpy:  Copy length limited string with pad
 *
 * Exactly n destination characters are written: the source up to its
 * terminator, then nulls.  When the source holds n or more characters the
 * destination is NOT null terminated.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	/* Copy characters while budget and source both last */
	while (i < n && ucs2[i]) {
		ucs1[i] = ucs2[i];
		i++;
	}

	/* Fill whatever is left of the budget with nulls */
	while (i < n)
		ucs1[i++] = 0;

	return ucs1;
}
0270
/*
 * UniStrncpy_le:  Copy length limited string with pad to little-endian
 *
 * Same contract as a padded, length limited copy: exactly n destination
 * characters are written and the result is not null terminated when the
 * source holds n or more characters.  Each copied character is passed
 * through __le16_to_cpu() on its way to the destination.
 *
 * Returns:
 *     Address of the destination string
 */
static inline wchar_t *
UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n)
{
	size_t i = 0;

	/* Convert and copy while budget and source both last */
	while (i < n && ucs2[i]) {
		ucs1[i] = __le16_to_cpu(ucs2[i]);
		i++;
	}

	/* Fill whatever is left of the budget with nulls */
	while (i < n)
		ucs1[i++] = 0;

	return ucs1;
}
0287
/*
 * UniStrstr:  Find the first occurrence of one string inside another
 *
 * An empty second string matches at the very start of the first string.
 *
 * Returns:
 *     Address of first match found
 *     NULL if no matching string is found
 */
static inline wchar_t *
UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2)
{
	const wchar_t *start;

	/* Try each starting position in turn */
	for (start = ucs1; ; start++) {
		const wchar_t *hay = start;
		const wchar_t *pat = ucs2;

		/* Advance over the matching prefix */
		while (*pat && *hay == *pat) {
			hay++;
			pat++;
		}

		if (!*pat)	/* consumed the whole pattern: match */
			return (wchar_t *) start;
		if (!*hay)	/* ran out of text first: no match possible */
			return NULL;
	}
}
0318
0319 #ifndef UNIUPR_NOUPPER
0320 /*
0321  * UniToupper:  Convert a unicode character to upper case
0322  */
0323 static inline wchar_t
0324 UniToupper(register wchar_t uc)
0325 {
0326     register const struct UniCaseRange *rp;
0327
0328     if (uc < sizeof(CifsUniUpperTable)) {
0329         /* Latin characters */
0330         return uc + CifsUniUpperTable[uc];  /* Use base tables */
0331     } else {
0332         rp = CifsUniUpperRange; /* Use range tables */
0333         while (rp->start) {
0334             if (uc < rp->start) /* Before start of range */
0335                 return uc;  /* Uppercase = input */
0336             if (uc <= rp->end)  /* In range */
0337                 return uc + rp->table[uc - rp->start];
0338             rp++;   /* Try next range */
0339         }
0340     }
0341     return uc;      /* Past last range */
0342 }
0343
/*
 * UniStrupr:  Upper case a little-endian unicode string in place
 *
 * Each character is converted to CPU byte order, upper cased with
 * UniToupper() and stored back in little-endian order.
 *
 * Returns:
 *     The input pointer
 */
static inline __le16 *
UniStrupr(__le16 *upin)
{
	__le16 *cur;

	for (cur = upin; *cur; cur++)
		*cur = cpu_to_le16(UniToupper(le16_to_cpu(*cur)));

	return upin;
}
0359 #endif              /* UNIUPR_NOUPPER */
0360
0361 #ifndef UNIUPR_NOLOWER
0362 /*
0363  * UniTolower:  Convert a unicode character to lower case
0364  */
0365 static inline wchar_t
0366 UniTolower(register wchar_t uc)
0367 {
0368     register const struct UniCaseRange *rp;
0369
0370     if (uc < sizeof(CifsUniLowerTable)) {
0371         /* Latin characters */
0372         return uc + CifsUniLowerTable[uc];  /* Use base tables */
0373     } else {
0374         rp = CifsUniLowerRange; /* Use range tables */
0375         while (rp->start) {
0376             if (uc < rp->start) /* Before start of range */
0377                 return uc;  /* Uppercase = input */
0378             if (uc <= rp->end)  /* In range */
0379                 return uc + rp->table[uc - rp->start];
0380             rp++;   /* Try next range */
0381         }
0382     }
0383     return uc;      /* Past last range */
0384 }
0385
/*
 * UniStrlwr:  Lower case a unicode string in place
 *
 * Every character up to the terminator is replaced by UniTolower() of itself.
 *
 * Returns:
 *     The input pointer
 */
static inline wchar_t *
UniStrlwr(wchar_t *upin)
{
	wchar_t *cur;

	for (cur = upin; *cur; cur++)
		*cur = UniTolower(*cur);

	return upin;
}
0401
0402 #endif
0403
0404 #endif /* _CIFS_UNICODE_H */