// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cryptographic API.
 *
 * Support for VIA PadLock hardware crypto engine.
 *
 * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
 */

#include <crypto/internal/hash.h>
#include <crypto/padlock.h>
#include <crypto/sha1.h>
#include <crypto/sha2.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <asm/cpu_device_id.h>
#include <asm/fpu/api.h>

struct padlock_sha_desc {
    struct shash_desc fallback;
};

struct padlock_sha_ctx {
    struct crypto_shash *fallback;
};

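/*
 * The multi-part operations (init/update/export/import) are delegated to a
 * software fallback shash kept in the tfm context; the descriptor simply
 * wraps the fallback's descriptor.  The PadLock hash instruction is only
 * invoked at finalization time (finup/final).
 */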
static int padlock_sha_init(struct shash_desc *desc)
{
    struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
    struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

    dctx->fallback.tfm = ctx->fallback;
    return crypto_shash_init(&dctx->fallback);
}

static int padlock_sha_update(struct shash_desc *desc,
                  const u8 *data, unsigned int length)
{
    struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

    return crypto_shash_update(&dctx->fallback, data, length);
}

static int padlock_sha_export(struct shash_desc *desc, void *out)
{
    struct padlock_sha_desc *dctx = shash_desc_ctx(desc);

    return crypto_shash_export(&dctx->fallback, out);
}

static int padlock_sha_import(struct shash_desc *desc, const void *in)
{
    struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
    struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);

    dctx->fallback.tfm = ctx->fallback;
    return crypto_shash_import(&dctx->fallback, in);
}

static inline void padlock_output_block(uint32_t *src,
            uint32_t *dst, size_t count)
{
    while (count--)
        *dst++ = swab32(*src++);
}

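/*
 * Finalization path: export the fallback's intermediate state, realign the
 * remaining data to a block boundary if needed, then hand the state plus
 * the tail of the message to the PadLock "rep xsha1"/"rep xsha256"
 * instruction.  As used here, ESI points at the input, EDI at the 16-byte
 * aligned state/result buffer, ECX holds the total message length and EAX
 * the number of bytes already hashed.
 */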
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
                  unsigned int count, u8 *out)
{
    /* We can't store directly to *out as it may be unaligned. */
    /* Note: don't reduce the buffer size below 128 bytes;
     * the PadLock microcode needs it that big. */
    char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
        ((aligned(STACK_ALIGN)));
    char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
    struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
    struct sha1_state state;
    unsigned int space;
    unsigned int leftover;
    int err;

    err = crypto_shash_export(&dctx->fallback, &state);
    if (err)
        goto out;

    if (state.count + count > ULONG_MAX)
        return crypto_shash_finup(&dctx->fallback, in, count, out);

    leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
    space = SHA1_BLOCK_SIZE - leftover;
    if (space) {
        if (count > space) {
            err = crypto_shash_update(&dctx->fallback, in, space) ?:
                  crypto_shash_export(&dctx->fallback, &state);
            if (err)
                goto out;
            count -= space;
            in += space;
        } else {
            memcpy(state.buffer + leftover, in, count);
            in = state.buffer;
            count += leftover;
            state.count &= ~(SHA1_BLOCK_SIZE - 1);
        }
    }

    memcpy(result, &state.state, SHA1_DIGEST_SIZE);

    asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
              :
              : "c"((unsigned long)state.count + count),
            "a"((unsigned long)state.count),
            "S"(in), "D"(result));

    padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

out:
    return err;
}

static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
{
    u8 buf[4];

    return padlock_sha1_finup(desc, buf, 0, out);
}

static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
                unsigned int count, u8 *out)
{
    /* We can't store directly to *out as it may be unaligned. */
    /* Note: don't reduce the buffer size below 128 bytes;
     * the PadLock microcode needs it that big. */
    char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
        ((aligned(STACK_ALIGN)));
    char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
    struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
    struct sha256_state state;
    unsigned int space;
    unsigned int leftover;
    int err;

    err = crypto_shash_export(&dctx->fallback, &state);
    if (err)
        goto out;

    if (state.count + count > ULONG_MAX)
        return crypto_shash_finup(&dctx->fallback, in, count, out);

    leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
    space = SHA256_BLOCK_SIZE - leftover;
    if (space) {
        if (count > space) {
            err = crypto_shash_update(&dctx->fallback, in, space) ?:
                  crypto_shash_export(&dctx->fallback, &state);
            if (err)
                goto out;
            count -= space;
            in += space;
        } else {
            memcpy(state.buf + leftover, in, count);
            in = state.buf;
            count += leftover;
            state.count &= ~(SHA256_BLOCK_SIZE - 1);
        }
    }

    memcpy(result, &state.state, SHA256_DIGEST_SIZE);

    asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
              :
              : "c"((unsigned long)state.count + count),
            "a"((unsigned long)state.count),
            "S"(in), "D"(result));

    padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);

out:
    return err;
}

static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
{
    u8 buf[4];

    return padlock_sha256_finup(desc, buf, 0, out);
}

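/*
 * Allocate the software fallback when the tfm is instantiated, and extend
 * descsize so each descriptor has room for the fallback's state behind
 * struct padlock_sha_desc.
 */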
static int padlock_init_tfm(struct crypto_shash *hash)
{
    const char *fallback_driver_name = crypto_shash_alg_name(hash);
    struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
    struct crypto_shash *fallback_tfm;

    /* Allocate a fallback and abort if it failed. */
    fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
                      CRYPTO_ALG_NEED_FALLBACK);
    if (IS_ERR(fallback_tfm)) {
        printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
               fallback_driver_name);
        return PTR_ERR(fallback_tfm);
    }

    ctx->fallback = fallback_tfm;
    hash->descsize += crypto_shash_descsize(fallback_tfm);
    return 0;
}

static void padlock_exit_tfm(struct crypto_shash *hash)
{
    struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);

    crypto_free_shash(ctx->fallback);
}

static struct shash_alg sha1_alg = {
    .digestsize =   SHA1_DIGEST_SIZE,
    .init       =   padlock_sha_init,
    .update     =   padlock_sha_update,
    .finup      =   padlock_sha1_finup,
    .final      =   padlock_sha1_final,
    .export     =   padlock_sha_export,
    .import     =   padlock_sha_import,
    .init_tfm   =   padlock_init_tfm,
    .exit_tfm   =   padlock_exit_tfm,
    .descsize   =   sizeof(struct padlock_sha_desc),
    .statesize  =   sizeof(struct sha1_state),
    .base       =   {
        .cra_name       =   "sha1",
        .cra_driver_name    =   "sha1-padlock",
        .cra_priority       =   PADLOCK_CRA_PRIORITY,
        .cra_flags      =   CRYPTO_ALG_NEED_FALLBACK,
        .cra_blocksize      =   SHA1_BLOCK_SIZE,
        .cra_ctxsize        =   sizeof(struct padlock_sha_ctx),
        .cra_module     =   THIS_MODULE,
    }
};

static struct shash_alg sha256_alg = {
    .digestsize =   SHA256_DIGEST_SIZE,
    .init       =   padlock_sha_init,
    .update     =   padlock_sha_update,
    .finup      =   padlock_sha256_finup,
    .final      =   padlock_sha256_final,
    .export     =   padlock_sha_export,
    .import     =   padlock_sha_import,
    .init_tfm   =   padlock_init_tfm,
    .exit_tfm   =   padlock_exit_tfm,
    .descsize   =   sizeof(struct padlock_sha_desc),
    .statesize  =   sizeof(struct sha256_state),
    .base       =   {
        .cra_name       =   "sha256",
        .cra_driver_name    =   "sha256-padlock",
        .cra_priority       =   PADLOCK_CRA_PRIORITY,
        .cra_flags      =   CRYPTO_ALG_NEED_FALLBACK,
        .cra_blocksize      =   SHA256_BLOCK_SIZE,
        .cra_ctxsize        =   sizeof(struct padlock_sha_ctx),
        .cra_module     =   THIS_MODULE,
    }
};

/* Two additional shash_alg instances for the hardware-implemented
 * multi-part hash supported by the VIA Nano processor. */
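/* These variants drive the PHE one 64-byte block at a time (the driver
 * passes EAX = -1 and a block count in ECX), keep the plain sha1/sha256
 * state in the descriptor rather than using a software fallback, and do
 * the final padding in software. */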
static int padlock_sha1_init_nano(struct shash_desc *desc)
{
    struct sha1_state *sctx = shash_desc_ctx(desc);

    *sctx = (struct sha1_state){
        .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
    };

    return 0;
}

static int padlock_sha1_update_nano(struct shash_desc *desc,
            const u8 *data, unsigned int len)
{
    struct sha1_state *sctx = shash_desc_ctx(desc);
    unsigned int partial, done;
    const u8 *src;
    /* The PHE requires the output buffer to be 128 bytes long
     * and 16-byte aligned. */
    u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
        ((aligned(STACK_ALIGN)));
    u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

    partial = sctx->count & 0x3f;
    sctx->count += len;
    done = 0;
    src = data;
    memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);

    if ((partial + len) >= SHA1_BLOCK_SIZE) {

        /* Complete the partial block buffered in the state and hash it */
        if (partial) {
            done = -partial;
            memcpy(sctx->buffer + partial, data,
                done + SHA1_BLOCK_SIZE);
            src = sctx->buffer;
            asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
            : "+S"(src), "+D"(dst)
            : "a"((long)-1), "c"((unsigned long)1));
            done += SHA1_BLOCK_SIZE;
            src = data + done;
        }

        /* Process the remaining full blocks of input data */
        if (len - done >= SHA1_BLOCK_SIZE) {
            asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
            : "+S"(src), "+D"(dst)
            : "a"((long)-1),
            "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
            done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
            src = data + done;
        }
        partial = 0;
    }
    memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
    memcpy(sctx->buffer + partial, src, len - done);

    return 0;
}

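/*
 * Finalization is done in software for the Nano variants: append the
 * standard 0x80 padding and the big-endian bit length via the update
 * routine, then byte-swap the internal state into the output digest.
 */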
static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
{
    struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
    unsigned int partial, padlen;
    __be64 bits;
    static const u8 padding[64] = { 0x80, };

    bits = cpu_to_be64(state->count << 3);

    /* Pad out to 56 mod 64 */
    partial = state->count & 0x3f;
    padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
    padlock_sha1_update_nano(desc, padding, padlen);

    /* Append length field bytes */
    padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));

    /* Swap to output */
    padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);

    return 0;
}

static int padlock_sha256_init_nano(struct shash_desc *desc)
{
    struct sha256_state *sctx = shash_desc_ctx(desc);

    *sctx = (struct sha256_state){
        .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
                SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7 },
    };

    return 0;
}

static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
              unsigned int len)
{
    struct sha256_state *sctx = shash_desc_ctx(desc);
    unsigned int partial, done;
    const u8 *src;
    /* The PHE requires the output buffer to be 128 bytes long
     * and 16-byte aligned. */
    u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
        ((aligned(STACK_ALIGN)));
    u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

    partial = sctx->count & 0x3f;
    sctx->count += len;
    done = 0;
    src = data;
    memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);

    if ((partial + len) >= SHA256_BLOCK_SIZE) {

        /* Complete the partial block buffered in the state and hash it */
        if (partial) {
            done = -partial;
            memcpy(sctx->buf + partial, data,
                done + SHA256_BLOCK_SIZE);
            src = sctx->buf;
            asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
            : "+S"(src), "+D"(dst)
            : "a"((long)-1), "c"((unsigned long)1));
            done += SHA256_BLOCK_SIZE;
            src = data + done;
        }

        /* Process the remaining full blocks of input data */
        if (len - done >= SHA256_BLOCK_SIZE) {
            asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
            : "+S"(src), "+D"(dst)
            : "a"((long)-1),
            "c"((unsigned long)((len - done) / SHA256_BLOCK_SIZE)));
            done += ((len - done) - (len - done) % SHA256_BLOCK_SIZE);
            src = data + done;
        }
        partial = 0;
    }
    memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
    memcpy(sctx->buf + partial, src, len - done);

    return 0;
}

static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
{
    struct sha256_state *state =
        (struct sha256_state *)shash_desc_ctx(desc);
    unsigned int partial, padlen;
    __be64 bits;
    static const u8 padding[64] = { 0x80, };

    bits = cpu_to_be64(state->count << 3);

    /* Pad out to 56 mod 64 */
    partial = state->count & 0x3f;
    padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
    padlock_sha256_update_nano(desc, padding, padlen);

    /* Append length field bytes */
    padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));

    /* Swap to output */
    padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);

    return 0;
}

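/*
 * The Nano variants keep a plain sha1_state/sha256_state in the descriptor,
 * so export and import reduce to a memcpy of statesize bytes.
 */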
static int padlock_sha_export_nano(struct shash_desc *desc,
                void *out)
{
    int statesize = crypto_shash_statesize(desc->tfm);
    void *sctx = shash_desc_ctx(desc);

    memcpy(out, sctx, statesize);
    return 0;
}

static int padlock_sha_import_nano(struct shash_desc *desc,
                const void *in)
{
    int statesize = crypto_shash_statesize(desc->tfm);
    void *sctx = shash_desc_ctx(desc);

    memcpy(sctx, in, statesize);
    return 0;
}

static struct shash_alg sha1_alg_nano = {
    .digestsize =   SHA1_DIGEST_SIZE,
    .init       =   padlock_sha1_init_nano,
    .update     =   padlock_sha1_update_nano,
    .final      =   padlock_sha1_final_nano,
    .export     =   padlock_sha_export_nano,
    .import     =   padlock_sha_import_nano,
    .descsize   =   sizeof(struct sha1_state),
    .statesize  =   sizeof(struct sha1_state),
    .base       =   {
        .cra_name       =   "sha1",
        .cra_driver_name    =   "sha1-padlock-nano",
        .cra_priority       =   PADLOCK_CRA_PRIORITY,
        .cra_blocksize      =   SHA1_BLOCK_SIZE,
        .cra_module     =   THIS_MODULE,
    }
};

static struct shash_alg sha256_alg_nano = {
    .digestsize =   SHA256_DIGEST_SIZE,
    .init       =   padlock_sha256_init_nano,
    .update     =   padlock_sha256_update_nano,
    .final      =   padlock_sha256_final_nano,
    .export     =   padlock_sha_export_nano,
    .import     =   padlock_sha_import_nano,
    .descsize   =   sizeof(struct sha256_state),
    .statesize  =   sizeof(struct sha256_state),
    .base       =   {
        .cra_name       =   "sha256",
        .cra_driver_name    =   "sha256-padlock-nano",
        .cra_priority       =   PADLOCK_CRA_PRIORITY,
        .cra_blocksize      =   SHA256_BLOCK_SIZE,
        .cra_module     =   THIS_MODULE,
    }
};

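/* Match any x86 CPU that advertises the PadLock Hash Engine (PHE) feature. */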
static const struct x86_cpu_id padlock_sha_ids[] = {
    X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
    {}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);

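/*
 * The hash engine must both be present (PHE) and enabled (PHE_EN) before
 * the algorithms are registered.
 */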
static int __init padlock_init(void)
{
    int rc = -ENODEV;
    struct cpuinfo_x86 *c = &cpu_data(0);
    struct shash_alg *sha1;
    struct shash_alg *sha256;

    if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
        return -ENODEV;

    /* Register the Nano-specific algorithms when running on a
     * VIA Nano processor, otherwise register the classic ones. */
    if (c->x86_model < 0x0f) {
        sha1 = &sha1_alg;
        sha256 = &sha256_alg;
    } else {
        sha1 = &sha1_alg_nano;
        sha256 = &sha256_alg_nano;
    }

    rc = crypto_register_shash(sha1);
    if (rc)
        goto out;

    rc = crypto_register_shash(sha256);
    if (rc)
        goto out_unreg1;

    printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");

    return 0;

out_unreg1:
    crypto_unregister_shash(sha1);

out:
    printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
    return rc;
}

static void __exit padlock_fini(void)
{
    struct cpuinfo_x86 *c = &cpu_data(0);

    if (c->x86_model >= 0x0f) {
        crypto_unregister_shash(&sha1_alg_nano);
        crypto_unregister_shash(&sha256_alg_nano);
    } else {
        crypto_unregister_shash(&sha1_alg);
        crypto_unregister_shash(&sha256_alg);
    }
}

module_init(padlock_init);
module_exit(padlock_fini);

MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

MODULE_ALIAS_CRYPTO("sha1-all");
MODULE_ALIAS_CRYPTO("sha256-all");
MODULE_ALIAS_CRYPTO("sha1-padlock");
MODULE_ALIAS_CRYPTO("sha256-padlock");