// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/jump_label.h>
#include <linux/module.h>

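/*
 * Entry points into the CRYPTOGAMS assembly. 'hibit' is 1 for full message
 * blocks and 0 for the explicitly padded final block.
 */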
asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);

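/*
 * Library interface init: the assembly consumes the first half of the key
 * (the multiplier r); the second half is stored as s for the final addition
 * performed by poly1305_emit().
 */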
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
    poly1305_init_arm64(&dctx->h, key);
    dctx->s[0] = get_unaligned_le32(key + 16);
    dctx->s[1] = get_unaligned_le32(key + 20);
    dctx->s[2] = get_unaligned_le32(key + 24);
    dctx->s[3] = get_unaligned_le32(key + 28);
    dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);

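/*
 * shash interface: the key is not set via setkey() but arrives as the first
 * two blocks of data, so init() only resets the buffer and key-tracking
 * flags.
 */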
static int neon_poly1305_init(struct shash_desc *desc)
{
    struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

    dctx->buflen = 0;
    dctx->rset = 0;
    dctx->sset = false;

    return 0;
}

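/*
 * Consume the key from the head of the data stream on first use (one block
 * for r, one for s), then pass whole blocks to the scalar or NEON core.
 */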
static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
                 u32 len, u32 hibit, bool do_neon)
{
    if (unlikely(!dctx->sset)) {
        if (!dctx->rset) {
            poly1305_init_arm64(&dctx->h, src);
            src += POLY1305_BLOCK_SIZE;
            len -= POLY1305_BLOCK_SIZE;
            dctx->rset = 1;
        }
        if (len >= POLY1305_BLOCK_SIZE) {
            dctx->s[0] = get_unaligned_le32(src +  0);
            dctx->s[1] = get_unaligned_le32(src +  4);
            dctx->s[2] = get_unaligned_le32(src +  8);
            dctx->s[3] = get_unaligned_le32(src + 12);
            src += POLY1305_BLOCK_SIZE;
            len -= POLY1305_BLOCK_SIZE;
            dctx->sset = true;
        }
        if (len < POLY1305_BLOCK_SIZE)
            return;
    }

    len &= ~(POLY1305_BLOCK_SIZE - 1);

    if (static_branch_likely(&have_neon) && likely(do_neon))
        poly1305_blocks_neon(&dctx->h, src, len, hibit);
    else
        poly1305_blocks(&dctx->h, src, len, hibit);
}

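/*
 * Fill and flush the partial-block buffer before processing the bulk of the
 * input; any trailing remainder is buffered for the next call.
 */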
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
                    const u8 *src, u32 len, bool do_neon)
{
    if (unlikely(dctx->buflen)) {
        u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

        memcpy(dctx->buf + dctx->buflen, src, bytes);
        src += bytes;
        len -= bytes;
        dctx->buflen += bytes;

        if (dctx->buflen == POLY1305_BLOCK_SIZE) {
            neon_poly1305_blocks(dctx, dctx->buf,
                         POLY1305_BLOCK_SIZE, 1, false);
            dctx->buflen = 0;
        }
    }

    if (likely(len >= POLY1305_BLOCK_SIZE)) {
        neon_poly1305_blocks(dctx, src, len, 1, do_neon);
        src += round_down(len, POLY1305_BLOCK_SIZE);
        len %= POLY1305_BLOCK_SIZE;
    }

    if (unlikely(len)) {
        dctx->buflen = len;
        memcpy(dctx->buf, src, len);
    }
}

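/*
 * Use NEON only when SIMD is usable in this context and the input is large
 * enough (more than 128 bytes) to make the kernel_neon_begin()/end()
 * overhead worthwhile.
 */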
static int neon_poly1305_update(struct shash_desc *desc,
                const u8 *src, unsigned int srclen)
{
    bool do_neon = crypto_simd_usable() && srclen > 128;
    struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

    if (static_branch_likely(&have_neon) && do_neon)
        kernel_neon_begin();
    neon_poly1305_do_update(dctx, src, srclen, do_neon);
    if (static_branch_likely(&have_neon) && do_neon)
        kernel_neon_end();
    return 0;
}

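/*
 * Library interface update. NEON work is split into chunks of at most 4 KiB
 * so that preemption is not kept disabled for long stretches.
 */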
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
              unsigned int nbytes)
{
    if (unlikely(dctx->buflen)) {
        u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

        memcpy(dctx->buf + dctx->buflen, src, bytes);
        src += bytes;
        nbytes -= bytes;
        dctx->buflen += bytes;

        if (dctx->buflen == POLY1305_BLOCK_SIZE) {
            poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
            dctx->buflen = 0;
        }
    }

    if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
        unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

        if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
            do {
                unsigned int todo = min_t(unsigned int, len, SZ_4K);

                kernel_neon_begin();
                poly1305_blocks_neon(&dctx->h, src, todo, 1);
                kernel_neon_end();

                len -= todo;
                src += todo;
            } while (len);
        } else {
            poly1305_blocks(&dctx->h, src, len, 1);
            src += len;
        }
        nbytes %= POLY1305_BLOCK_SIZE;
    }

    if (unlikely(nbytes)) {
        dctx->buflen = nbytes;
        memcpy(dctx->buf, src, nbytes);
    }
}
EXPORT_SYMBOL(poly1305_update_arch);

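/*
 * Pad any remaining partial block with a 1 byte followed by zeroes (processed
 * with hibit == 0), then emit the tag and wipe the descriptor state.
 */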
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
    if (unlikely(dctx->buflen)) {
        dctx->buf[dctx->buflen++] = 1;
        memset(dctx->buf + dctx->buflen, 0,
               POLY1305_BLOCK_SIZE - dctx->buflen);
        poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
    }

    poly1305_emit(&dctx->h, dst, dctx->s);
    memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);

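/* Finalisation is only valid once a full key has been consumed. */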
static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
    struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

    if (unlikely(!dctx->sset))
        return -ENOKEY;

    poly1305_final_arch(dctx, dst);
    return 0;
}

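/*
 * Registered with a higher priority than the generic C implementation so it
 * is preferred whenever NEON is available.
 */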
static struct shash_alg neon_poly1305_alg = {
    .init           = neon_poly1305_init,
    .update         = neon_poly1305_update,
    .final          = neon_poly1305_final,
    .digestsize     = POLY1305_DIGEST_SIZE,
    .descsize       = sizeof(struct poly1305_desc_ctx),

    .base.cra_name          = "poly1305",
    .base.cra_driver_name   = "poly1305-neon",
    .base.cra_priority      = 200,
    .base.cra_blocksize     = POLY1305_BLOCK_SIZE,
    .base.cra_module        = THIS_MODULE,
};

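/*
 * Enable the NEON fast path only when the CPU advertises ASIMD; the shash is
 * registered only when the crypto hash API is reachable.
 */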
static int __init neon_poly1305_mod_init(void)
{
    if (!cpu_have_named_feature(ASIMD))
        return 0;

    static_branch_enable(&have_neon);

    return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
        crypto_register_shash(&neon_poly1305_alg) : 0;
}

static void __exit neon_poly1305_mod_exit(void)
{
    if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
        crypto_unregister_shash(&neon_poly1305_alg);
}

module_init(neon_poly1305_mod_init);
module_exit(neon_poly1305_mod_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-neon");