0001
0002
0003
0004
0005
0006
0007
0008 #include <asm/hwcap.h>
0009 #include <asm/neon.h>
0010 #include <asm/simd.h>
0011 #include <asm/unaligned.h>
0012 #include <crypto/algapi.h>
0013 #include <crypto/internal/hash.h>
0014 #include <crypto/internal/poly1305.h>
0015 #include <crypto/internal/simd.h>
0016 #include <linux/cpufeature.h>
0017 #include <linux/crypto.h>
0018 #include <linux/jump_label.h>
0019 #include <linux/module.h>
0020
0021 void poly1305_init_arm(void *state, const u8 *key);
0022 void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
0023 void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
0024 void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
0025
/*
 * Weak empty fallback so the file links even when the NEON assembly
 * object providing the real poly1305_blocks_neon() is not built in.
 * The have_neon static key is only enabled when NEON is present, so
 * this stub is never reached at runtime on such configurations.
 */
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
{
}
0029
0030 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
0031
0032 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
0033 {
0034 poly1305_init_arm(&dctx->h, key);
0035 dctx->s[0] = get_unaligned_le32(key + 16);
0036 dctx->s[1] = get_unaligned_le32(key + 20);
0037 dctx->s[2] = get_unaligned_le32(key + 24);
0038 dctx->s[3] = get_unaligned_le32(key + 28);
0039 dctx->buflen = 0;
0040 }
0041 EXPORT_SYMBOL(poly1305_init_arch);
0042
0043 static int arm_poly1305_init(struct shash_desc *desc)
0044 {
0045 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
0046
0047 dctx->buflen = 0;
0048 dctx->rset = 0;
0049 dctx->sset = false;
0050
0051 return 0;
0052 }
0053
/*
 * Feed whole blocks to the scalar or NEON core. For the shash interface
 * the first two blocks of input actually carry the key: block one sets
 * 'r' (via poly1305_init_arm), block two sets the nonce 's'. do_neon
 * selects the NEON routine; the caller must already be inside a
 * kernel_neon_begin() section when passing true.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			/* first 16 bytes of "message" are the 'r' key half */
			poly1305_init_arm(&dctx->h, src);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->rset = 1;
		}
		if (len >= POLY1305_BLOCK_SIZE) {
			/* next 16 bytes are the 's' (nonce) key half */
			dctx->s[0] = get_unaligned_le32(src + 0);
			dctx->s[1] = get_unaligned_le32(src + 4);
			dctx->s[2] = get_unaligned_le32(src + 8);
			dctx->s[3] = get_unaligned_le32(src + 12);
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
			dctx->sset = true;
		}
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	/* only whole blocks; the caller buffers any trailing partial block */
	len &= ~(POLY1305_BLOCK_SIZE - 1);

	if (static_branch_likely(&have_neon) && likely(do_neon))
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
	else
		poly1305_blocks_arm(&dctx->h, src, len, hibit);
}
0084
/*
 * Common shash update path: drain any previously buffered partial block,
 * process the bulk of the input, and stash the remaining tail (< 16
 * bytes) in dctx->buf for the next call or for finalization.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				   const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		/* top up the partial block left over from the last call */
		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		len -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			/* a single block is never worth a NEON round trip */
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		/* arm_poly1305_blocks() itself rounds len down to blocks */
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += round_down(len, POLY1305_BLOCK_SIZE);
		len %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(len)) {
		/* buffer the sub-block tail for later */
		dctx->buflen = len;
		memcpy(dctx->buf, src, len);
	}
}
0114
0115 static int arm_poly1305_update(struct shash_desc *desc,
0116 const u8 *src, unsigned int srclen)
0117 {
0118 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
0119
0120 arm_poly1305_do_update(dctx, src, srclen, false);
0121 return 0;
0122 }
0123
0124 static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
0125 const u8 *src,
0126 unsigned int srclen)
0127 {
0128 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
0129 bool do_neon = crypto_simd_usable() && srclen > 128;
0130
0131 if (static_branch_likely(&have_neon) && do_neon)
0132 kernel_neon_begin();
0133 arm_poly1305_do_update(dctx, src, srclen, do_neon);
0134 if (static_branch_likely(&have_neon) && do_neon)
0135 kernel_neon_end();
0136 return 0;
0137 }
0138
/*
 * Library-interface update: same buffering scheme as the shash path,
 * but the key was set by poly1305_init_arch() so there is no in-band
 * key handling here.
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
		       crypto_simd_usable();

	if (unlikely(dctx->buflen)) {
		/* complete the partial block buffered by the previous call */
		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(dctx->buf + dctx->buflen, src, bytes);
		src += bytes;
		nbytes -= bytes;
		dctx->buflen += bytes;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks_arm(&dctx->h, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);

		if (static_branch_likely(&have_neon) && do_neon) {
			do {
				/*
				 * Process at most 4 KiB per NEON section so
				 * preemption is not disabled for too long.
				 */
				unsigned int todo = min_t(unsigned int, len, SZ_4K);

				kernel_neon_begin();
				poly1305_blocks_neon(&dctx->h, src, todo, 1);
				kernel_neon_end();

				len -= todo;
				src += todo;
			} while (len);
		} else {
			poly1305_blocks_arm(&dctx->h, src, len, 1);
			src += len;
		}
		nbytes %= POLY1305_BLOCK_SIZE;
	}

	if (unlikely(nbytes)) {
		/* stash the sub-block tail for the next update/final */
		dctx->buflen = nbytes;
		memcpy(dctx->buf, src, nbytes);
	}
}
EXPORT_SYMBOL(poly1305_update_arch);
0187
0188 void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
0189 {
0190 if (unlikely(dctx->buflen)) {
0191 dctx->buf[dctx->buflen++] = 1;
0192 memset(dctx->buf + dctx->buflen, 0,
0193 POLY1305_BLOCK_SIZE - dctx->buflen);
0194 poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
0195 }
0196
0197 poly1305_emit_arm(&dctx->h, dst, dctx->s);
0198 *dctx = (struct poly1305_desc_ctx){};
0199 }
0200 EXPORT_SYMBOL(poly1305_final_arch);
0201
0202 static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
0203 {
0204 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
0205
0206 if (unlikely(!dctx->sset))
0207 return -ENOKEY;
0208
0209 poly1305_final_arch(dctx, dst);
0210 return 0;
0211 }
0212
/*
 * Two shash registrations: entry [0] is the always-available scalar
 * implementation; entry [1] (NEON builds only) prefers the NEON block
 * routine and wins on priority (200 vs 150) when both are registered.
 */
static struct shash_alg arm_poly1305_algs[] = {{
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-arm",
	.base.cra_priority	= 150,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#ifdef CONFIG_KERNEL_MODE_NEON
}, {
	.init			= arm_poly1305_init,
	.update			= arm_poly1305_update_neon,
	.final			= arm_poly1305_final,
	.digestsize		= POLY1305_DIGEST_SIZE,
	.descsize		= sizeof(struct poly1305_desc_ctx),

	.base.cra_name		= "poly1305",
	.base.cra_driver_name	= "poly1305-neon",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
#endif
}};
0240
/*
 * Module init: flip the have_neon static key when the CPU advertises
 * NEON, and register either just the scalar shash (no NEON) or both
 * entries of arm_poly1305_algs[].
 */
static int __init arm_poly1305_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
	    (elf_hwcap & HWCAP_NEON))
		static_branch_enable(&have_neon);
	else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		/* register only the scalar entry when NEON is unavailable */
		return crypto_register_shash(&arm_poly1305_algs[0]);

	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
		crypto_register_shashes(arm_poly1305_algs,
					ARRAY_SIZE(arm_poly1305_algs)) : 0;
}
0254
0255 static void __exit arm_poly1305_mod_exit(void)
0256 {
0257 if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
0258 return;
0259 if (!static_branch_likely(&have_neon)) {
0260 crypto_unregister_shash(&arm_poly1305_algs[0]);
0261 return;
0262 }
0263 crypto_unregister_shashes(arm_poly1305_algs,
0264 ARRAY_SIZE(arm_poly1305_algs));
0265 }
0266
0267 module_init(arm_poly1305_mod_init);
0268 module_exit(arm_poly1305_mod_exit);
0269
0270 MODULE_LICENSE("GPL v2");
0271 MODULE_ALIAS_CRYPTO("poly1305");
0272 MODULE_ALIAS_CRYPTO("poly1305-arm");
0273 MODULE_ALIAS_CRYPTO("poly1305-neon");