0001 /*
0002  * VMAC: Message Authentication Code using Universal Hashing
0003  *
0004  * Reference: https://tools.ietf.org/html/draft-krovetz-vmac-01
0005  *
0006  * Copyright (c) 2009, Intel Corporation.
0007  * Copyright (c) 2018, Google Inc.
0008  *
0009  * This program is free software; you can redistribute it and/or modify it
0010  * under the terms and conditions of the GNU General Public License,
0011  * version 2, as published by the Free Software Foundation.
0012  *
0013  * This program is distributed in the hope it will be useful, but WITHOUT
0014  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0015  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
0016  * more details.
0017  *
0018  * You should have received a copy of the GNU General Public License along with
0019  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
0020  * Place - Suite 330, Boston, MA 02111-1307 USA.
0021  */
0022 
0023 /*
0024  * Derived from:
0025  *  VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
0026  *  This implementation is hereby placed in the public domain.
0027  *  The authors offer no warranty. Use at your own risk.
0028  *  Last modified: 17 APR 08, 1700 PDT
0029  */
0030 
0031 #include <asm/unaligned.h>
0032 #include <linux/init.h>
0033 #include <linux/types.h>
0034 #include <linux/crypto.h>
0035 #include <linux/module.h>
0036 #include <linux/scatterlist.h>
0037 #include <asm/byteorder.h>
0038 #include <crypto/scatterwalk.h>
0039 #include <crypto/internal/cipher.h>
0040 #include <crypto/internal/hash.h>
0041 
0042 /*
0043  * User definable settings.
0044  */
0045 #define VMAC_TAG_LEN    64
0046 #define VMAC_KEY_SIZE   128 /* Must be 128, 192 or 256                    */
0047 #define VMAC_KEY_LEN    (VMAC_KEY_SIZE/8)
0048 #define VMAC_NHBYTES    128 /* Must be 2^i for some 3 < i < 13; standard = 128 */
0049 #define VMAC_NONCEBYTES 16
0050 
0051 /* per-transform (per-key) context */
0052 struct vmac_tfm_ctx {
0053     struct crypto_cipher *cipher;
0054     u64 nhkey[(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
0055     u64 polykey[2*VMAC_TAG_LEN/64];
0056     u64 l3key[2*VMAC_TAG_LEN/64];
0057 };
0058 
0059 /* per-request context */
0060 struct vmac_desc_ctx {
0061     union {
0062         u8 partial[VMAC_NHBYTES];   /* partial block */
0063         __le64 partial_words[VMAC_NHBYTES / 8];
0064     };
0065     unsigned int partial_size;  /* size of the partial block */
0066     bool first_block_processed;
0067     u64 polytmp[2*VMAC_TAG_LEN/64]; /* running total of L2-hash */
0068     union {
0069         u8 bytes[VMAC_NONCEBYTES];
0070         __be64 pads[VMAC_NONCEBYTES / 8];
0071     } nonce;
0072     unsigned int nonce_size; /* nonce bytes filled so far */
0073 };
0074 
0075 /*
0076  * Constants and masks
0077  */
0078 #define UINT64_C(x) x##ULL
0079 static const u64 p64   = UINT64_C(0xfffffffffffffeff);  /* 2^64 - 257 prime  */
0080 static const u64 m62   = UINT64_C(0x3fffffffffffffff);  /* 62-bit mask       */
0081 static const u64 m63   = UINT64_C(0x7fffffffffffffff);  /* 63-bit mask       */
0082 static const u64 m64   = UINT64_C(0xffffffffffffffff);  /* 64-bit mask       */
0083 static const u64 mpoly = UINT64_C(0x1fffffff1fffffff);  /* Poly key mask     */
0084 
0085 #define pe64_to_cpup le64_to_cpup       /* Prefer little endian */
0086 
0087 #ifdef __LITTLE_ENDIAN
0088 #define INDEX_HIGH 1
0089 #define INDEX_LOW 0
0090 #else
0091 #define INDEX_HIGH 0
0092 #define INDEX_LOW 1
0093 #endif
0094 
0095 /*
0096  * The following routines are used in this implementation. They are
0097  * written via macros to simulate zero-overhead call-by-reference.
0098  *
0099  * MUL64: 64x64->128-bit multiplication
0100  * PMUL64: assumes top bits cleared on inputs
0101  * ADD128: 128x128->128-bit addition
0102  */
0103 
0104 #define ADD128(rh, rl, ih, il)                      \
0105     do {                                \
0106         u64 _il = (il);                     \
0107         (rl) += (_il);                      \
0108         if ((rl) < (_il))                   \
0109             (rh)++;                     \
0110         (rh) += (ih);                       \
0111     } while (0)
0112 
0113 #define MUL32(i1, i2)   ((u64)(u32)(i1)*(u32)(i2))
0114 
0115 #define PMUL64(rh, rl, i1, i2)  /* Assumes m doesn't overflow */    \
0116     do {                                \
0117         u64 _i1 = (i1), _i2 = (i2);             \
0118         u64 m = MUL32(_i1, _i2>>32) + MUL32(_i1>>32, _i2);  \
0119         rh = MUL32(_i1>>32, _i2>>32);               \
0120         rl = MUL32(_i1, _i2);                   \
0121         ADD128(rh, rl, (m >> 32), (m << 32));           \
0122     } while (0)
0123 
0124 #define MUL64(rh, rl, i1, i2)                       \
0125     do {                                \
0126         u64 _i1 = (i1), _i2 = (i2);             \
0127         u64 m1 = MUL32(_i1, _i2>>32);               \
0128         u64 m2 = MUL32(_i1>>32, _i2);               \
0129         rh = MUL32(_i1>>32, _i2>>32);               \
0130         rl = MUL32(_i1, _i2);                   \
0131         ADD128(rh, rl, (m1 >> 32), (m1 << 32));         \
0132         ADD128(rh, rl, (m2 >> 32), (m2 << 32));         \
0133     } while (0)
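
     /*
      * Illustrative sketch (not part of the original file): on compilers that
      * provide unsigned __int128, the MUL64/ADD128 macros can be cross-checked
      * against a native 128-bit multiply.  The helper name, the __int128 type
      * and the #if 0 guard are assumptions added for this example only.
      */
     #if 0
     static bool mul64_matches_int128(u64 a, u64 b)
     {
         u64 rh, rl;
         unsigned __int128 ref = (unsigned __int128)a * b;

         /* MUL64 places the high half of a*b in rh and the low half in rl */
         MUL64(rh, rl, a, b);
         return rh == (u64)(ref >> 64) && rl == (u64)ref;
     }
     #endif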
0134 
0135 /*
0136  * For highest performance the L1 NH and L2 polynomial hashes should be
0137  * carefully implemented to take advantage of one's target architecture.
0138  * Here these two hash functions are defined multiple times; once for
0139  * 64-bit architectures, once for 32-bit SSE2 architectures, and once
0140  * for all other 32-bit architectures.
0141  * For each, nh_16 *must* be defined (works on multiples of 16 bytes).
0142  * Optionally, nh_vmac_nhbytes can be defined (for multiples of
0143  * VMAC_NHBYTES), and nh_16_2 and nh_vmac_nhbytes_2 (versions that do two
0144  * NH computations at once).
0145  */
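
     /*
      * Informal sketch of what nh_16 computes (per the VMAC draft cited above):
      * for little-endian 64-bit message words m[0..nw-1] and key words k[],
      *
      *   NH = sum over even i of
      *        ((m[i] + k[i]) mod 2^64) * ((m[i+1] + k[i+1]) mod 2^64),
      *
      * accumulated as a 128-bit value in (rh, rl).  The additions wrap mod
      * 2^64 through ordinary unsigned overflow; ADD128 maintains the running
      * 128-bit sum of the products.
      */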
0146 
0147 #ifdef CONFIG_64BIT
0148 
0149 #define nh_16(mp, kp, nw, rh, rl)                   \
0150     do {                                \
0151         int i; u64 th, tl;                  \
0152         rh = rl = 0;                        \
0153         for (i = 0; i < nw; i += 2) {               \
0154             MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \
0155                 pe64_to_cpup((mp)+i+1)+(kp)[i+1]);  \
0156             ADD128(rh, rl, th, tl);             \
0157         }                           \
0158     } while (0)
0159 
0160 #define nh_16_2(mp, kp, nw, rh, rl, rh1, rl1)               \
0161     do {                                \
0162         int i; u64 th, tl;                  \
0163         rh1 = rl1 = rh = rl = 0;                \
0164         for (i = 0; i < nw; i += 2) {               \
0165             MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \
0166                 pe64_to_cpup((mp)+i+1)+(kp)[i+1]);  \
0167             ADD128(rh, rl, th, tl);             \
0168             MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2],   \
0169                 pe64_to_cpup((mp)+i+1)+(kp)[i+3]);  \
0170             ADD128(rh1, rl1, th, tl);           \
0171         }                           \
0172     } while (0)
0173 
0174 #if (VMAC_NHBYTES >= 64) /* These versions do 64-bytes of message at a time */
0175 #define nh_vmac_nhbytes(mp, kp, nw, rh, rl)             \
0176     do {                                \
0177         int i; u64 th, tl;                  \
0178         rh = rl = 0;                        \
0179         for (i = 0; i < nw; i += 8) {               \
0180             MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \
0181                 pe64_to_cpup((mp)+i+1)+(kp)[i+1]);  \
0182             ADD128(rh, rl, th, tl);             \
0183             MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \
0184                 pe64_to_cpup((mp)+i+3)+(kp)[i+3]);  \
0185             ADD128(rh, rl, th, tl);             \
0186             MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \
0187                 pe64_to_cpup((mp)+i+5)+(kp)[i+5]);  \
0188             ADD128(rh, rl, th, tl);             \
0189             MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \
0190                 pe64_to_cpup((mp)+i+7)+(kp)[i+7]);  \
0191             ADD128(rh, rl, th, tl);             \
0192         }                           \
0193     } while (0)
0194 
0195 #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh1, rl1)         \
0196     do {                                \
0197         int i; u64 th, tl;                  \
0198         rh1 = rl1 = rh = rl = 0;                \
0199         for (i = 0; i < nw; i += 8) {               \
0200             MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i], \
0201                 pe64_to_cpup((mp)+i+1)+(kp)[i+1]);  \
0202             ADD128(rh, rl, th, tl);             \
0203             MUL64(th, tl, pe64_to_cpup((mp)+i)+(kp)[i+2],   \
0204                 pe64_to_cpup((mp)+i+1)+(kp)[i+3]);  \
0205             ADD128(rh1, rl1, th, tl);           \
0206             MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+2], \
0207                 pe64_to_cpup((mp)+i+3)+(kp)[i+3]);  \
0208             ADD128(rh, rl, th, tl);             \
0209             MUL64(th, tl, pe64_to_cpup((mp)+i+2)+(kp)[i+4], \
0210                 pe64_to_cpup((mp)+i+3)+(kp)[i+5]);  \
0211             ADD128(rh1, rl1, th, tl);           \
0212             MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+4], \
0213                 pe64_to_cpup((mp)+i+5)+(kp)[i+5]);  \
0214             ADD128(rh, rl, th, tl);             \
0215             MUL64(th, tl, pe64_to_cpup((mp)+i+4)+(kp)[i+6], \
0216                 pe64_to_cpup((mp)+i+5)+(kp)[i+7]);  \
0217             ADD128(rh1, rl1, th, tl);           \
0218             MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+6], \
0219                 pe64_to_cpup((mp)+i+7)+(kp)[i+7]);  \
0220             ADD128(rh, rl, th, tl);             \
0221             MUL64(th, tl, pe64_to_cpup((mp)+i+6)+(kp)[i+8], \
0222                 pe64_to_cpup((mp)+i+7)+(kp)[i+9]);  \
0223             ADD128(rh1, rl1, th, tl);           \
0224         }                           \
0225     } while (0)
0226 #endif
0227 
0228 #define poly_step(ah, al, kh, kl, mh, ml)               \
0229     do {                                \
0230         u64 t1h, t1l, t2h, t2l, t3h, t3l, z = 0;        \
0231         /* compute ab*cd, put bd into result registers */   \
0232         PMUL64(t3h, t3l, al, kh);               \
0233         PMUL64(t2h, t2l, ah, kl);               \
0234         PMUL64(t1h, t1l, ah, 2*kh);             \
0235         PMUL64(ah, al, al, kl);                 \
0236         /* add 2 * ac to result */              \
0237         ADD128(ah, al, t1h, t1l);               \
0238         /* add together ad + bc */              \
0239         ADD128(t2h, t2l, t3h, t3l);             \
0240         /* now (ah,al), (t2l,2*t2h) need summing */     \
0241         /* first add the high registers, carrying into t2h */   \
0242         ADD128(t2h, ah, z, t2l);                \
0243         /* double t2h and add top bit of ah */          \
0244         t2h = 2 * t2h + (ah >> 63);             \
0245         ah &= m63;                      \
0246         /* now add the low registers */             \
0247         ADD128(ah, al, mh, ml);                 \
0248         ADD128(ah, al, z, t2h);                 \
0249     } while (0)
0250 
0251 #else /* ! CONFIG_64BIT */
0252 
0253 #ifndef nh_16
0254 #define nh_16(mp, kp, nw, rh, rl)                   \
0255     do {                                \
0256         u64 t1, t2, m1, m2, t;                  \
0257         int i;                          \
0258         rh = rl = t = 0;                    \
0259         for (i = 0; i < nw; i += 2)  {              \
0260             t1 = pe64_to_cpup(mp+i) + kp[i];        \
0261             t2 = pe64_to_cpup(mp+i+1) + kp[i+1];        \
0262             m2 = MUL32(t1 >> 32, t2);           \
0263             m1 = MUL32(t1, t2 >> 32);           \
0264             ADD128(rh, rl, MUL32(t1 >> 32, t2 >> 32),   \
0265                 MUL32(t1, t2));             \
0266             rh += (u64)(u32)(m1 >> 32)          \
0267                 + (u32)(m2 >> 32);          \
0268             t += (u64)(u32)m1 + (u32)m2;            \
0269         }                           \
0270         ADD128(rh, rl, (t >> 32), (t << 32));           \
0271     } while (0)
0272 #endif
0273 
0274 static void poly_step_func(u64 *ahi, u64 *alo,
0275             const u64 *kh, const u64 *kl,
0276             const u64 *mh, const u64 *ml)
0277 {
0278 #define a0 (*(((u32 *)alo)+INDEX_LOW))
0279 #define a1 (*(((u32 *)alo)+INDEX_HIGH))
0280 #define a2 (*(((u32 *)ahi)+INDEX_LOW))
0281 #define a3 (*(((u32 *)ahi)+INDEX_HIGH))
0282 #define k0 (*(((u32 *)kl)+INDEX_LOW))
0283 #define k1 (*(((u32 *)kl)+INDEX_HIGH))
0284 #define k2 (*(((u32 *)kh)+INDEX_LOW))
0285 #define k3 (*(((u32 *)kh)+INDEX_HIGH))
0286 
0287     u64 p, q, t;
0288     u32 t2;
0289 
0290     p = MUL32(a3, k3);
0291     p += p;
0292     p += *(u64 *)mh;
0293     p += MUL32(a0, k2);
0294     p += MUL32(a1, k1);
0295     p += MUL32(a2, k0);
0296     t = (u32)(p);
0297     p >>= 32;
0298     p += MUL32(a0, k3);
0299     p += MUL32(a1, k2);
0300     p += MUL32(a2, k1);
0301     p += MUL32(a3, k0);
0302     t |= ((u64)((u32)p & 0x7fffffff)) << 32;
0303     p >>= 31;
0304     p += (u64)(((u32 *)ml)[INDEX_LOW]);
0305     p += MUL32(a0, k0);
0306     q =  MUL32(a1, k3);
0307     q += MUL32(a2, k2);
0308     q += MUL32(a3, k1);
0309     q += q;
0310     p += q;
0311     t2 = (u32)(p);
0312     p >>= 32;
0313     p += (u64)(((u32 *)ml)[INDEX_HIGH]);
0314     p += MUL32(a0, k1);
0315     p += MUL32(a1, k0);
0316     q =  MUL32(a2, k3);
0317     q += MUL32(a3, k2);
0318     q += q;
0319     p += q;
0320     *(u64 *)(alo) = (p << 32) | t2;
0321     p >>= 32;
0322     *(u64 *)(ahi) = p + t;
0323 
0324 #undef a0
0325 #undef a1
0326 #undef a2
0327 #undef a3
0328 #undef k0
0329 #undef k1
0330 #undef k2
0331 #undef k3
0332 }
0333 
0334 #define poly_step(ah, al, kh, kl, mh, ml)               \
0335     poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml))
0336 
0337 #endif  /* end of specialized NH and poly definitions */
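
     /*
      * Informally, each poly_step invocation performs one Horner step of the
      * L2 polynomial hash modulo p127 = 2^127 - 1:
      *
      *   (ah:al) = ((ah:al) * (kh:kl) + (mh:ml)) mod p127
      *
      * The key halves are pre-masked with mpoly in vmac_setkey(), so the
      * "top bits cleared" assumption of PMUL64 holds; the accumulator is only
      * partially reduced here, and l3hash() performs the full reduction.
      */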
0338 
0339 /* At least nh_16 is defined. Define the others as needed here. */
0340 #ifndef nh_16_2
0341 #define nh_16_2(mp, kp, nw, rh, rl, rh2, rl2)               \
0342     do {                                \
0343         nh_16(mp, kp, nw, rh, rl);              \
0344         nh_16(mp, ((kp)+2), nw, rh2, rl2);          \
0345     } while (0)
0346 #endif
0347 #ifndef nh_vmac_nhbytes
0348 #define nh_vmac_nhbytes(mp, kp, nw, rh, rl)             \
0349     nh_16(mp, kp, nw, rh, rl)
0350 #endif
0351 #ifndef nh_vmac_nhbytes_2
0352 #define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh2, rl2)         \
0353     do {                                \
0354         nh_vmac_nhbytes(mp, kp, nw, rh, rl);            \
0355         nh_vmac_nhbytes(mp, ((kp)+2), nw, rh2, rl2);        \
0356     } while (0)
0357 #endif
0358 
0359 static u64 l3hash(u64 p1, u64 p2, u64 k1, u64 k2, u64 len)
0360 {
0361     u64 rh, rl, t, z = 0;
0362 
0363     /* fully reduce (p1,p2)+(len,0) mod p127 */
0364     t = p1 >> 63;
0365     p1 &= m63;
0366     ADD128(p1, p2, len, t);
0367     /* At this point, (p1,p2) is at most 2^127+(len<<64) */
0368     t = (p1 > m63) + ((p1 == m63) && (p2 == m64));
0369     ADD128(p1, p2, z, t);
0370     p1 &= m63;
0371 
0372     /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
0373     t = p1 + (p2 >> 32);
0374     t += (t >> 32);
0375     t += (u32)t > 0xfffffffeu;
0376     p1 += (t >> 32);
0377     p2 += (p1 << 32);
0378 
0379     /* compute (p1+k1)%p64 and (p2+k2)%p64 */
0380     p1 += k1;
0381     p1 += (0 - (p1 < k1)) & 257;
0382     p2 += k2;
0383     p2 += (0 - (p2 < k2)) & 257;
0384 
0385     /* compute (p1+k1)*(p2+k2)%p64 */
0386     MUL64(rh, rl, p1, p2);
0387     t = rh >> 56;
0388     ADD128(t, rl, z, rh);
0389     rh <<= 8;
0390     ADD128(t, rl, z, rh);
0391     t += t << 8;
0392     rl += t;
0393     rl += (0 - (rl < t)) & 257;
0394     rl += (0 - (rl > p64-1)) & 257;
0395     return rl;
0396 }
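
     /*
      * Informal summary of l3hash() above: the 128-bit L2 result is fully
      * reduced mod 2^127 - 1 (with the bit length folded in), rewritten as a
      * quotient/remainder pair (p1, p2) with respect to 2^64 - 2^32, and then
      * combined with the "ip" key as
      *
      *   ((p1 + k1) * (p2 + k2)) mod p64,   where p64 = 2^64 - 257,
      *
      * giving the final 64-bit VHASH value.
      */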
0397 
0398 /* L1 and L2-hash one or more VMAC_NHBYTES-byte blocks */
0399 static void vhash_blocks(const struct vmac_tfm_ctx *tctx,
0400              struct vmac_desc_ctx *dctx,
0401              const __le64 *mptr, unsigned int blocks)
0402 {
0403     const u64 *kptr = tctx->nhkey;
0404     const u64 pkh = tctx->polykey[0];
0405     const u64 pkl = tctx->polykey[1];
0406     u64 ch = dctx->polytmp[0];
0407     u64 cl = dctx->polytmp[1];
0408     u64 rh, rl;
0409 
0410     if (!dctx->first_block_processed) {
0411         dctx->first_block_processed = true;
0412         nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
0413         rh &= m62;
0414         ADD128(ch, cl, rh, rl);
0415         mptr += (VMAC_NHBYTES/sizeof(u64));
0416         blocks--;
0417     }
0418 
0419     while (blocks--) {
0420         nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES/8, rh, rl);
0421         rh &= m62;
0422         poly_step(ch, cl, pkh, pkl, rh, rl);
0423         mptr += (VMAC_NHBYTES/sizeof(u64));
0424     }
0425 
0426     dctx->polytmp[0] = ch;
0427     dctx->polytmp[1] = cl;
0428 }
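
     /*
      * Note on first_block_processed: dctx->polytmp starts out as a copy of
      * the polynomial key (see vmac_init() below), so the first NH output is
      * folded in with a plain 128-bit addition, and every subsequent block
      * advances the accumulator with a full poly_step().
      */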
0429 
0430 static int vmac_setkey(struct crypto_shash *tfm,
0431                const u8 *key, unsigned int keylen)
0432 {
0433     struct vmac_tfm_ctx *tctx = crypto_shash_ctx(tfm);
0434     __be64 out[2];
0435     u8 in[16] = { 0 };
0436     unsigned int i;
0437     int err;
0438 
0439     if (keylen != VMAC_KEY_LEN)
0440         return -EINVAL;
0441 
0442     err = crypto_cipher_setkey(tctx->cipher, key, keylen);
0443     if (err)
0444         return err;
0445 
0446     /* Fill nh key */
0447     in[0] = 0x80;
0448     for (i = 0; i < ARRAY_SIZE(tctx->nhkey); i += 2) {
0449         crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
0450         tctx->nhkey[i] = be64_to_cpu(out[0]);
0451         tctx->nhkey[i+1] = be64_to_cpu(out[1]);
0452         in[15]++;
0453     }
0454 
0455     /* Fill poly key */
0456     in[0] = 0xC0;
0457     in[15] = 0;
0458     for (i = 0; i < ARRAY_SIZE(tctx->polykey); i += 2) {
0459         crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
0460         tctx->polykey[i] = be64_to_cpu(out[0]) & mpoly;
0461         tctx->polykey[i+1] = be64_to_cpu(out[1]) & mpoly;
0462         in[15]++;
0463     }
0464 
0465     /* Fill ip key */
0466     in[0] = 0xE0;
0467     in[15] = 0;
0468     for (i = 0; i < ARRAY_SIZE(tctx->l3key); i += 2) {
0469         do {
0470             crypto_cipher_encrypt_one(tctx->cipher, (u8 *)out, in);
0471             tctx->l3key[i] = be64_to_cpu(out[0]);
0472             tctx->l3key[i+1] = be64_to_cpu(out[1]);
0473             in[15]++;
0474         } while (tctx->l3key[i] >= p64 || tctx->l3key[i+1] >= p64);
0475     }
0476 
0477     return 0;
0478 }
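
     /*
      * In short, every subkey above is derived by encrypting 16-byte counter
      * blocks under the user-supplied cipher key: the first byte (0x80, 0xC0
      * or 0xE0) selects the NH, polynomial or L3 ("ip") key stream, the last
      * byte counts blocks, the polynomial key words are masked with mpoly,
      * and L3 key candidates are rejected until both halves are below p64.
      */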
0479 
0480 static int vmac_init(struct shash_desc *desc)
0481 {
0482     const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
0483     struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
0484 
0485     dctx->partial_size = 0;
0486     dctx->first_block_processed = false;
0487     memcpy(dctx->polytmp, tctx->polykey, sizeof(dctx->polytmp));
0488     dctx->nonce_size = 0;
0489     return 0;
0490 }
0491 
0492 static int vmac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
0493 {
0494     const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
0495     struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
0496     unsigned int n;
0497 
0498     /* Nonce is passed as first VMAC_NONCEBYTES bytes of data */
0499     if (dctx->nonce_size < VMAC_NONCEBYTES) {
0500         n = min(len, VMAC_NONCEBYTES - dctx->nonce_size);
0501         memcpy(&dctx->nonce.bytes[dctx->nonce_size], p, n);
0502         dctx->nonce_size += n;
0503         p += n;
0504         len -= n;
0505     }
0506 
0507     if (dctx->partial_size) {
0508         n = min(len, VMAC_NHBYTES - dctx->partial_size);
0509         memcpy(&dctx->partial[dctx->partial_size], p, n);
0510         dctx->partial_size += n;
0511         p += n;
0512         len -= n;
0513         if (dctx->partial_size == VMAC_NHBYTES) {
0514             vhash_blocks(tctx, dctx, dctx->partial_words, 1);
0515             dctx->partial_size = 0;
0516         }
0517     }
0518 
0519     if (len >= VMAC_NHBYTES) {
0520         n = round_down(len, VMAC_NHBYTES);
0521         /* TODO: 'p' may be misaligned here */
0522         vhash_blocks(tctx, dctx, (const __le64 *)p, n / VMAC_NHBYTES);
0523         p += n;
0524         len -= n;
0525     }
0526 
0527     if (len) {
0528         memcpy(dctx->partial, p, len);
0529         dctx->partial_size = len;
0530     }
0531 
0532     return 0;
0533 }
0534 
0535 static u64 vhash_final(const struct vmac_tfm_ctx *tctx,
0536                struct vmac_desc_ctx *dctx)
0537 {
0538     unsigned int partial = dctx->partial_size;
0539     u64 ch = dctx->polytmp[0];
0540     u64 cl = dctx->polytmp[1];
0541 
0542     /* L1 and L2-hash the final block if needed */
0543     if (partial) {
0544         /* Zero-pad to next 128-bit boundary */
0545         unsigned int n = round_up(partial, 16);
0546         u64 rh, rl;
0547 
0548         memset(&dctx->partial[partial], 0, n - partial);
0549         nh_16(dctx->partial_words, tctx->nhkey, n / 8, rh, rl);
0550         rh &= m62;
0551         if (dctx->first_block_processed)
0552             poly_step(ch, cl, tctx->polykey[0], tctx->polykey[1],
0553                   rh, rl);
0554         else
0555             ADD128(ch, cl, rh, rl);
0556     }
0557 
0558     /* L3-hash the 128-bit output of L2-hash */
0559     return l3hash(ch, cl, tctx->l3key[0], tctx->l3key[1], partial * 8);
0560 }
0561 
0562 static int vmac_final(struct shash_desc *desc, u8 *out)
0563 {
0564     const struct vmac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
0565     struct vmac_desc_ctx *dctx = shash_desc_ctx(desc);
0566     int index;
0567     u64 hash, pad;
0568 
0569     if (dctx->nonce_size != VMAC_NONCEBYTES)
0570         return -EINVAL;
0571 
0572     /*
0573      * The VMAC specification requires a nonce at least 1 bit shorter than
0574      * the block cipher's block length, so we actually only accept a 127-bit
0575      * nonce.  We define the unused bit to be the first one and require that
0576      * it be 0, so the needed prepending of a 0 bit is implicit.
0577      */
0578     if (dctx->nonce.bytes[0] & 0x80)
0579         return -EINVAL;
0580 
0581     /* Finish calculating the VHASH of the message */
0582     hash = vhash_final(tctx, dctx);
0583 
0584     /* Generate pseudorandom pad by encrypting the nonce */
0585     BUILD_BUG_ON(VMAC_NONCEBYTES != 2 * (VMAC_TAG_LEN / 8));
0586     index = dctx->nonce.bytes[VMAC_NONCEBYTES - 1] & 1;
0587     dctx->nonce.bytes[VMAC_NONCEBYTES - 1] &= ~1;
0588     crypto_cipher_encrypt_one(tctx->cipher, dctx->nonce.bytes,
0589                   dctx->nonce.bytes);
0590     pad = be64_to_cpu(dctx->nonce.pads[index]);
0591 
0592     /* The VMAC is the sum of VHASH and the pseudorandom pad */
0593     put_unaligned_be64(hash + pad, out);
0594     return 0;
0595 }
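
     /*
      * Usage sketch (illustrative, not part of the original file): computing a
      * 64-bit VMAC tag through the shash API from other kernel code.  The
      * template name "vmac64(aes)" and the nonce-prepending convention follow
      * the code above; the function name, buffer sizes and error handling are
      * assumptions made for this example (a caller would #include
      * <crypto/hash.h>).
      */
     #if 0
     static int vmac64_digest_example(const u8 key[16], const u8 nonce[16],
                      const u8 *msg, unsigned int msglen,
                      u8 tag[8])
     {
         struct crypto_shash *tfm;
         int err;

         tfm = crypto_alloc_shash("vmac64(aes)", 0, 0);
         if (IS_ERR(tfm))
             return PTR_ERR(tfm);

         err = crypto_shash_setkey(tfm, key, 16);    /* VMAC_KEY_LEN */
         if (!err) {
             SHASH_DESC_ON_STACK(desc, tfm);

             desc->tfm = tfm;
             err = crypto_shash_init(desc);
             /* The 128-bit nonce must be the first bytes of data hashed. */
             if (!err)
                 err = crypto_shash_update(desc, nonce, 16);
             if (!err)
                 err = crypto_shash_update(desc, msg, msglen);
             if (!err)
                 err = crypto_shash_final(desc, tag);    /* 8-byte tag */
         }
         crypto_free_shash(tfm);
         return err;
     }
     #endif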
0596 
0597 static int vmac_init_tfm(struct crypto_tfm *tfm)
0598 {
0599     struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
0600     struct crypto_cipher_spawn *spawn = crypto_instance_ctx(inst);
0601     struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
0602     struct crypto_cipher *cipher;
0603 
0604     cipher = crypto_spawn_cipher(spawn);
0605     if (IS_ERR(cipher))
0606         return PTR_ERR(cipher);
0607 
0608     tctx->cipher = cipher;
0609     return 0;
0610 }
0611 
0612 static void vmac_exit_tfm(struct crypto_tfm *tfm)
0613 {
0614     struct vmac_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
0615 
0616     crypto_free_cipher(tctx->cipher);
0617 }
0618 
0619 static int vmac_create(struct crypto_template *tmpl, struct rtattr **tb)
0620 {
0621     struct shash_instance *inst;
0622     struct crypto_cipher_spawn *spawn;
0623     struct crypto_alg *alg;
0624     u32 mask;
0625     int err;
0626 
0627     err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask);
0628     if (err)
0629         return err;
0630 
0631     inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
0632     if (!inst)
0633         return -ENOMEM;
0634     spawn = shash_instance_ctx(inst);
0635 
0636     err = crypto_grab_cipher(spawn, shash_crypto_instance(inst),
0637                  crypto_attr_alg_name(tb[1]), 0, mask);
0638     if (err)
0639         goto err_free_inst;
0640     alg = crypto_spawn_cipher_alg(spawn);
0641 
0642     err = -EINVAL;
0643     if (alg->cra_blocksize != VMAC_NONCEBYTES)
0644         goto err_free_inst;
0645 
0646     err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg);
0647     if (err)
0648         goto err_free_inst;
0649 
0650     inst->alg.base.cra_priority = alg->cra_priority;
0651     inst->alg.base.cra_blocksize = alg->cra_blocksize;
0652     inst->alg.base.cra_alignmask = alg->cra_alignmask;
0653 
0654     inst->alg.base.cra_ctxsize = sizeof(struct vmac_tfm_ctx);
0655     inst->alg.base.cra_init = vmac_init_tfm;
0656     inst->alg.base.cra_exit = vmac_exit_tfm;
0657 
0658     inst->alg.descsize = sizeof(struct vmac_desc_ctx);
0659     inst->alg.digestsize = VMAC_TAG_LEN / 8;
0660     inst->alg.init = vmac_init;
0661     inst->alg.update = vmac_update;
0662     inst->alg.final = vmac_final;
0663     inst->alg.setkey = vmac_setkey;
0664 
0665     inst->free = shash_free_singlespawn_instance;
0666 
0667     err = shash_register_instance(tmpl, inst);
0668     if (err) {
0669 err_free_inst:
0670         shash_free_singlespawn_instance(inst);
0671     }
0672     return err;
0673 }
0674 
0675 static struct crypto_template vmac64_tmpl = {
0676     .name = "vmac64",
0677     .create = vmac_create,
0678     .module = THIS_MODULE,
0679 };
0680 
0681 static int __init vmac_module_init(void)
0682 {
0683     return crypto_register_template(&vmac64_tmpl);
0684 }
0685 
0686 static void __exit vmac_module_exit(void)
0687 {
0688     crypto_unregister_template(&vmac64_tmpl);
0689 }
0690 
0691 subsys_initcall(vmac_module_init);
0692 module_exit(vmac_module_exit);
0693 
0694 MODULE_LICENSE("GPL");
0695 MODULE_DESCRIPTION("VMAC hash algorithm");
0696 MODULE_ALIAS_CRYPTO("vmac64");
0697 MODULE_IMPORT_NS(CRYPTO_INTERNAL);