the-tree/crypto/gf128mul.c

0001 /* gf128mul.c - GF(2^128) multiplication functions
0002  *
0003  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
0004  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
0005  *
0006  * Based on Dr Brian Gladman's (GPL'd) work published at
0007  * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
0008  * See the original copyright notice below.
0009  *
0010  * This program is free software; you can redistribute it and/or modify it
0011  * under the terms of the GNU General Public License as published by the Free
0012  * Software Foundation; either version 2 of the License, or (at your option)
0013  * any later version.
0014  */
0015
0016 /*
0017  ---------------------------------------------------------------------------
0018  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
0019
0020  LICENSE TERMS
0021
0022  The free distribution and use of this software in both source and binary
0023  form is allowed (with or without changes) provided that:
0024
0025    1. distributions of this source code include the above copyright
0026       notice, this list of conditions and the following disclaimer;
0027
0028    2. distributions in binary form include the above copyright
0029       notice, this list of conditions and the following disclaimer
0030       in the documentation and/or other associated materials;
0031
0032    3. the copyright holder's name is not used to endorse products
0033       built using this software without specific written permission.
0034
0035  ALTERNATIVELY, provided that this notice is retained in full, this product
0036  may be distributed under the terms of the GNU General Public License (GPL),
0037  in which case the provisions of the GPL apply INSTEAD OF those given above.
0038
0039  DISCLAIMER
0040
0041  This software is provided 'as is' with no explicit or implied warranties
0042  in respect of its properties, including, but not limited to, correctness
0043  and/or fitness for purpose.
0044  ---------------------------------------------------------------------------
0045  Issue 31/01/2006
0046
0047  This file provides fast multiplication in GF(2^128) as required by several
0048  cryptographic authentication modes
0049 */
0050
0051 #include <crypto/gf128mul.h>
0052 #include <linux/kernel.h>
0053 #include <linux/module.h>
0054 #include <linux/slab.h>
0055
0056 #define gf128mul_dat(q) { \
0057     q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
0058     q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
0059     q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
0060     q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
0061     q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
0062     q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
0063     q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
0064     q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
0065     q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
0066     q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
0067     q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
0068     q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
0069     q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
0070     q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
0071     q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
0072     q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
0073     q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
0074     q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
0075     q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
0076     q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
0077     q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
0078     q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
0079     q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
0080     q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
0081     q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
0082     q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
0083     q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
0084     q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
0085     q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
0086     q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
0087     q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
0088     q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
0089 }
0090
0091 /*
0092  * Given a value i in 0..255 as the byte overflow when a field element
0093  * in GF(2^128) is multiplied by x^8, the following macro returns the
0094  * 16-bit value that must be XOR-ed into the low-degree end of the
0095  * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
0096  *
0097  * There are two versions of the macro, and hence two tables: one for
0098  * the "be" convention where the highest-order bit is the coefficient of
0099  * the highest-degree polynomial term, and one for the "le" convention
0100  * where the highest-order bit is the coefficient of the lowest-degree
0101  * polynomial term.  In both cases the values are stored in CPU byte
0102  * endianness such that the coefficients are ordered consistently across
0103  * bytes, i.e. in the "be" table bits 15..0 of the stored value
0104  * correspond to the coefficients of x^15..x^0, and in the "le" table
0105  * bits 15..0 correspond to the coefficients of x^0..x^15.
0106  *
0107  * Therefore, provided that the appropriate byte endianness conversions
0108  * are done by the multiplication functions (and these must be in place
0109  * anyway to support both little endian and big endian CPUs), the "be"
0110  * table can be used for multiplications of both "bbe" and "ble"
0111  * elements, and the "le" table can be used for multiplications of both
0112  * "lle" and "lbe" elements.
0113  */
0114
0115 #define xda_be(i) ( \
0116     (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
0117     (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
0118     (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
0119     (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
0120 )
0121
0122 #define xda_le(i) ( \
0123     (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
0124     (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
0125     (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
0126     (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
0127 )
0128
0129 static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
0130 static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
0131
0132 /*
0133  * The following functions multiply a field element by x^8 in
0134  * the polynomial field representation.  They use 64-bit word operations
0135  * to gain speed but compensate for machine endianness and hence work
0136  * correctly on both styles of machine.
0137  */
0138
0139 static void gf128mul_x8_lle(be128 *x)
0140 {
0141     u64 a = be64_to_cpu(x->a);
0142     u64 b = be64_to_cpu(x->b);
0143     u64 _tt = gf128mul_table_le[b & 0xff];
0144
0145     x->b = cpu_to_be64((b >> 8) | (a << 56));
0146     x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
0147 }
0148
0149 static void gf128mul_x8_bbe(be128 *x)
0150 {
0151     u64 a = be64_to_cpu(x->a);
0152     u64 b = be64_to_cpu(x->b);
0153     u64 _tt = gf128mul_table_be[a >> 56];
0154
0155     x->a = cpu_to_be64((a << 8) | (b >> 56));
0156     x->b = cpu_to_be64((b << 8) ^ _tt);
0157 }
0158
0159 void gf128mul_x8_ble(le128 *r, const le128 *x)
0160 {
0161     u64 a = le64_to_cpu(x->a);
0162     u64 b = le64_to_cpu(x->b);
0163     u64 _tt = gf128mul_table_be[a >> 56];
0164
0165     r->a = cpu_to_le64((a << 8) | (b >> 56));
0166     r->b = cpu_to_le64((b << 8) ^ _tt);
0167 }
0168 EXPORT_SYMBOL(gf128mul_x8_ble);
0169
0170 void gf128mul_lle(be128 *r, const be128 *b)
0171 {
0172     be128 p[8];
0173     int i;
0174
0175     p[0] = *r;
0176     for (i = 0; i < 7; ++i)
0177         gf128mul_x_lle(&p[i + 1], &p[i]);
0178
0179     memset(r, 0, sizeof(*r));
0180     for (i = 0;;) {
0181         u8 ch = ((u8 *)b)[15 - i];
0182
0183         if (ch & 0x80)
0184             be128_xor(r, r, &p[0]);
0185         if (ch & 0x40)
0186             be128_xor(r, r, &p[1]);
0187         if (ch & 0x20)
0188             be128_xor(r, r, &p[2]);
0189         if (ch & 0x10)
0190             be128_xor(r, r, &p[3]);
0191         if (ch & 0x08)
0192             be128_xor(r, r, &p[4]);
0193         if (ch & 0x04)
0194             be128_xor(r, r, &p[5]);
0195         if (ch & 0x02)
0196             be128_xor(r, r, &p[6]);
0197         if (ch & 0x01)
0198             be128_xor(r, r, &p[7]);
0199
0200         if (++i >= 16)
0201             break;
0202
0203         gf128mul_x8_lle(r);
0204     }
0205 }
0206 EXPORT_SYMBOL(gf128mul_lle);
0207
0208 void gf128mul_bbe(be128 *r, const be128 *b)
0209 {
0210     be128 p[8];
0211     int i;
0212
0213     p[0] = *r;
0214     for (i = 0; i < 7; ++i)
0215         gf128mul_x_bbe(&p[i + 1], &p[i]);
0216
0217     memset(r, 0, sizeof(*r));
0218     for (i = 0;;) {
0219         u8 ch = ((u8 *)b)[i];
0220
0221         if (ch & 0x80)
0222             be128_xor(r, r, &p[7]);
0223         if (ch & 0x40)
0224             be128_xor(r, r, &p[6]);
0225         if (ch & 0x20)
0226             be128_xor(r, r, &p[5]);
0227         if (ch & 0x10)
0228             be128_xor(r, r, &p[4]);
0229         if (ch & 0x08)
0230             be128_xor(r, r, &p[3]);
0231         if (ch & 0x04)
0232             be128_xor(r, r, &p[2]);
0233         if (ch & 0x02)
0234             be128_xor(r, r, &p[1]);
0235         if (ch & 0x01)
0236             be128_xor(r, r, &p[0]);
0237
0238         if (++i >= 16)
0239             break;
0240
0241         gf128mul_x8_bbe(r);
0242     }
0243 }
0244 EXPORT_SYMBOL(gf128mul_bbe);
0245
0246 /*      This version uses 64k bytes of table space.
0247     A 16 byte buffer has to be multiplied by a 16 byte key
0248     value in GF(2^128).  If we consider a GF(2^128) value in
0249     the buffer's lowest byte, we can construct a table of
0250     the 256 16 byte values that result from the 256 values
0251     of this byte.  This requires 4096 bytes. But we also
0252     need tables for each of the 16 higher bytes in the
0253     buffer as well, which makes 64 kbytes in total.
0254 */
0255 /* additional explanation
0256  * t[0][BYTE] contains g*BYTE
0257  * t[1][BYTE] contains g*x^8*BYTE
0258  *  ..
0259  * t[15][BYTE] contains g*x^120*BYTE */
0260 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
0261 {
0262     struct gf128mul_64k *t;
0263     int i, j, k;
0264
0265     t = kzalloc(sizeof(*t), GFP_KERNEL);
0266     if (!t)
0267         goto out;
0268
0269     for (i = 0; i < 16; i++) {
0270         t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
0271         if (!t->t[i]) {
0272             gf128mul_free_64k(t);
0273             t = NULL;
0274             goto out;
0275         }
0276     }
0277
0278     t->t[0]->t[1] = *g;
0279     for (j = 1; j <= 64; j <<= 1)
0280         gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
0281
0282     for (i = 0;;) {
0283         for (j = 2; j < 256; j += j)
0284             for (k = 1; k < j; ++k)
0285                 be128_xor(&t->t[i]->t[j + k],
0286                       &t->t[i]->t[j], &t->t[i]->t[k]);
0287
0288         if (++i >= 16)
0289             break;
0290
0291         for (j = 128; j > 0; j >>= 1) {
0292             t->t[i]->t[j] = t->t[i - 1]->t[j];
0293             gf128mul_x8_bbe(&t->t[i]->t[j]);
0294         }
0295     }
0296
0297 out:
0298     return t;
0299 }
0300 EXPORT_SYMBOL(gf128mul_init_64k_bbe);
0301
0302 void gf128mul_free_64k(struct gf128mul_64k *t)
0303 {
0304     int i;
0305
0306     for (i = 0; i < 16; i++)
0307         kfree_sensitive(t->t[i]);
0308     kfree_sensitive(t);
0309 }
0310 EXPORT_SYMBOL(gf128mul_free_64k);
0311
0312 void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
0313 {
0314     u8 *ap = (u8 *)a;
0315     be128 r[1];
0316     int i;
0317
0318     *r = t->t[0]->t[ap[15]];
0319     for (i = 1; i < 16; ++i)
0320         be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
0321     *a = *r;
0322 }
0323 EXPORT_SYMBOL(gf128mul_64k_bbe);
0324
0325 /*      This version uses 4k bytes of table space.
0326     A 16 byte buffer has to be multiplied by a 16 byte key
0327     value in GF(2^128).  If we consider a GF(2^128) value in a
0328     single byte, we can construct a table of the 256 16 byte
0329     values that result from the 256 values of this byte.
0330     This requires 4096 bytes. If we take the highest byte in
0331     the buffer and use this table to get the result, we then
0332     have to multiply by x^120 to get the final value. For the
0333     next highest byte the result has to be multiplied by x^112
0334     and so on. But we can do this by accumulating the result
0335     in an accumulator starting with the result for the top
0336     byte.  We repeatedly multiply the accumulator value by
0337     x^8 and then add in (i.e. xor) the 16 bytes of the next
0338     lower byte in the buffer, stopping when we reach the
0339     lowest byte. This requires a 4096 byte table.
0340 */
0341 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
0342 {
0343     struct gf128mul_4k *t;
0344     int j, k;
0345
0346     t = kzalloc(sizeof(*t), GFP_KERNEL);
0347     if (!t)
0348         goto out;
0349
0350     t->t[128] = *g;
0351     for (j = 64; j > 0; j >>= 1)
0352         gf128mul_x_lle(&t->t[j], &t->t[j+j]);
0353
0354     for (j = 2; j < 256; j += j)
0355         for (k = 1; k < j; ++k)
0356             be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
0357
0358 out:
0359     return t;
0360 }
0361 EXPORT_SYMBOL(gf128mul_init_4k_lle);
0362
0363 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
0364 {
0365     struct gf128mul_4k *t;
0366     int j, k;
0367
0368     t = kzalloc(sizeof(*t), GFP_KERNEL);
0369     if (!t)
0370         goto out;
0371
0372     t->t[1] = *g;
0373     for (j = 1; j <= 64; j <<= 1)
0374         gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
0375
0376     for (j = 2; j < 256; j += j)
0377         for (k = 1; k < j; ++k)
0378             be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
0379
0380 out:
0381     return t;
0382 }
0383 EXPORT_SYMBOL(gf128mul_init_4k_bbe);
0384
0385 void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
0386 {
0387     u8 *ap = (u8 *)a;
0388     be128 r[1];
0389     int i = 15;
0390
0391     *r = t->t[ap[15]];
0392     while (i--) {
0393         gf128mul_x8_lle(r);
0394         be128_xor(r, r, &t->t[ap[i]]);
0395     }
0396     *a = *r;
0397 }
0398 EXPORT_SYMBOL(gf128mul_4k_lle);
0399
0400 void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
0401 {
0402     u8 *ap = (u8 *)a;
0403     be128 r[1];
0404     int i = 0;
0405
0406     *r = t->t[ap[0]];
0407     while (++i < 16) {
0408         gf128mul_x8_bbe(r);
0409         be128_xor(r, r, &t->t[ap[i]]);
0410     }
0411     *a = *r;
0412 }
0413 EXPORT_SYMBOL(gf128mul_4k_bbe);
0414
0415 MODULE_LICENSE("GPL");
0416 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");