Back to home page

LXR

 
 

    


0001 /* gf128mul.c - GF(2^128) multiplication functions
0002  *
0003  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
0004  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
0005  *
0006  * Based on Dr Brian Gladman's (GPL'd) work published at
0007  * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
0008  * See the original copyright notice below.
0009  *
0010  * This program is free software; you can redistribute it and/or modify it
0011  * under the terms of the GNU General Public License as published by the Free
0012  * Software Foundation; either version 2 of the License, or (at your option)
0013  * any later version.
0014  */
0015 
0016 /*
0017  ---------------------------------------------------------------------------
0018  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
0019 
0020  LICENSE TERMS
0021 
0022  The free distribution and use of this software in both source and binary
0023  form is allowed (with or without changes) provided that:
0024 
0025    1. distributions of this source code include the above copyright
0026       notice, this list of conditions and the following disclaimer;
0027 
0028    2. distributions in binary form include the above copyright
0029       notice, this list of conditions and the following disclaimer
0030       in the documentation and/or other associated materials;
0031 
0032    3. the copyright holder's name is not used to endorse products
0033       built using this software without specific written permission.
0034 
0035  ALTERNATIVELY, provided that this notice is retained in full, this product
0036  may be distributed under the terms of the GNU General Public License (GPL),
0037  in which case the provisions of the GPL apply INSTEAD OF those given above.
0038 
0039  DISCLAIMER
0040 
0041  This software is provided 'as is' with no explicit or implied warranties
0042  in respect of its properties, including, but not limited to, correctness
0043  and/or fitness for purpose.
0044  ---------------------------------------------------------------------------
0045  Issue 31/01/2006
0046 
0047  This file provides fast multiplication in GF(128) as required by several
0048  cryptographic authentication modes
0049 */
0050 
0051 #include <crypto/gf128mul.h>
0052 #include <linux/kernel.h>
0053 #include <linux/module.h>
0054 #include <linux/slab.h>
0055 
0056 #define gf128mul_dat(q) { \
0057     q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
0058     q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
0059     q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
0060     q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
0061     q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
0062     q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
0063     q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
0064     q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
0065     q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
0066     q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
0067     q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
0068     q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
0069     q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
0070     q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
0071     q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
0072     q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
0073     q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
0074     q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
0075     q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
0076     q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
0077     q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
0078     q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
0079     q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
0080     q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
0081     q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
0082     q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
0083     q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
0084     q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
0085     q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
0086     q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
0087     q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
0088     q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
0089 }
0090 
0091 /*  Given the value i in 0..255 as the byte overflow when a field element
0092     in GHASH is multiplied by x^8, this function will return the values that
0093     are generated in the lo 16-bit word of the field value by applying the
0094     modular polynomial. The values lo_byte and hi_byte are returned via the
0095     macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
0096     memory as required by a suitable definition of this macro operating on
0097     the table above
0098 */
0099 
0100 #define xx(p, q)    0x##p##q
0101 
0102 #define xda_bbe(i) ( \
0103     (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
0104     (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
0105     (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
0106     (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
0107 )
0108 
0109 #define xda_lle(i) ( \
0110     (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
0111     (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
0112     (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
0113     (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
0114 )
0115 
0116 static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
0117 static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
0118 
0119 /* These functions multiply a field element by x, by x^4 and by x^8
0120  * in the polynomial field representation. It uses 32-bit word operations
0121  * to gain speed but compensates for machine endianess and hence works
0122  * correctly on both styles of machine.
0123  */
0124 
0125 static void gf128mul_x_lle(be128 *r, const be128 *x)
0126 {
0127     u64 a = be64_to_cpu(x->a);
0128     u64 b = be64_to_cpu(x->b);
0129     u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
0130 
0131     r->b = cpu_to_be64((b >> 1) | (a << 63));
0132     r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
0133 }
0134 
0135 static void gf128mul_x_bbe(be128 *r, const be128 *x)
0136 {
0137     u64 a = be64_to_cpu(x->a);
0138     u64 b = be64_to_cpu(x->b);
0139     u64 _tt = gf128mul_table_bbe[a >> 63];
0140 
0141     r->a = cpu_to_be64((a << 1) | (b >> 63));
0142     r->b = cpu_to_be64((b << 1) ^ _tt);
0143 }
0144 
0145 void gf128mul_x_ble(be128 *r, const be128 *x)
0146 {
0147     u64 a = le64_to_cpu(x->a);
0148     u64 b = le64_to_cpu(x->b);
0149     u64 _tt = gf128mul_table_bbe[b >> 63];
0150 
0151     r->a = cpu_to_le64((a << 1) ^ _tt);
0152     r->b = cpu_to_le64((b << 1) | (a >> 63));
0153 }
0154 EXPORT_SYMBOL(gf128mul_x_ble);
0155 
0156 static void gf128mul_x8_lle(be128 *x)
0157 {
0158     u64 a = be64_to_cpu(x->a);
0159     u64 b = be64_to_cpu(x->b);
0160     u64 _tt = gf128mul_table_lle[b & 0xff];
0161 
0162     x->b = cpu_to_be64((b >> 8) | (a << 56));
0163     x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
0164 }
0165 
0166 static void gf128mul_x8_bbe(be128 *x)
0167 {
0168     u64 a = be64_to_cpu(x->a);
0169     u64 b = be64_to_cpu(x->b);
0170     u64 _tt = gf128mul_table_bbe[a >> 56];
0171 
0172     x->a = cpu_to_be64((a << 8) | (b >> 56));
0173     x->b = cpu_to_be64((b << 8) ^ _tt);
0174 }
0175 
0176 void gf128mul_lle(be128 *r, const be128 *b)
0177 {
0178     be128 p[8];
0179     int i;
0180 
0181     p[0] = *r;
0182     for (i = 0; i < 7; ++i)
0183         gf128mul_x_lle(&p[i + 1], &p[i]);
0184 
0185     memset(r, 0, sizeof(*r));
0186     for (i = 0;;) {
0187         u8 ch = ((u8 *)b)[15 - i];
0188 
0189         if (ch & 0x80)
0190             be128_xor(r, r, &p[0]);
0191         if (ch & 0x40)
0192             be128_xor(r, r, &p[1]);
0193         if (ch & 0x20)
0194             be128_xor(r, r, &p[2]);
0195         if (ch & 0x10)
0196             be128_xor(r, r, &p[3]);
0197         if (ch & 0x08)
0198             be128_xor(r, r, &p[4]);
0199         if (ch & 0x04)
0200             be128_xor(r, r, &p[5]);
0201         if (ch & 0x02)
0202             be128_xor(r, r, &p[6]);
0203         if (ch & 0x01)
0204             be128_xor(r, r, &p[7]);
0205 
0206         if (++i >= 16)
0207             break;
0208 
0209         gf128mul_x8_lle(r);
0210     }
0211 }
0212 EXPORT_SYMBOL(gf128mul_lle);
0213 
0214 void gf128mul_bbe(be128 *r, const be128 *b)
0215 {
0216     be128 p[8];
0217     int i;
0218 
0219     p[0] = *r;
0220     for (i = 0; i < 7; ++i)
0221         gf128mul_x_bbe(&p[i + 1], &p[i]);
0222 
0223     memset(r, 0, sizeof(*r));
0224     for (i = 0;;) {
0225         u8 ch = ((u8 *)b)[i];
0226 
0227         if (ch & 0x80)
0228             be128_xor(r, r, &p[7]);
0229         if (ch & 0x40)
0230             be128_xor(r, r, &p[6]);
0231         if (ch & 0x20)
0232             be128_xor(r, r, &p[5]);
0233         if (ch & 0x10)
0234             be128_xor(r, r, &p[4]);
0235         if (ch & 0x08)
0236             be128_xor(r, r, &p[3]);
0237         if (ch & 0x04)
0238             be128_xor(r, r, &p[2]);
0239         if (ch & 0x02)
0240             be128_xor(r, r, &p[1]);
0241         if (ch & 0x01)
0242             be128_xor(r, r, &p[0]);
0243 
0244         if (++i >= 16)
0245             break;
0246 
0247         gf128mul_x8_bbe(r);
0248     }
0249 }
0250 EXPORT_SYMBOL(gf128mul_bbe);
0251 
0252 /*      This version uses 64k bytes of table space.
0253     A 16 byte buffer has to be multiplied by a 16 byte key
0254     value in GF(128).  If we consider a GF(128) value in
0255     the buffer's lowest byte, we can construct a table of
0256     the 256 16 byte values that result from the 256 values
0257     of this byte.  This requires 4096 bytes. But we also
0258     need tables for each of the 16 higher bytes in the
0259     buffer as well, which makes 64 kbytes in total.
0260 */
0261 /* additional explanation
0262  * t[0][BYTE] contains g*BYTE
0263  * t[1][BYTE] contains g*x^8*BYTE
0264  *  ..
0265  * t[15][BYTE] contains g*x^120*BYTE */
0266 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
0267 {
0268     struct gf128mul_64k *t;
0269     int i, j, k;
0270 
0271     t = kzalloc(sizeof(*t), GFP_KERNEL);
0272     if (!t)
0273         goto out;
0274 
0275     for (i = 0; i < 16; i++) {
0276         t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
0277         if (!t->t[i]) {
0278             gf128mul_free_64k(t);
0279             t = NULL;
0280             goto out;
0281         }
0282     }
0283 
0284     t->t[0]->t[1] = *g;
0285     for (j = 1; j <= 64; j <<= 1)
0286         gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
0287 
0288     for (i = 0;;) {
0289         for (j = 2; j < 256; j += j)
0290             for (k = 1; k < j; ++k)
0291                 be128_xor(&t->t[i]->t[j + k],
0292                       &t->t[i]->t[j], &t->t[i]->t[k]);
0293 
0294         if (++i >= 16)
0295             break;
0296 
0297         for (j = 128; j > 0; j >>= 1) {
0298             t->t[i]->t[j] = t->t[i - 1]->t[j];
0299             gf128mul_x8_bbe(&t->t[i]->t[j]);
0300         }
0301     }
0302 
0303 out:
0304     return t;
0305 }
0306 EXPORT_SYMBOL(gf128mul_init_64k_bbe);
0307 
0308 void gf128mul_free_64k(struct gf128mul_64k *t)
0309 {
0310     int i;
0311 
0312     for (i = 0; i < 16; i++)
0313         kzfree(t->t[i]);
0314     kzfree(t);
0315 }
0316 EXPORT_SYMBOL(gf128mul_free_64k);
0317 
0318 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
0319 {
0320     u8 *ap = (u8 *)a;
0321     be128 r[1];
0322     int i;
0323 
0324     *r = t->t[0]->t[ap[15]];
0325     for (i = 1; i < 16; ++i)
0326         be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
0327     *a = *r;
0328 }
0329 EXPORT_SYMBOL(gf128mul_64k_bbe);
0330 
0331 /*      This version uses 4k bytes of table space.
0332     A 16 byte buffer has to be multiplied by a 16 byte key
0333     value in GF(128).  If we consider a GF(128) value in a
0334     single byte, we can construct a table of the 256 16 byte
0335     values that result from the 256 values of this byte.
0336     This requires 4096 bytes. If we take the highest byte in
0337     the buffer and use this table to get the result, we then
0338     have to multiply by x^120 to get the final value. For the
0339     next highest byte the result has to be multiplied by x^112
0340     and so on. But we can do this by accumulating the result
0341     in an accumulator starting with the result for the top
0342     byte.  We repeatedly multiply the accumulator value by
0343     x^8 and then add in (i.e. xor) the 16 bytes of the next
0344     lower byte in the buffer, stopping when we reach the
0345     lowest byte. This requires a 4096 byte table.
0346 */
0347 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
0348 {
0349     struct gf128mul_4k *t;
0350     int j, k;
0351 
0352     t = kzalloc(sizeof(*t), GFP_KERNEL);
0353     if (!t)
0354         goto out;
0355 
0356     t->t[128] = *g;
0357     for (j = 64; j > 0; j >>= 1)
0358         gf128mul_x_lle(&t->t[j], &t->t[j+j]);
0359 
0360     for (j = 2; j < 256; j += j)
0361         for (k = 1; k < j; ++k)
0362             be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
0363 
0364 out:
0365     return t;
0366 }
0367 EXPORT_SYMBOL(gf128mul_init_4k_lle);
0368 
0369 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
0370 {
0371     struct gf128mul_4k *t;
0372     int j, k;
0373 
0374     t = kzalloc(sizeof(*t), GFP_KERNEL);
0375     if (!t)
0376         goto out;
0377 
0378     t->t[1] = *g;
0379     for (j = 1; j <= 64; j <<= 1)
0380         gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
0381 
0382     for (j = 2; j < 256; j += j)
0383         for (k = 1; k < j; ++k)
0384             be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
0385 
0386 out:
0387     return t;
0388 }
0389 EXPORT_SYMBOL(gf128mul_init_4k_bbe);
0390 
0391 void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
0392 {
0393     u8 *ap = (u8 *)a;
0394     be128 r[1];
0395     int i = 15;
0396 
0397     *r = t->t[ap[15]];
0398     while (i--) {
0399         gf128mul_x8_lle(r);
0400         be128_xor(r, r, &t->t[ap[i]]);
0401     }
0402     *a = *r;
0403 }
0404 EXPORT_SYMBOL(gf128mul_4k_lle);
0405 
0406 void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
0407 {
0408     u8 *ap = (u8 *)a;
0409     be128 r[1];
0410     int i = 0;
0411 
0412     *r = t->t[ap[0]];
0413     while (++i < 16) {
0414         gf128mul_x8_bbe(r);
0415         be128_xor(r, r, &t->t[ap[i]]);
0416     }
0417     *a = *r;
0418 }
0419 EXPORT_SYMBOL(gf128mul_4k_bbe);
0420 
0421 MODULE_LICENSE("GPL");
0422 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");