the-tree/lib/bch.c

0001 /*
0002  * Generic binary BCH encoding/decoding library
0003  *
0004  * This program is free software; you can redistribute it and/or modify it
0005  * under the terms of the GNU General Public License version 2 as published by
0006  * the Free Software Foundation.
0007  *
0008  * This program is distributed in the hope that it will be useful, but WITHOUT
0009  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0010  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
0011  * more details.
0012  *
0013  * You should have received a copy of the GNU General Public License along with
0014  * this program; if not, write to the Free Software Foundation, Inc., 51
0015  * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0016  *
0017  * Copyright © 2011 Parrot S.A.
0018  *
0019  * Author: Ivan Djelic <ivan.djelic@parrot.com>
0020  *
0021  * Description:
0022  *
0023  * This library provides runtime configurable encoding/decoding of binary
0024  * Bose-Chaudhuri-Hocquenghem (BCH) codes.
0025  *
0026  * Call bch_init to get a pointer to a newly allocated bch_control structure for
0027  * the given m (Galois field order), t (error correction capability) and
0028  * (optional) primitive polynomial parameters.
0029  *
0030  * Call bch_encode to compute and store ecc parity bytes to a given buffer.
0031  * Call bch_decode to detect and locate errors in received data.
0032  *
0033  * On systems supporting hw BCH features, intermediate results may be provided
0034  * to bch_decode in order to skip certain steps. See bch_decode() documentation
0035  * for details.
0036  *
0037  * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
0038  * parameters m and t; thus allowing extra compiler optimizations and providing
0039  * better (up to 2x) encoding performance. Using this option makes sense when
0040  * (m,t) are fixed and known in advance, e.g. when using BCH error correction
0041  * on a particular NAND flash device.
0042  *
0043  * Algorithmic details:
0044  *
0045  * Encoding is performed by processing 32 input bits in parallel, using 4
0046  * remainder lookup tables.
0047  *
0048  * The final stage of decoding involves the following internal steps:
0049  * a. Syndrome computation
0050  * b. Error locator polynomial computation using Berlekamp-Massey algorithm
0051  * c. Error locator root finding (by far the most expensive step)
0052  *
0053  * In this implementation, step c is not performed using the usual Chien search.
0054  * Instead, an alternative approach described in [1] is used. It consists in
0055  * factoring the error locator polynomial using the Berlekamp Trace algorithm
0056  * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
0057  * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
0058  * much better performance than Chien search for usual (m,t) values (typically
0059  * m >= 13, t < 32, see [1]).
0060  *
0061  * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
0062  * of characteristic 2, in: Western European Workshop on Research in Cryptology
0063  * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
0064  * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
0065  * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
0066  */
0067
0068 #include <linux/kernel.h>
0069 #include <linux/errno.h>
0070 #include <linux/init.h>
0071 #include <linux/module.h>
0072 #include <linux/slab.h>
0073 #include <linux/bitops.h>
0074 #include <asm/byteorder.h>
0075 #include <linux/bch.h>
0076
0077 #if defined(CONFIG_BCH_CONST_PARAMS)
0078 #define GF_M(_p)               (CONFIG_BCH_CONST_M)
0079 #define GF_T(_p)               (CONFIG_BCH_CONST_T)
0080 #define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
0081 #define BCH_MAX_M              (CONFIG_BCH_CONST_M)
0082 #define BCH_MAX_T          (CONFIG_BCH_CONST_T)
0083 #else
0084 #define GF_M(_p)               ((_p)->m)
0085 #define GF_T(_p)               ((_p)->t)
0086 #define GF_N(_p)               ((_p)->n)
0087 #define BCH_MAX_M              15 /* 2KB */
0088 #define BCH_MAX_T              64 /* 64 bit correction */
0089 #endif
0090
0091 #define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
0092 #define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
0093
0094 #define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
0095
0096 #ifndef dbg
0097 #define dbg(_fmt, args...)     do {} while (0)
0098 #endif
0099
0100 /*
0101  * represent a polynomial over GF(2^m)
0102  */
0103 struct gf_poly {
0104     unsigned int deg;    /* polynomial degree */
0105     unsigned int c[];   /* polynomial terms */
0106 };
0107
0108 /* given its degree, compute a polynomial size in bytes */
0109 #define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
0110
0111 /* polynomial of degree 1 */
0112 struct gf_poly_deg1 {
0113     struct gf_poly poly;
0114     unsigned int   c[2];
0115 };
0116
0117 static u8 swap_bits_table[] = {
0118     0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
0119     0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
0120     0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
0121     0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
0122     0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
0123     0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
0124     0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
0125     0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
0126     0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
0127     0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
0128     0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
0129     0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
0130     0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
0131     0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
0132     0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
0133     0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
0134     0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
0135     0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
0136     0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
0137     0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
0138     0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
0139     0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
0140     0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
0141     0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
0142     0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
0143     0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
0144     0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
0145     0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
0146     0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
0147     0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
0148     0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
0149     0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
0150 };
0151
0152 static u8 swap_bits(struct bch_control *bch, u8 in)
0153 {
0154     if (!bch->swap_bits)
0155         return in;
0156
0157     return swap_bits_table[in];
0158 }
0159
0160 /*
0161  * same as bch_encode(), but process input data one byte at a time
0162  */
0163 static void bch_encode_unaligned(struct bch_control *bch,
0164                  const unsigned char *data, unsigned int len,
0165                  uint32_t *ecc)
0166 {
0167     int i;
0168     const uint32_t *p;
0169     const int l = BCH_ECC_WORDS(bch)-1;
0170
0171     while (len--) {
0172         u8 tmp = swap_bits(bch, *data++);
0173
0174         p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
0175
0176         for (i = 0; i < l; i++)
0177             ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
0178
0179         ecc[l] = (ecc[l] << 8)^(*p);
0180     }
0181 }
0182
0183 /*
0184  * convert ecc bytes to aligned, zero-padded 32-bit ecc words
0185  */
0186 static void load_ecc8(struct bch_control *bch, uint32_t *dst,
0187               const uint8_t *src)
0188 {
0189     uint8_t pad[4] = {0, 0, 0, 0};
0190     unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
0191
0192     for (i = 0; i < nwords; i++, src += 4)
0193         dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
0194             ((u32)swap_bits(bch, src[1]) << 16) |
0195             ((u32)swap_bits(bch, src[2]) << 8) |
0196             swap_bits(bch, src[3]);
0197
0198     memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
0199     dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
0200         ((u32)swap_bits(bch, pad[1]) << 16) |
0201         ((u32)swap_bits(bch, pad[2]) << 8) |
0202         swap_bits(bch, pad[3]);
0203 }
0204
0205 /*
0206  * convert 32-bit ecc words to ecc bytes
0207  */
0208 static void store_ecc8(struct bch_control *bch, uint8_t *dst,
0209                const uint32_t *src)
0210 {
0211     uint8_t pad[4];
0212     unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
0213
0214     for (i = 0; i < nwords; i++) {
0215         *dst++ = swap_bits(bch, src[i] >> 24);
0216         *dst++ = swap_bits(bch, src[i] >> 16);
0217         *dst++ = swap_bits(bch, src[i] >> 8);
0218         *dst++ = swap_bits(bch, src[i]);
0219     }
0220     pad[0] = swap_bits(bch, src[nwords] >> 24);
0221     pad[1] = swap_bits(bch, src[nwords] >> 16);
0222     pad[2] = swap_bits(bch, src[nwords] >> 8);
0223     pad[3] = swap_bits(bch, src[nwords]);
0224     memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
0225 }
0226
0227 /**
0228  * bch_encode - calculate BCH ecc parity of data
0229  * @bch:   BCH control structure
0230  * @data:  data to encode
0231  * @len:   data length in bytes
0232  * @ecc:   ecc parity data, must be initialized by caller
0233  *
0234  * The @ecc parity array is used both as input and output parameter, in order to
0235  * allow incremental computations. It should be of the size indicated by member
0236  * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
0237  *
0238  * The exact number of computed ecc parity bits is given by member @ecc_bits of
0239  * @bch; it may be less than m*t for large values of t.
0240  */
0241 void bch_encode(struct bch_control *bch, const uint8_t *data,
0242         unsigned int len, uint8_t *ecc)
0243 {
0244     const unsigned int l = BCH_ECC_WORDS(bch)-1;
0245     unsigned int i, mlen;
0246     unsigned long m;
0247     uint32_t w, r[BCH_ECC_MAX_WORDS];
0248     const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
0249     const uint32_t * const tab0 = bch->mod8_tab;
0250     const uint32_t * const tab1 = tab0 + 256*(l+1);
0251     const uint32_t * const tab2 = tab1 + 256*(l+1);
0252     const uint32_t * const tab3 = tab2 + 256*(l+1);
0253     const uint32_t *pdata, *p0, *p1, *p2, *p3;
0254
0255     if (WARN_ON(r_bytes > sizeof(r)))
0256         return;
0257
0258     if (ecc) {
0259         /* load ecc parity bytes into internal 32-bit buffer */
0260         load_ecc8(bch, bch->ecc_buf, ecc);
0261     } else {
0262         memset(bch->ecc_buf, 0, r_bytes);
0263     }
0264
0265     /* process first unaligned data bytes */
0266     m = ((unsigned long)data) & 3;
0267     if (m) {
0268         mlen = (len < (4-m)) ? len : 4-m;
0269         bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
0270         data += mlen;
0271         len  -= mlen;
0272     }
0273
0274     /* process 32-bit aligned data words */
0275     pdata = (uint32_t *)data;
0276     mlen  = len/4;
0277     data += 4*mlen;
0278     len  -= 4*mlen;
0279     memcpy(r, bch->ecc_buf, r_bytes);
0280
0281     /*
0282      * split each 32-bit word into 4 polynomials of weight 8 as follows:
0283      *
0284      * 31 ...24  23 ...16  15 ... 8  7 ... 0
0285      * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
0286      *                               tttttttt  mod g = r0 (precomputed)
0287      *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
0288      *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
0289      * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
0290      * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
0291      */
0292     while (mlen--) {
0293         /* input data is read in big-endian format */
0294         w = cpu_to_be32(*pdata++);
0295         if (bch->swap_bits)
0296             w = (u32)swap_bits(bch, w) |
0297                 ((u32)swap_bits(bch, w >> 8) << 8) |
0298                 ((u32)swap_bits(bch, w >> 16) << 16) |
0299                 ((u32)swap_bits(bch, w >> 24) << 24);
0300         w ^= r[0];
0301         p0 = tab0 + (l+1)*((w >>  0) & 0xff);
0302         p1 = tab1 + (l+1)*((w >>  8) & 0xff);
0303         p2 = tab2 + (l+1)*((w >> 16) & 0xff);
0304         p3 = tab3 + (l+1)*((w >> 24) & 0xff);
0305
0306         for (i = 0; i < l; i++)
0307             r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
0308
0309         r[l] = p0[l]^p1[l]^p2[l]^p3[l];
0310     }
0311     memcpy(bch->ecc_buf, r, r_bytes);
0312
0313     /* process last unaligned bytes */
0314     if (len)
0315         bch_encode_unaligned(bch, data, len, bch->ecc_buf);
0316
0317     /* store ecc parity bytes into original parity buffer */
0318     if (ecc)
0319         store_ecc8(bch, ecc, bch->ecc_buf);
0320 }
0321 EXPORT_SYMBOL_GPL(bch_encode);
0322
0323 static inline int modulo(struct bch_control *bch, unsigned int v)
0324 {
0325     const unsigned int n = GF_N(bch);
0326     while (v >= n) {
0327         v -= n;
0328         v = (v & n) + (v >> GF_M(bch));
0329     }
0330     return v;
0331 }
0332
0333 /*
0334  * shorter and faster modulo function, only works when v < 2N.
0335  */
0336 static inline int mod_s(struct bch_control *bch, unsigned int v)
0337 {
0338     const unsigned int n = GF_N(bch);
0339     return (v < n) ? v : v-n;
0340 }
0341
0342 static inline int deg(unsigned int poly)
0343 {
0344     /* polynomial degree is the most-significant bit index */
0345     return fls(poly)-1;
0346 }
0347
0348 static inline int parity(unsigned int x)
0349 {
0350     /*
0351      * public domain code snippet, lifted from
0352      * http://www-graphics.stanford.edu/~seander/bithacks.html
0353      */
0354     x ^= x >> 1;
0355     x ^= x >> 2;
0356     x = (x & 0x11111111U) * 0x11111111U;
0357     return (x >> 28) & 1;
0358 }
0359
0360 /* Galois field basic operations: multiply, divide, inverse, etc. */
0361
0362 static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
0363                   unsigned int b)
0364 {
0365     return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
0366                            bch->a_log_tab[b])] : 0;
0367 }
0368
0369 static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
0370 {
0371     return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
0372 }
0373
0374 static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
0375                   unsigned int b)
0376 {
0377     return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
0378                     GF_N(bch)-bch->a_log_tab[b])] : 0;
0379 }
0380
0381 static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
0382 {
0383     return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
0384 }
0385
0386 static inline unsigned int a_pow(struct bch_control *bch, int i)
0387 {
0388     return bch->a_pow_tab[modulo(bch, i)];
0389 }
0390
0391 static inline int a_log(struct bch_control *bch, unsigned int x)
0392 {
0393     return bch->a_log_tab[x];
0394 }
0395
0396 static inline int a_ilog(struct bch_control *bch, unsigned int x)
0397 {
0398     return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
0399 }
0400
0401 /*
0402  * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
0403  */
0404 static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
0405                   unsigned int *syn)
0406 {
0407     int i, j, s;
0408     unsigned int m;
0409     uint32_t poly;
0410     const int t = GF_T(bch);
0411
0412     s = bch->ecc_bits;
0413
0414     /* make sure extra bits in last ecc word are cleared */
0415     m = ((unsigned int)s) & 31;
0416     if (m)
0417         ecc[s/32] &= ~((1u << (32-m))-1);
0418     memset(syn, 0, 2*t*sizeof(*syn));
0419
0420     /* compute v(a^j) for j=1 .. 2t-1 */
0421     do {
0422         poly = *ecc++;
0423         s -= 32;
0424         while (poly) {
0425             i = deg(poly);
0426             for (j = 0; j < 2*t; j += 2)
0427                 syn[j] ^= a_pow(bch, (j+1)*(i+s));
0428
0429             poly ^= (1 << i);
0430         }
0431     } while (s > 0);
0432
0433     /* v(a^(2j)) = v(a^j)^2 */
0434     for (j = 0; j < t; j++)
0435         syn[2*j+1] = gf_sqr(bch, syn[j]);
0436 }
0437
0438 static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
0439 {
0440     memcpy(dst, src, GF_POLY_SZ(src->deg));
0441 }
0442
0443 static int compute_error_locator_polynomial(struct bch_control *bch,
0444                         const unsigned int *syn)
0445 {
0446     const unsigned int t = GF_T(bch);
0447     const unsigned int n = GF_N(bch);
0448     unsigned int i, j, tmp, l, pd = 1, d = syn[0];
0449     struct gf_poly *elp = bch->elp;
0450     struct gf_poly *pelp = bch->poly_2t[0];
0451     struct gf_poly *elp_copy = bch->poly_2t[1];
0452     int k, pp = -1;
0453
0454     memset(pelp, 0, GF_POLY_SZ(2*t));
0455     memset(elp, 0, GF_POLY_SZ(2*t));
0456
0457     pelp->deg = 0;
0458     pelp->c[0] = 1;
0459     elp->deg = 0;
0460     elp->c[0] = 1;
0461
0462     /* use simplified binary Berlekamp-Massey algorithm */
0463     for (i = 0; (i < t) && (elp->deg <= t); i++) {
0464         if (d) {
0465             k = 2*i-pp;
0466             gf_poly_copy(elp_copy, elp);
0467             /* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
0468             tmp = a_log(bch, d)+n-a_log(bch, pd);
0469             for (j = 0; j <= pelp->deg; j++) {
0470                 if (pelp->c[j]) {
0471                     l = a_log(bch, pelp->c[j]);
0472                     elp->c[j+k] ^= a_pow(bch, tmp+l);
0473                 }
0474             }
0475             /* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
0476             tmp = pelp->deg+k;
0477             if (tmp > elp->deg) {
0478                 elp->deg = tmp;
0479                 gf_poly_copy(pelp, elp_copy);
0480                 pd = d;
0481                 pp = 2*i;
0482             }
0483         }
0484         /* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
0485         if (i < t-1) {
0486             d = syn[2*i+2];
0487             for (j = 1; j <= elp->deg; j++)
0488                 d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
0489         }
0490     }
0491     dbg("elp=%s\n", gf_poly_str(elp));
0492     return (elp->deg > t) ? -1 : (int)elp->deg;
0493 }
0494
0495 /*
0496  * solve a m x m linear system in GF(2) with an expected number of solutions,
0497  * and return the number of found solutions
0498  */
0499 static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
0500                    unsigned int *sol, int nsol)
0501 {
0502     const int m = GF_M(bch);
0503     unsigned int tmp, mask;
0504     int rem, c, r, p, k, param[BCH_MAX_M];
0505
0506     k = 0;
0507     mask = 1 << m;
0508
0509     /* Gaussian elimination */
0510     for (c = 0; c < m; c++) {
0511         rem = 0;
0512         p = c-k;
0513         /* find suitable row for elimination */
0514         for (r = p; r < m; r++) {
0515             if (rows[r] & mask) {
0516                 if (r != p) {
0517                     tmp = rows[r];
0518                     rows[r] = rows[p];
0519                     rows[p] = tmp;
0520                 }
0521                 rem = r+1;
0522                 break;
0523             }
0524         }
0525         if (rem) {
0526             /* perform elimination on remaining rows */
0527             tmp = rows[p];
0528             for (r = rem; r < m; r++) {
0529                 if (rows[r] & mask)
0530                     rows[r] ^= tmp;
0531             }
0532         } else {
0533             /* elimination not needed, store defective row index */
0534             param[k++] = c;
0535         }
0536         mask >>= 1;
0537     }
0538     /* rewrite system, inserting fake parameter rows */
0539     if (k > 0) {
0540         p = k;
0541         for (r = m-1; r >= 0; r--) {
0542             if ((r > m-1-k) && rows[r])
0543                 /* system has no solution */
0544                 return 0;
0545
0546             rows[r] = (p && (r == param[p-1])) ?
0547                 p--, 1u << (m-r) : rows[r-p];
0548         }
0549     }
0550
0551     if (nsol != (1 << k))
0552         /* unexpected number of solutions */
0553         return 0;
0554
0555     for (p = 0; p < nsol; p++) {
0556         /* set parameters for p-th solution */
0557         for (c = 0; c < k; c++)
0558             rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
0559
0560         /* compute unique solution */
0561         tmp = 0;
0562         for (r = m-1; r >= 0; r--) {
0563             mask = rows[r] & (tmp|1);
0564             tmp |= parity(mask) << (m-r);
0565         }
0566         sol[p] = tmp >> 1;
0567     }
0568     return nsol;
0569 }
0570
0571 /*
0572  * this function builds and solves a linear system for finding roots of a degree
0573  * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
0574  */
0575 static int find_affine4_roots(struct bch_control *bch, unsigned int a,
0576                   unsigned int b, unsigned int c,
0577                   unsigned int *roots)
0578 {
0579     int i, j, k;
0580     const int m = GF_M(bch);
0581     unsigned int mask = 0xff, t, rows[16] = {0,};
0582
0583     j = a_log(bch, b);
0584     k = a_log(bch, a);
0585     rows[0] = c;
0586
0587     /* build linear system to solve X^4+aX^2+bX+c = 0 */
0588     for (i = 0; i < m; i++) {
0589         rows[i+1] = bch->a_pow_tab[4*i]^
0590             (a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
0591             (b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
0592         j++;
0593         k += 2;
0594     }
0595     /*
0596      * transpose 16x16 matrix before passing it to linear solver
0597      * warning: this code assumes m < 16
0598      */
0599     for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
0600         for (k = 0; k < 16; k = (k+j+1) & ~j) {
0601             t = ((rows[k] >> j)^rows[k+j]) & mask;
0602             rows[k] ^= (t << j);
0603             rows[k+j] ^= t;
0604         }
0605     }
0606     return solve_linear_system(bch, rows, roots, 4);
0607 }
0608
0609 /*
0610  * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
0611  */
0612 static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
0613                 unsigned int *roots)
0614 {
0615     int n = 0;
0616
0617     if (poly->c[0])
0618         /* poly[X] = bX+c with c!=0, root=c/b */
0619         roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
0620                    bch->a_log_tab[poly->c[1]]);
0621     return n;
0622 }
0623
0624 /*
0625  * compute roots of a degree 2 polynomial over GF(2^m)
0626  */
0627 static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
0628                 unsigned int *roots)
0629 {
0630     int n = 0, i, l0, l1, l2;
0631     unsigned int u, v, r;
0632
0633     if (poly->c[0] && poly->c[1]) {
0634
0635         l0 = bch->a_log_tab[poly->c[0]];
0636         l1 = bch->a_log_tab[poly->c[1]];
0637         l2 = bch->a_log_tab[poly->c[2]];
0638
0639         /* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
0640         u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
0641         /*
0642          * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
0643          * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
0644          * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
0645          * i.e. r and r+1 are roots iff Tr(u)=0
0646          */
0647         r = 0;
0648         v = u;
0649         while (v) {
0650             i = deg(v);
0651             r ^= bch->xi_tab[i];
0652             v ^= (1 << i);
0653         }
0654         /* verify root */
0655         if ((gf_sqr(bch, r)^r) == u) {
0656             /* reverse z=a/bX transformation and compute log(1/r) */
0657             roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
0658                         bch->a_log_tab[r]+l2);
0659             roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
0660                         bch->a_log_tab[r^1]+l2);
0661         }
0662     }
0663     return n;
0664 }
0665
0666 /*
0667  * compute roots of a degree 3 polynomial over GF(2^m)
0668  */
0669 static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
0670                 unsigned int *roots)
0671 {
0672     int i, n = 0;
0673     unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
0674
0675     if (poly->c[0]) {
0676         /* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
0677         e3 = poly->c[3];
0678         c2 = gf_div(bch, poly->c[0], e3);
0679         b2 = gf_div(bch, poly->c[1], e3);
0680         a2 = gf_div(bch, poly->c[2], e3);
0681
0682         /* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
0683         c = gf_mul(bch, a2, c2);           /* c = a2c2      */
0684         b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
0685         a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
0686
0687         /* find the 4 roots of this affine polynomial */
0688         if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
0689             /* remove a2 from final list of roots */
0690             for (i = 0; i < 4; i++) {
0691                 if (tmp[i] != a2)
0692                     roots[n++] = a_ilog(bch, tmp[i]);
0693             }
0694         }
0695     }
0696     return n;
0697 }
0698
0699 /*
0700  * compute roots of a degree 4 polynomial over GF(2^m)
0701  */
0702 static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
0703                 unsigned int *roots)
0704 {
0705     int i, l, n = 0;
0706     unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
0707
0708     if (poly->c[0] == 0)
0709         return 0;
0710
0711     /* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
0712     e4 = poly->c[4];
0713     d = gf_div(bch, poly->c[0], e4);
0714     c = gf_div(bch, poly->c[1], e4);
0715     b = gf_div(bch, poly->c[2], e4);
0716     a = gf_div(bch, poly->c[3], e4);
0717
0718     /* use Y=1/X transformation to get an affine polynomial */
0719     if (a) {
0720         /* first, eliminate cX by using z=X+e with ae^2+c=0 */
0721         if (c) {
0722             /* compute e such that e^2 = c/a */
0723             f = gf_div(bch, c, a);
0724             l = a_log(bch, f);
0725             l += (l & 1) ? GF_N(bch) : 0;
0726             e = a_pow(bch, l/2);
0727             /*
0728              * use transformation z=X+e:
0729              * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
0730              * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
0731              * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
0732              * z^4 + az^3 +     b'z^2 + d'
0733              */
0734             d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
0735             b = gf_mul(bch, a, e)^b;
0736         }
0737         /* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
0738         if (d == 0)
0739             /* assume all roots have multiplicity 1 */
0740             return 0;
0741
0742         c2 = gf_inv(bch, d);
0743         b2 = gf_div(bch, a, d);
0744         a2 = gf_div(bch, b, d);
0745     } else {
0746         /* polynomial is already affine */
0747         c2 = d;
0748         b2 = c;
0749         a2 = b;
0750     }
0751     /* find the 4 roots of this affine polynomial */
0752     if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
0753         for (i = 0; i < 4; i++) {
0754             /* post-process roots (reverse transformations) */
0755             f = a ? gf_inv(bch, roots[i]) : roots[i];
0756             roots[i] = a_ilog(bch, f^e);
0757         }
0758         n = 4;
0759     }
0760     return n;
0761 }
0762
0763 /*
0764  * build monic, log-based representation of a polynomial
0765  */
0766 static void gf_poly_logrep(struct bch_control *bch,
0767                const struct gf_poly *a, int *rep)
0768 {
0769     int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
0770
0771     /* represent 0 values with -1; warning, rep[d] is not set to 1 */
0772     for (i = 0; i < d; i++)
0773         rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
0774 }
0775
0776 /*
0777  * compute polynomial Euclidean division remainder in GF(2^m)[X]
0778  */
0779 static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
0780             const struct gf_poly *b, int *rep)
0781 {
0782     int la, p, m;
0783     unsigned int i, j, *c = a->c;
0784     const unsigned int d = b->deg;
0785
0786     if (a->deg < d)
0787         return;
0788
0789     /* reuse or compute log representation of denominator */
0790     if (!rep) {
0791         rep = bch->cache;
0792         gf_poly_logrep(bch, b, rep);
0793     }
0794
0795     for (j = a->deg; j >= d; j--) {
0796         if (c[j]) {
0797             la = a_log(bch, c[j]);
0798             p = j-d;
0799             for (i = 0; i < d; i++, p++) {
0800                 m = rep[i];
0801                 if (m >= 0)
0802                     c[p] ^= bch->a_pow_tab[mod_s(bch,
0803                                      m+la)];
0804             }
0805         }
0806     }
0807     a->deg = d-1;
0808     while (!c[a->deg] && a->deg)
0809         a->deg--;
0810 }
0811
0812 /*
0813  * compute polynomial Euclidean division quotient in GF(2^m)[X]
0814  */
0815 static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
0816             const struct gf_poly *b, struct gf_poly *q)
0817 {
0818     if (a->deg >= b->deg) {
0819         q->deg = a->deg-b->deg;
0820         /* compute a mod b (modifies a) */
0821         gf_poly_mod(bch, a, b, NULL);
0822         /* quotient is stored in upper part of polynomial a */
0823         memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
0824     } else {
0825         q->deg = 0;
0826         q->c[0] = 0;
0827     }
0828 }
0829
0830 /*
0831  * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
0832  */
0833 static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
0834                    struct gf_poly *b)
0835 {
0836     struct gf_poly *tmp;
0837
0838     dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
0839
0840     if (a->deg < b->deg) {
0841         tmp = b;
0842         b = a;
0843         a = tmp;
0844     }
0845
0846     while (b->deg > 0) {
0847         gf_poly_mod(bch, a, b, NULL);
0848         tmp = b;
0849         b = a;
0850         a = tmp;
0851     }
0852
0853     dbg("%s\n", gf_poly_str(a));
0854
0855     return a;
0856 }
0857
0858 /*
0859  * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
0860  * This is used in Berlekamp Trace algorithm for splitting polynomials
0861  */
0862 static void compute_trace_bk_mod(struct bch_control *bch, int k,
0863                  const struct gf_poly *f, struct gf_poly *z,
0864                  struct gf_poly *out)
0865 {
0866     const int m = GF_M(bch);
0867     int i, j;
0868
0869     /* z contains z^2j mod f */
0870     z->deg = 1;
0871     z->c[0] = 0;
0872     z->c[1] = bch->a_pow_tab[k];
0873
0874     out->deg = 0;
0875     memset(out, 0, GF_POLY_SZ(f->deg));
0876
0877     /* compute f log representation only once */
0878     gf_poly_logrep(bch, f, bch->cache);
0879
0880     for (i = 0; i < m; i++) {
0881         /* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
0882         for (j = z->deg; j >= 0; j--) {
0883             out->c[j] ^= z->c[j];
0884             z->c[2*j] = gf_sqr(bch, z->c[j]);
0885             z->c[2*j+1] = 0;
0886         }
0887         if (z->deg > out->deg)
0888             out->deg = z->deg;
0889
0890         if (i < m-1) {
0891             z->deg *= 2;
0892             /* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
0893             gf_poly_mod(bch, z, f, bch->cache);
0894         }
0895     }
0896     while (!out->c[out->deg] && out->deg)
0897         out->deg--;
0898
0899     dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
0900 }
0901
0902 /*
0903  * factor a polynomial using Berlekamp Trace algorithm (BTA)
0904  */
0905 static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
0906                   struct gf_poly **g, struct gf_poly **h)
0907 {
0908     struct gf_poly *f2 = bch->poly_2t[0];
0909     struct gf_poly *q  = bch->poly_2t[1];
0910     struct gf_poly *tk = bch->poly_2t[2];
0911     struct gf_poly *z  = bch->poly_2t[3];
0912     struct gf_poly *gcd;
0913
0914     dbg("factoring %s...\n", gf_poly_str(f));
0915
0916     *g = f;
0917     *h = NULL;
0918
0919     /* tk = Tr(a^k.X) mod f */
0920     compute_trace_bk_mod(bch, k, f, z, tk);
0921
0922     if (tk->deg > 0) {
0923         /* compute g = gcd(f, tk) (destructive operation) */
0924         gf_poly_copy(f2, f);
0925         gcd = gf_poly_gcd(bch, f2, tk);
0926         if (gcd->deg < f->deg) {
0927             /* compute h=f/gcd(f,tk); this will modify f and q */
0928             gf_poly_div(bch, f, gcd, q);
0929             /* store g and h in-place (clobbering f) */
0930             *h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
0931             gf_poly_copy(*g, gcd);
0932             gf_poly_copy(*h, q);
0933         }
0934     }
0935 }
0936
0937 /*
0938  * find roots of a polynomial, using BTZ algorithm; see the beginning of this
0939  * file for details
0940  */
0941 static int find_poly_roots(struct bch_control *bch, unsigned int k,
0942                struct gf_poly *poly, unsigned int *roots)
0943 {
0944     int cnt;
0945     struct gf_poly *f1, *f2;
0946
0947     switch (poly->deg) {
0948         /* handle low degree polynomials with ad hoc techniques */
0949     case 1:
0950         cnt = find_poly_deg1_roots(bch, poly, roots);
0951         break;
0952     case 2:
0953         cnt = find_poly_deg2_roots(bch, poly, roots);
0954         break;
0955     case 3:
0956         cnt = find_poly_deg3_roots(bch, poly, roots);
0957         break;
0958     case 4:
0959         cnt = find_poly_deg4_roots(bch, poly, roots);
0960         break;
0961     default:
0962         /* factor polynomial using Berlekamp Trace Algorithm (BTA) */
0963         cnt = 0;
0964         if (poly->deg && (k <= GF_M(bch))) {
0965             factor_polynomial(bch, k, poly, &f1, &f2);
0966             if (f1)
0967                 cnt += find_poly_roots(bch, k+1, f1, roots);
0968             if (f2)
0969                 cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
0970         }
0971         break;
0972     }
0973     return cnt;
0974 }
0975
0976 #if defined(USE_CHIEN_SEARCH)
0977 /*
0978  * exhaustive root search (Chien) implementation - not used, included only for
0979  * reference/comparison tests
0980  */
0981 static int chien_search(struct bch_control *bch, unsigned int len,
0982             struct gf_poly *p, unsigned int *roots)
0983 {
0984     int m;
0985     unsigned int i, j, syn, syn0, count = 0;
0986     const unsigned int k = 8*len+bch->ecc_bits;
0987
0988     /* use a log-based representation of polynomial */
0989     gf_poly_logrep(bch, p, bch->cache);
0990     bch->cache[p->deg] = 0;
0991     syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
0992
0993     for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
0994         /* compute elp(a^i) */
0995         for (j = 1, syn = syn0; j <= p->deg; j++) {
0996             m = bch->cache[j];
0997             if (m >= 0)
0998                 syn ^= a_pow(bch, m+j*i);
0999         }
1000         if (syn == 0) {
1001             roots[count++] = GF_N(bch)-i;
1002             if (count == p->deg)
1003                 break;
1004         }
1005     }
1006     return (count == p->deg) ? count : 0;
1007 }
1008 #define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009 #endif /* USE_CHIEN_SEARCH */
1010
1011 /**
1012  * bch_decode - decode received codeword and find bit error locations
1013  * @bch:      BCH control structure
1014  * @data:     received data, ignored if @calc_ecc is provided
1015  * @len:      data length in bytes, must always be provided
1016  * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017  * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018  * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019  * @errloc:   output array of error locations
1020  *
1021  * Returns:
1022  *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023  *  invalid parameters were provided
1024  *
1025  * Depending on the available hw BCH support and the need to compute @calc_ecc
1026  * separately (using bch_encode()), this function should be called with one of
1027  * the following parameter configurations -
1028  *
1029  * by providing @data and @recv_ecc only:
1030  *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031  *
1032  * by providing @recv_ecc and @calc_ecc:
1033  *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034  *
1035  * by providing ecc = recv_ecc XOR calc_ecc:
1036  *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037  *
1038  * by providing syndrome results @syn:
1039  *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040  *
1041  * Once bch_decode() has successfully returned with a positive value, error
1042  * locations returned in array @errloc should be interpreted as follows -
1043  *
1044  * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045  * data correction)
1046  *
1047  * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048  * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049  *
1050  * Note that this function does not perform any data correction by itself, it
1051  * merely indicates error locations.
1052  */
1053 int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054            const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055            const unsigned int *syn, unsigned int *errloc)
1056 {
1057     const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058     unsigned int nbits;
1059     int i, err, nroots;
1060     uint32_t sum;
1061
1062     /* sanity check: make sure data length can be handled */
1063     if (8*len > (bch->n-bch->ecc_bits))
1064         return -EINVAL;
1065
1066     /* if caller does not provide syndromes, compute them */
1067     if (!syn) {
1068         if (!calc_ecc) {
1069             /* compute received data ecc into an internal buffer */
1070             if (!data || !recv_ecc)
1071                 return -EINVAL;
1072             bch_encode(bch, data, len, NULL);
1073         } else {
1074             /* load provided calculated ecc */
1075             load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076         }
1077         /* load received ecc or assume it was XORed in calc_ecc */
1078         if (recv_ecc) {
1079             load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080             /* XOR received and calculated ecc */
1081             for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082                 bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083                 sum |= bch->ecc_buf[i];
1084             }
1085             if (!sum)
1086                 /* no error found */
1087                 return 0;
1088         }
1089         compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090         syn = bch->syn;
1091     }
1092
1093     err = compute_error_locator_polynomial(bch, syn);
1094     if (err > 0) {
1095         nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096         if (err != nroots)
1097             err = -1;
1098     }
1099     if (err > 0) {
1100         /* post-process raw error locations for easier correction */
1101         nbits = (len*8)+bch->ecc_bits;
1102         for (i = 0; i < err; i++) {
1103             if (errloc[i] >= nbits) {
1104                 err = -1;
1105                 break;
1106             }
1107             errloc[i] = nbits-1-errloc[i];
1108             if (!bch->swap_bits)
1109                 errloc[i] = (errloc[i] & ~7) |
1110                         (7-(errloc[i] & 7));
1111         }
1112     }
1113     return (err >= 0) ? err : -EBADMSG;
1114 }
1115 EXPORT_SYMBOL_GPL(bch_decode);
1116
1117 /*
1118  * generate Galois field lookup tables
1119  */
1120 static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121 {
1122     unsigned int i, x = 1;
1123     const unsigned int k = 1 << deg(poly);
1124
1125     /* primitive polynomial must be of degree m */
1126     if (k != (1u << GF_M(bch)))
1127         return -1;
1128
1129     for (i = 0; i < GF_N(bch); i++) {
1130         bch->a_pow_tab[i] = x;
1131         bch->a_log_tab[x] = i;
1132         if (i && (x == 1))
1133             /* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134             return -1;
1135         x <<= 1;
1136         if (x & k)
1137             x ^= poly;
1138     }
1139     bch->a_pow_tab[GF_N(bch)] = 1;
1140     bch->a_log_tab[0] = 0;
1141
1142     return 0;
1143 }
1144
1145 /*
1146  * compute generator polynomial remainder tables for fast encoding
1147  */
1148 static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149 {
1150     int i, j, b, d;
1151     uint32_t data, hi, lo, *tab;
1152     const int l = BCH_ECC_WORDS(bch);
1153     const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154     const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155
1156     memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157
1158     for (i = 0; i < 256; i++) {
1159         /* p(X)=i is a small polynomial of weight <= 8 */
1160         for (b = 0; b < 4; b++) {
1161             /* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162             tab = bch->mod8_tab + (b*256+i)*l;
1163             data = i << (8*b);
1164             while (data) {
1165                 d = deg(data);
1166                 /* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167                 data ^= g[0] >> (31-d);
1168                 for (j = 0; j < ecclen; j++) {
1169                     hi = (d < 31) ? g[j] << (d+1) : 0;
1170                     lo = (j+1 < plen) ?
1171                         g[j+1] >> (31-d) : 0;
1172                     tab[j] ^= hi|lo;
1173                 }
1174             }
1175         }
1176     }
1177 }
1178
1179 /*
1180  * build a base for factoring degree 2 polynomials
1181  */
1182 static int build_deg2_base(struct bch_control *bch)
1183 {
1184     const int m = GF_M(bch);
1185     int i, j, r;
1186     unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187
1188     /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189     for (i = 0; i < m; i++) {
1190         for (j = 0, sum = 0; j < m; j++)
1191             sum ^= a_pow(bch, i*(1 << j));
1192
1193         if (sum) {
1194             ak = bch->a_pow_tab[i];
1195             break;
1196         }
1197     }
1198     /* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199     remaining = m;
1200     memset(xi, 0, sizeof(xi));
1201
1202     for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203         y = gf_sqr(bch, x)^x;
1204         for (i = 0; i < 2; i++) {
1205             r = a_log(bch, y);
1206             if (y && (r < m) && !xi[r]) {
1207                 bch->xi_tab[r] = x;
1208                 xi[r] = 1;
1209                 remaining--;
1210                 dbg("x%d = %x\n", r, x);
1211                 break;
1212             }
1213             y ^= ak;
1214         }
1215     }
1216     /* should not happen but check anyway */
1217     return remaining ? -1 : 0;
1218 }
1219
1220 static void *bch_alloc(size_t size, int *err)
1221 {
1222     void *ptr;
1223
1224     ptr = kmalloc(size, GFP_KERNEL);
1225     if (ptr == NULL)
1226         *err = 1;
1227     return ptr;
1228 }
1229
1230 /*
1231  * compute generator polynomial for given (m,t) parameters.
1232  */
1233 static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234 {
1235     const unsigned int m = GF_M(bch);
1236     const unsigned int t = GF_T(bch);
1237     int n, err = 0;
1238     unsigned int i, j, nbits, r, word, *roots;
1239     struct gf_poly *g;
1240     uint32_t *genpoly;
1241
1242     g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243     roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244     genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245
1246     if (err) {
1247         kfree(genpoly);
1248         genpoly = NULL;
1249         goto finish;
1250     }
1251
1252     /* enumerate all roots of g(X) */
1253     memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254     for (i = 0; i < t; i++) {
1255         for (j = 0, r = 2*i+1; j < m; j++) {
1256             roots[r] = 1;
1257             r = mod_s(bch, 2*r);
1258         }
1259     }
1260     /* build generator polynomial g(X) */
1261     g->deg = 0;
1262     g->c[0] = 1;
1263     for (i = 0; i < GF_N(bch); i++) {
1264         if (roots[i]) {
1265             /* multiply g(X) by (X+root) */
1266             r = bch->a_pow_tab[i];
1267             g->c[g->deg+1] = 1;
1268             for (j = g->deg; j > 0; j--)
1269                 g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270
1271             g->c[0] = gf_mul(bch, g->c[0], r);
1272             g->deg++;
1273         }
1274     }
1275     /* store left-justified binary representation of g(X) */
1276     n = g->deg+1;
1277     i = 0;
1278
1279     while (n > 0) {
1280         nbits = (n > 32) ? 32 : n;
1281         for (j = 0, word = 0; j < nbits; j++) {
1282             if (g->c[n-1-j])
1283                 word |= 1u << (31-j);
1284         }
1285         genpoly[i++] = word;
1286         n -= nbits;
1287     }
1288     bch->ecc_bits = g->deg;
1289
1290 finish:
1291     kfree(g);
1292     kfree(roots);
1293
1294     return genpoly;
1295 }
1296
1297 /**
1298  * bch_init - initialize a BCH encoder/decoder
1299  * @m:          Galois field order, should be in the range 5-15
1300  * @t:          maximum error correction capability, in bits
1301  * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302  * @swap_bits:  swap bits within data and syndrome bytes
1303  *
1304  * Returns:
1305  *  a newly allocated BCH control structure if successful, NULL otherwise
1306  *
1307  * This initialization can take some time, as lookup tables are built for fast
1308  * encoding/decoding; make sure not to call this function from a time critical
1309  * path. Usually, bch_init() should be called on module/driver init and
1310  * bch_free() should be called to release memory on exit.
1311  *
1312  * You may provide your own primitive polynomial of degree @m in argument
1313  * @prim_poly, or let bch_init() use its default polynomial.
1314  *
1315  * Once bch_init() has successfully returned a pointer to a newly allocated
1316  * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317  * the structure.
1318  */
1319 struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320                  bool swap_bits)
1321 {
1322     int err = 0;
1323     unsigned int i, words;
1324     uint32_t *genpoly;
1325     struct bch_control *bch = NULL;
1326
1327     const int min_m = 5;
1328
1329     /* default primitive polynomials */
1330     static const unsigned int prim_poly_tab[] = {
1331         0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332         0x402b, 0x8003,
1333     };
1334
1335 #if defined(CONFIG_BCH_CONST_PARAMS)
1336     if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337         printk(KERN_ERR "bch encoder/decoder was configured to support "
1338                "parameters m=%d, t=%d only!\n",
1339                CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340         goto fail;
1341     }
1342 #endif
1343     if ((m < min_m) || (m > BCH_MAX_M))
1344         /*
1345          * values of m greater than 15 are not currently supported;
1346          * supporting m > 15 would require changing table base type
1347          * (uint16_t) and a small patch in matrix transposition
1348          */
1349         goto fail;
1350
1351     if (t > BCH_MAX_T)
1352         /*
1353          * we can support larger than 64 bits if necessary, at the
1354          * cost of higher stack usage.
1355          */
1356         goto fail;
1357
1358     /* sanity checks */
1359     if ((t < 1) || (m*t >= ((1 << m)-1)))
1360         /* invalid t value */
1361         goto fail;
1362
1363     /* select a primitive polynomial for generating GF(2^m) */
1364     if (prim_poly == 0)
1365         prim_poly = prim_poly_tab[m-min_m];
1366
1367     bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368     if (bch == NULL)
1369         goto fail;
1370
1371     bch->m = m;
1372     bch->t = t;
1373     bch->n = (1 << m)-1;
1374     words  = DIV_ROUND_UP(m*t, 32);
1375     bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376     bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377     bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378     bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379     bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380     bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381     bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382     bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383     bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384     bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385     bch->swap_bits = swap_bits;
1386
1387     for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388         bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389
1390     if (err)
1391         goto fail;
1392
1393     err = build_gf_tables(bch, prim_poly);
1394     if (err)
1395         goto fail;
1396
1397     /* use generator polynomial for computing encoding tables */
1398     genpoly = compute_generator_polynomial(bch);
1399     if (genpoly == NULL)
1400         goto fail;
1401
1402     build_mod8_tables(bch, genpoly);
1403     kfree(genpoly);
1404
1405     err = build_deg2_base(bch);
1406     if (err)
1407         goto fail;
1408
1409     return bch;
1410
1411 fail:
1412     bch_free(bch);
1413     return NULL;
1414 }
1415 EXPORT_SYMBOL_GPL(bch_init);
1416
1417 /**
1418  *  bch_free - free the BCH control structure
1419  *  @bch:    BCH control structure to release
1420  */
1421 void bch_free(struct bch_control *bch)
1422 {
1423     unsigned int i;
1424
1425     if (bch) {
1426         kfree(bch->a_pow_tab);
1427         kfree(bch->a_log_tab);
1428         kfree(bch->mod8_tab);
1429         kfree(bch->ecc_buf);
1430         kfree(bch->ecc_buf2);
1431         kfree(bch->xi_tab);
1432         kfree(bch->syn);
1433         kfree(bch->cache);
1434         kfree(bch->elp);
1435
1436         for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437             kfree(bch->poly_2t[i]);
1438
1439         kfree(bch);
1440     }
1441 }
1442 EXPORT_SYMBOL_GPL(bch_free);
1443
1444 MODULE_LICENSE("GPL");
1445 MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446 MODULE_DESCRIPTION("Binary BCH encoder/decoder");