Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * 842 Software Compression
0004  *
0005  * Copyright (C) 2015 Dan Streetman, IBM Corp
0006  *
0007  * See 842.h for details of the 842 compressed format.
0008  */
0009 
0010 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0011 #define MODULE_NAME "842_compress"
0012 
0013 #include <linux/hashtable.h>
0014 
0015 #include "842.h"
0016 #include "842_debugfs.h"
0017 
0018 #define SW842_HASHTABLE8_BITS   (10)
0019 #define SW842_HASHTABLE4_BITS   (11)
0020 #define SW842_HASHTABLE2_BITS   (10)
0021 
0022 /* By default, we allow compressing input buffers of any length, but we must
0023  * use the non-standard "short data" template so the decompressor can correctly
0024  * reproduce the uncompressed data buffer at the right length.  However the
0025  * hardware 842 compressor will not recognize the "short data" template, and
0026  * will fail to decompress any compressed buffer containing it (I have no idea
0027  * why anyone would want to use software to compress and hardware to decompress
0028  * but that's beside the point).  This parameter forces the compression
0029  * function to simply reject any input buffer that isn't a multiple of 8 bytes
0030  * long, instead of using the "short data" template, so that all compressed
0031  * buffers produced by this function will be decompressable by the 842 hardware
0032  * decompressor.  Unless you have a specific need for that, leave this disabled
0033  * so that any length buffer can be compressed.
0034  */
0035 static bool sw842_strict;
0036 module_param_named(strict, sw842_strict, bool, 0644);
0037 
0038 static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
0039     { I8, N0, N0, N0, 0x19 }, /* 8 */
0040     { I4, I4, N0, N0, 0x18 }, /* 18 */
0041     { I4, I2, I2, N0, 0x17 }, /* 25 */
0042     { I2, I2, I4, N0, 0x13 }, /* 25 */
0043     { I2, I2, I2, I2, 0x12 }, /* 32 */
0044     { I4, I2, D2, N0, 0x16 }, /* 33 */
0045     { I4, D2, I2, N0, 0x15 }, /* 33 */
0046     { I2, D2, I4, N0, 0x0e }, /* 33 */
0047     { D2, I2, I4, N0, 0x09 }, /* 33 */
0048     { I2, I2, I2, D2, 0x11 }, /* 40 */
0049     { I2, I2, D2, I2, 0x10 }, /* 40 */
0050     { I2, D2, I2, I2, 0x0d }, /* 40 */
0051     { D2, I2, I2, I2, 0x08 }, /* 40 */
0052     { I4, D4, N0, N0, 0x14 }, /* 41 */
0053     { D4, I4, N0, N0, 0x04 }, /* 41 */
0054     { I2, I2, D4, N0, 0x0f }, /* 48 */
0055     { I2, D2, I2, D2, 0x0c }, /* 48 */
0056     { I2, D4, I2, N0, 0x0b }, /* 48 */
0057     { D2, I2, I2, D2, 0x07 }, /* 48 */
0058     { D2, I2, D2, I2, 0x06 }, /* 48 */
0059     { D4, I2, I2, N0, 0x03 }, /* 48 */
0060     { I2, D2, D4, N0, 0x0a }, /* 56 */
0061     { D2, I2, D4, N0, 0x05 }, /* 56 */
0062     { D4, I2, D2, N0, 0x02 }, /* 56 */
0063     { D4, D2, I2, N0, 0x01 }, /* 56 */
0064     { D8, N0, N0, N0, 0x00 }, /* 64 */
0065 };
0066 
0067 struct sw842_hlist_node8 {
0068     struct hlist_node node;
0069     u64 data;
0070     u8 index;
0071 };
0072 
0073 struct sw842_hlist_node4 {
0074     struct hlist_node node;
0075     u32 data;
0076     u16 index;
0077 };
0078 
0079 struct sw842_hlist_node2 {
0080     struct hlist_node node;
0081     u16 data;
0082     u8 index;
0083 };
0084 
0085 #define INDEX_NOT_FOUND     (-1)
0086 #define INDEX_NOT_CHECKED   (-2)
0087 
0088 struct sw842_param {
0089     u8 *in;
0090     u8 *instart;
0091     u64 ilen;
0092     u8 *out;
0093     u64 olen;
0094     u8 bit;
0095     u64 data8[1];
0096     u32 data4[2];
0097     u16 data2[4];
0098     int index8[1];
0099     int index4[2];
0100     int index2[4];
0101     DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
0102     DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
0103     DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
0104     struct sw842_hlist_node8 node8[1 << I8_BITS];
0105     struct sw842_hlist_node4 node4[1 << I4_BITS];
0106     struct sw842_hlist_node2 node2[1 << I2_BITS];
0107 };
0108 
0109 #define get_input_data(p, o, b)                     \
0110     be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
0111 
0112 #define init_hashtable_nodes(p, b)  do {            \
0113     int _i;                         \
0114     hash_init((p)->htable##b);              \
0115     for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \
0116         (p)->node##b[_i].index = _i;            \
0117         (p)->node##b[_i].data = 0;          \
0118         INIT_HLIST_NODE(&(p)->node##b[_i].node);    \
0119     }                           \
0120 } while (0)
0121 
0122 #define find_index(p, b, n) ({                  \
0123     struct sw842_hlist_node##b *_n;                 \
0124     p->index##b[n] = INDEX_NOT_FOUND;               \
0125     hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \
0126         if (p->data##b[n] == _n->data) {            \
0127             p->index##b[n] = _n->index;         \
0128             break;                      \
0129         }                           \
0130     }                               \
0131     p->index##b[n] >= 0;                        \
0132 })
0133 
0134 #define check_index(p, b, n)            \
0135     ((p)->index##b[n] == INDEX_NOT_CHECKED  \
0136      ? find_index(p, b, n)          \
0137      : (p)->index##b[n] >= 0)
0138 
0139 #define replace_hash(p, b, i, d)    do {                \
0140     struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];    \
0141     hash_del(&_n->node);                        \
0142     _n->data = (p)->data##b[d];                 \
0143     pr_debug("add hash index%x %x pos %x data %lx\n", b,        \
0144          (unsigned int)_n->index,               \
0145          (unsigned int)((p)->in - (p)->instart),        \
0146          (unsigned long)_n->data);              \
0147     hash_add((p)->htable##b, &_n->node, _n->data);          \
0148 } while (0)
0149 
0150 static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
0151 
0152 static int add_bits(struct sw842_param *p, u64 d, u8 n);
0153 
0154 static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
0155 {
0156     int ret;
0157 
0158     if (n <= s)
0159         return -EINVAL;
0160 
0161     ret = add_bits(p, d >> s, n - s);
0162     if (ret)
0163         return ret;
0164     return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
0165 }
0166 
0167 static int add_bits(struct sw842_param *p, u64 d, u8 n)
0168 {
0169     int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
0170     u64 o;
0171     u8 *out = p->out;
0172 
0173     pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
0174 
0175     if (n > 64)
0176         return -EINVAL;
0177 
0178     /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
0179      * or if we're at the end of the output buffer and would write past end
0180      */
0181     if (bits > 64)
0182         return __split_add_bits(p, d, n, 32);
0183     else if (p->olen < 8 && bits > 32 && bits <= 56)
0184         return __split_add_bits(p, d, n, 16);
0185     else if (p->olen < 4 && bits > 16 && bits <= 24)
0186         return __split_add_bits(p, d, n, 8);
0187 
0188     if (DIV_ROUND_UP(bits, 8) > p->olen)
0189         return -ENOSPC;
0190 
0191     o = *out & bmask[b];
0192     d <<= s;
0193 
0194     if (bits <= 8)
0195         *out = o | d;
0196     else if (bits <= 16)
0197         put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
0198     else if (bits <= 24)
0199         put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
0200     else if (bits <= 32)
0201         put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
0202     else if (bits <= 40)
0203         put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
0204     else if (bits <= 48)
0205         put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
0206     else if (bits <= 56)
0207         put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
0208     else
0209         put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
0210 
0211     p->bit += n;
0212 
0213     if (p->bit > 7) {
0214         p->out += p->bit / 8;
0215         p->olen -= p->bit / 8;
0216         p->bit %= 8;
0217     }
0218 
0219     return 0;
0220 }
0221 
0222 static int add_template(struct sw842_param *p, u8 c)
0223 {
0224     int ret, i, b = 0;
0225     u8 *t = comp_ops[c];
0226     bool inv = false;
0227 
0228     if (c >= OPS_MAX)
0229         return -EINVAL;
0230 
0231     pr_debug("template %x\n", t[4]);
0232 
0233     ret = add_bits(p, t[4], OP_BITS);
0234     if (ret)
0235         return ret;
0236 
0237     for (i = 0; i < 4; i++) {
0238         pr_debug("op %x\n", t[i]);
0239 
0240         switch (t[i] & OP_AMOUNT) {
0241         case OP_AMOUNT_8:
0242             if (b)
0243                 inv = true;
0244             else if (t[i] & OP_ACTION_INDEX)
0245                 ret = add_bits(p, p->index8[0], I8_BITS);
0246             else if (t[i] & OP_ACTION_DATA)
0247                 ret = add_bits(p, p->data8[0], 64);
0248             else
0249                 inv = true;
0250             break;
0251         case OP_AMOUNT_4:
0252             if (b == 2 && t[i] & OP_ACTION_DATA)
0253                 ret = add_bits(p, get_input_data(p, 2, 32), 32);
0254             else if (b != 0 && b != 4)
0255                 inv = true;
0256             else if (t[i] & OP_ACTION_INDEX)
0257                 ret = add_bits(p, p->index4[b >> 2], I4_BITS);
0258             else if (t[i] & OP_ACTION_DATA)
0259                 ret = add_bits(p, p->data4[b >> 2], 32);
0260             else
0261                 inv = true;
0262             break;
0263         case OP_AMOUNT_2:
0264             if (b != 0 && b != 2 && b != 4 && b != 6)
0265                 inv = true;
0266             if (t[i] & OP_ACTION_INDEX)
0267                 ret = add_bits(p, p->index2[b >> 1], I2_BITS);
0268             else if (t[i] & OP_ACTION_DATA)
0269                 ret = add_bits(p, p->data2[b >> 1], 16);
0270             else
0271                 inv = true;
0272             break;
0273         case OP_AMOUNT_0:
0274             inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
0275             break;
0276         default:
0277             inv = true;
0278             break;
0279         }
0280 
0281         if (ret)
0282             return ret;
0283 
0284         if (inv) {
0285             pr_err("Invalid templ %x op %d : %x %x %x %x\n",
0286                    c, i, t[0], t[1], t[2], t[3]);
0287             return -EINVAL;
0288         }
0289 
0290         b += t[i] & OP_AMOUNT;
0291     }
0292 
0293     if (b != 8) {
0294         pr_err("Invalid template %x len %x : %x %x %x %x\n",
0295                c, b, t[0], t[1], t[2], t[3]);
0296         return -EINVAL;
0297     }
0298 
0299     if (sw842_template_counts)
0300         atomic_inc(&template_count[t[4]]);
0301 
0302     return 0;
0303 }
0304 
0305 static int add_repeat_template(struct sw842_param *p, u8 r)
0306 {
0307     int ret;
0308 
0309     /* repeat param is 0-based */
0310     if (!r || --r > REPEAT_BITS_MAX)
0311         return -EINVAL;
0312 
0313     ret = add_bits(p, OP_REPEAT, OP_BITS);
0314     if (ret)
0315         return ret;
0316 
0317     ret = add_bits(p, r, REPEAT_BITS);
0318     if (ret)
0319         return ret;
0320 
0321     if (sw842_template_counts)
0322         atomic_inc(&template_repeat_count);
0323 
0324     return 0;
0325 }
0326 
0327 static int add_short_data_template(struct sw842_param *p, u8 b)
0328 {
0329     int ret, i;
0330 
0331     if (!b || b > SHORT_DATA_BITS_MAX)
0332         return -EINVAL;
0333 
0334     ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
0335     if (ret)
0336         return ret;
0337 
0338     ret = add_bits(p, b, SHORT_DATA_BITS);
0339     if (ret)
0340         return ret;
0341 
0342     for (i = 0; i < b; i++) {
0343         ret = add_bits(p, p->in[i], 8);
0344         if (ret)
0345             return ret;
0346     }
0347 
0348     if (sw842_template_counts)
0349         atomic_inc(&template_short_data_count);
0350 
0351     return 0;
0352 }
0353 
0354 static int add_zeros_template(struct sw842_param *p)
0355 {
0356     int ret = add_bits(p, OP_ZEROS, OP_BITS);
0357 
0358     if (ret)
0359         return ret;
0360 
0361     if (sw842_template_counts)
0362         atomic_inc(&template_zeros_count);
0363 
0364     return 0;
0365 }
0366 
0367 static int add_end_template(struct sw842_param *p)
0368 {
0369     int ret = add_bits(p, OP_END, OP_BITS);
0370 
0371     if (ret)
0372         return ret;
0373 
0374     if (sw842_template_counts)
0375         atomic_inc(&template_end_count);
0376 
0377     return 0;
0378 }
0379 
0380 static bool check_template(struct sw842_param *p, u8 c)
0381 {
0382     u8 *t = comp_ops[c];
0383     int i, match, b = 0;
0384 
0385     if (c >= OPS_MAX)
0386         return false;
0387 
0388     for (i = 0; i < 4; i++) {
0389         if (t[i] & OP_ACTION_INDEX) {
0390             if (t[i] & OP_AMOUNT_2)
0391                 match = check_index(p, 2, b >> 1);
0392             else if (t[i] & OP_AMOUNT_4)
0393                 match = check_index(p, 4, b >> 2);
0394             else if (t[i] & OP_AMOUNT_8)
0395                 match = check_index(p, 8, 0);
0396             else
0397                 return false;
0398             if (!match)
0399                 return false;
0400         }
0401 
0402         b += t[i] & OP_AMOUNT;
0403     }
0404 
0405     return true;
0406 }
0407 
0408 static void get_next_data(struct sw842_param *p)
0409 {
0410     p->data8[0] = get_input_data(p, 0, 64);
0411     p->data4[0] = get_input_data(p, 0, 32);
0412     p->data4[1] = get_input_data(p, 4, 32);
0413     p->data2[0] = get_input_data(p, 0, 16);
0414     p->data2[1] = get_input_data(p, 2, 16);
0415     p->data2[2] = get_input_data(p, 4, 16);
0416     p->data2[3] = get_input_data(p, 6, 16);
0417 }
0418 
0419 /* update the hashtable entries.
0420  * only call this after finding/adding the current template
0421  * the dataN fields for the current 8 byte block must be already updated
0422  */
0423 static void update_hashtables(struct sw842_param *p)
0424 {
0425     u64 pos = p->in - p->instart;
0426     u64 n8 = (pos >> 3) % (1 << I8_BITS);
0427     u64 n4 = (pos >> 2) % (1 << I4_BITS);
0428     u64 n2 = (pos >> 1) % (1 << I2_BITS);
0429 
0430     replace_hash(p, 8, n8, 0);
0431     replace_hash(p, 4, n4, 0);
0432     replace_hash(p, 4, n4, 1);
0433     replace_hash(p, 2, n2, 0);
0434     replace_hash(p, 2, n2, 1);
0435     replace_hash(p, 2, n2, 2);
0436     replace_hash(p, 2, n2, 3);
0437 }
0438 
0439 /* find the next template to use, and add it
0440  * the p->dataN fields must already be set for the current 8 byte block
0441  */
0442 static int process_next(struct sw842_param *p)
0443 {
0444     int ret, i;
0445 
0446     p->index8[0] = INDEX_NOT_CHECKED;
0447     p->index4[0] = INDEX_NOT_CHECKED;
0448     p->index4[1] = INDEX_NOT_CHECKED;
0449     p->index2[0] = INDEX_NOT_CHECKED;
0450     p->index2[1] = INDEX_NOT_CHECKED;
0451     p->index2[2] = INDEX_NOT_CHECKED;
0452     p->index2[3] = INDEX_NOT_CHECKED;
0453 
0454     /* check up to OPS_MAX - 1; last op is our fallback */
0455     for (i = 0; i < OPS_MAX - 1; i++) {
0456         if (check_template(p, i))
0457             break;
0458     }
0459 
0460     ret = add_template(p, i);
0461     if (ret)
0462         return ret;
0463 
0464     return 0;
0465 }
0466 
0467 /**
0468  * sw842_compress
0469  *
0470  * Compress the uncompressed buffer of length @ilen at @in to the output buffer
0471  * @out, using no more than @olen bytes, using the 842 compression format.
0472  *
0473  * Returns: 0 on success, error on failure.  The @olen parameter
0474  * will contain the number of output bytes written on success, or
0475  * 0 on error.
0476  */
0477 int sw842_compress(const u8 *in, unsigned int ilen,
0478            u8 *out, unsigned int *olen, void *wmem)
0479 {
0480     struct sw842_param *p = (struct sw842_param *)wmem;
0481     int ret;
0482     u64 last, next, pad, total;
0483     u8 repeat_count = 0;
0484     u32 crc;
0485 
0486     BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
0487 
0488     init_hashtable_nodes(p, 8);
0489     init_hashtable_nodes(p, 4);
0490     init_hashtable_nodes(p, 2);
0491 
0492     p->in = (u8 *)in;
0493     p->instart = p->in;
0494     p->ilen = ilen;
0495     p->out = out;
0496     p->olen = *olen;
0497     p->bit = 0;
0498 
0499     total = p->olen;
0500 
0501     *olen = 0;
0502 
0503     /* if using strict mode, we can only compress a multiple of 8 */
0504     if (sw842_strict && (ilen % 8)) {
0505         pr_err("Using strict mode, can't compress len %d\n", ilen);
0506         return -EINVAL;
0507     }
0508 
0509     /* let's compress at least 8 bytes, mkay? */
0510     if (unlikely(ilen < 8))
0511         goto skip_comp;
0512 
0513     /* make initial 'last' different so we don't match the first time */
0514     last = ~get_unaligned((u64 *)p->in);
0515 
0516     while (p->ilen > 7) {
0517         next = get_unaligned((u64 *)p->in);
0518 
0519         /* must get the next data, as we need to update the hashtable
0520          * entries with the new data every time
0521          */
0522         get_next_data(p);
0523 
0524         /* we don't care about endianness in last or next;
0525          * we're just comparing 8 bytes to another 8 bytes,
0526          * they're both the same endianness
0527          */
0528         if (next == last) {
0529             /* repeat count bits are 0-based, so we stop at +1 */
0530             if (++repeat_count <= REPEAT_BITS_MAX)
0531                 goto repeat;
0532         }
0533         if (repeat_count) {
0534             ret = add_repeat_template(p, repeat_count);
0535             repeat_count = 0;
0536             if (next == last) /* reached max repeat bits */
0537                 goto repeat;
0538         }
0539 
0540         if (next == 0)
0541             ret = add_zeros_template(p);
0542         else
0543             ret = process_next(p);
0544 
0545         if (ret)
0546             return ret;
0547 
0548 repeat:
0549         last = next;
0550         update_hashtables(p);
0551         p->in += 8;
0552         p->ilen -= 8;
0553     }
0554 
0555     if (repeat_count) {
0556         ret = add_repeat_template(p, repeat_count);
0557         if (ret)
0558             return ret;
0559     }
0560 
0561 skip_comp:
0562     if (p->ilen > 0) {
0563         ret = add_short_data_template(p, p->ilen);
0564         if (ret)
0565             return ret;
0566 
0567         p->in += p->ilen;
0568         p->ilen = 0;
0569     }
0570 
0571     ret = add_end_template(p);
0572     if (ret)
0573         return ret;
0574 
0575     /*
0576      * crc(0:31) is appended to target data starting with the next
0577      * bit after End of stream template.
0578      * nx842 calculates CRC for data in big-endian format. So doing
0579      * same here so that sw842 decompression can be used for both
0580      * compressed data.
0581      */
0582     crc = crc32_be(0, in, ilen);
0583     ret = add_bits(p, crc, CRC_BITS);
0584     if (ret)
0585         return ret;
0586 
0587     if (p->bit) {
0588         p->out++;
0589         p->olen--;
0590         p->bit = 0;
0591     }
0592 
0593     /* pad compressed length to multiple of 8 */
0594     pad = (8 - ((total - p->olen) % 8)) % 8;
0595     if (pad) {
0596         if (pad > p->olen) /* we were so close! */
0597             return -ENOSPC;
0598         memset(p->out, 0, pad);
0599         p->out += pad;
0600         p->olen -= pad;
0601     }
0602 
0603     if (unlikely((total - p->olen) > UINT_MAX))
0604         return -ENOSPC;
0605 
0606     *olen = total - p->olen;
0607 
0608     return 0;
0609 }
0610 EXPORT_SYMBOL_GPL(sw842_compress);
0611 
0612 static int __init sw842_init(void)
0613 {
0614     if (sw842_template_counts)
0615         sw842_debugfs_create();
0616 
0617     return 0;
0618 }
0619 module_init(sw842_init);
0620 
0621 static void __exit sw842_exit(void)
0622 {
0623     if (sw842_template_counts)
0624         sw842_debugfs_remove();
0625 }
0626 module_exit(sw842_exit);
0627 
0628 MODULE_LICENSE("GPL");
0629 MODULE_DESCRIPTION("Software 842 Compressor");
0630 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");