Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * lib/ts_bm.c      Boyer-Moore text search implementation
0004  *
0005  * Authors: Pablo Neira Ayuso <pablo@eurodev.net>
0006  *
0007  * ==========================================================================
0008  * 
0009  *   Implements Boyer-Moore string matching algorithm:
0010  *
0011  *   [1] A Fast String Searching Algorithm, R.S. Boyer and Moore.
0012  *       Communications of the Association for Computing Machinery, 
0013  *       20(10), 1977, pp. 762-772.
0014  *       https://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
0015  *
0016  *   [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
0017  *       http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
0018  *
0019  *   Note: Since Boyer-Moore (BM) performs searches for matchings from right 
0020  *   to left, it's still possible that a matching could be spread over 
0021  *   multiple blocks, in that case this algorithm won't find any coincidence.
0022  *   
0023  *   If you're willing to ensure that such thing won't ever happen, use the
0024  *   Knuth-Pratt-Morris (KMP) implementation instead. In conclusion, choose 
0025  *   the proper string search algorithm depending on your setting. 
0026  *
0027  *   Say you're using the textsearch infrastructure for filtering, NIDS or 
0028  *   any similar security focused purpose, then go KMP. Otherwise, if you 
0029  *   really care about performance, say you're classifying packets to apply
0030  *   Quality of Service (QoS) policies, and you don't mind about possible
0031  *   matchings spread over multiple fragments, then go BM.
0032  */
0033 
0034 #include <linux/kernel.h>
0035 #include <linux/module.h>
0036 #include <linux/types.h>
0037 #include <linux/string.h>
0038 #include <linux/ctype.h>
0039 #include <linux/textsearch.h>
0040 
0041 /* Alphabet size, use ASCII */
0042 #define ASIZE 256
0043 
0044 #if 0
0045 #define DEBUGP printk
0046 #else
0047 #define DEBUGP(args, format...)
0048 #endif
0049 
0050 struct ts_bm
0051 {
0052     u8 *        pattern;
0053     unsigned int    patlen;
0054     unsigned int    bad_shift[ASIZE];
0055     unsigned int    good_shift[];
0056 };
0057 
0058 static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
0059 {
0060     struct ts_bm *bm = ts_config_priv(conf);
0061     unsigned int i, text_len, consumed = state->offset;
0062     const u8 *text;
0063     int shift = bm->patlen - 1, bs;
0064     const u8 icase = conf->flags & TS_IGNORECASE;
0065 
0066     for (;;) {
0067         text_len = conf->get_next_block(consumed, &text, conf, state);
0068 
0069         if (unlikely(text_len == 0))
0070             break;
0071 
0072         while (shift < text_len) {
0073             DEBUGP("Searching in position %d (%c)\n", 
0074                 shift, text[shift]);
0075             for (i = 0; i < bm->patlen; i++) 
0076                 if ((icase ? toupper(text[shift-i])
0077                     : text[shift-i])
0078                     != bm->pattern[bm->patlen-1-i])
0079                      goto next;
0080 
0081             /* London calling... */
0082             DEBUGP("found!\n");
0083             return consumed + (shift-(bm->patlen-1));
0084 
0085 next:           bs = bm->bad_shift[text[shift-i]];
0086 
0087             /* Now jumping to... */
0088             shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]);
0089         }
0090         consumed += text_len;
0091     }
0092 
0093     return UINT_MAX;
0094 }
0095 
0096 static int subpattern(u8 *pattern, int i, int j, int g)
0097 {
0098     int x = i+g-1, y = j+g-1, ret = 0;
0099 
0100     while(pattern[x--] == pattern[y--]) {
0101         if (y < 0) {
0102             ret = 1;
0103             break;
0104         }
0105         if (--g == 0) {
0106             ret = pattern[i-1] != pattern[j-1];
0107             break;
0108         }
0109     }
0110 
0111     return ret;
0112 }
0113 
0114 static void compute_prefix_tbl(struct ts_bm *bm, int flags)
0115 {
0116     int i, j, g;
0117 
0118     for (i = 0; i < ASIZE; i++)
0119         bm->bad_shift[i] = bm->patlen;
0120     for (i = 0; i < bm->patlen - 1; i++) {
0121         bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
0122         if (flags & TS_IGNORECASE)
0123             bm->bad_shift[tolower(bm->pattern[i])]
0124                 = bm->patlen - 1 - i;
0125     }
0126 
0127     /* Compute the good shift array, used to match reocurrences 
0128      * of a subpattern */
0129     bm->good_shift[0] = 1;
0130     for (i = 1; i < bm->patlen; i++)
0131         bm->good_shift[i] = bm->patlen;
0132         for (i = bm->patlen-1, g = 1; i > 0; g++, i--) {
0133         for (j = i-1; j >= 1-g ; j--)
0134             if (subpattern(bm->pattern, i, j, g)) {
0135                 bm->good_shift[g] = bm->patlen-j-g;
0136                 break;
0137             }
0138     }
0139 }
0140 
0141 static struct ts_config *bm_init(const void *pattern, unsigned int len,
0142                  gfp_t gfp_mask, int flags)
0143 {
0144     struct ts_config *conf;
0145     struct ts_bm *bm;
0146     int i;
0147     unsigned int prefix_tbl_len = len * sizeof(unsigned int);
0148     size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
0149 
0150     conf = alloc_ts_config(priv_size, gfp_mask);
0151     if (IS_ERR(conf))
0152         return conf;
0153 
0154     conf->flags = flags;
0155     bm = ts_config_priv(conf);
0156     bm->patlen = len;
0157     bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
0158     if (flags & TS_IGNORECASE)
0159         for (i = 0; i < len; i++)
0160             bm->pattern[i] = toupper(((u8 *)pattern)[i]);
0161     else
0162         memcpy(bm->pattern, pattern, len);
0163     compute_prefix_tbl(bm, flags);
0164 
0165     return conf;
0166 }
0167 
0168 static void *bm_get_pattern(struct ts_config *conf)
0169 {
0170     struct ts_bm *bm = ts_config_priv(conf);
0171     return bm->pattern;
0172 }
0173 
0174 static unsigned int bm_get_pattern_len(struct ts_config *conf)
0175 {
0176     struct ts_bm *bm = ts_config_priv(conf);
0177     return bm->patlen;
0178 }
0179 
0180 static struct ts_ops bm_ops = {
0181     .name         = "bm",
0182     .find         = bm_find,
0183     .init         = bm_init,
0184     .get_pattern      = bm_get_pattern,
0185     .get_pattern_len  = bm_get_pattern_len,
0186     .owner        = THIS_MODULE,
0187     .list         = LIST_HEAD_INIT(bm_ops.list)
0188 };
0189 
0190 static int __init init_bm(void)
0191 {
0192     return textsearch_register(&bm_ops);
0193 }
0194 
0195 static void __exit exit_bm(void)
0196 {
0197     textsearch_unregister(&bm_ops);
0198 }
0199 
0200 MODULE_LICENSE("GPL");
0201 
0202 module_init(init_bm);
0203 module_exit(exit_bm);