Back to home page

LXR

 
 

    


0001 /*
0002  * lib/ts_kmp.c     Knuth-Morris-Pratt text search implementation
0003  *
0004  *      This program is free software; you can redistribute it and/or
0005  *      modify it under the terms of the GNU General Public License
0006  *      as published by the Free Software Foundation; either version
0007  *      2 of the License, or (at your option) any later version.
0008  *
0009  * Authors: Thomas Graf <tgraf@suug.ch>
0010  *
0011  * ==========================================================================
0012  * 
0013  *   Implements a linear-time string-matching algorithm due to Knuth,
0014  *   Morris, and Pratt [1]. Their algorithm avoids the explicit
0015  *   computation of the transition function DELTA altogether. Its
0016  *   matching time is O(n), for n being length(text), using just an
0017  *   auxiliary function PI[1..m], for m being length(pattern),
0018  *   precomputed from the pattern in time O(m). The array PI allows
0019  *   the transition function DELTA to be computed efficiently
0020  *   "on the fly" as needed. Roughly speaking, for any state
0021  *   "q" = 0,1,...,m and any character "a" in SIGMA, the value
0022  *   PI["q"] contains the information that is independent of "a" and
0023  *   is needed to compute DELTA("q", "a") [2]. Since the array PI
0024  *   has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
0025  *   save a factor of |SIGMA| in the preprocessing time by computing
0026  *   PI rather than DELTA.
0027  *
0028  *   [1] Cormen, Leiserson, Rivest, Stein
0029  *       Introdcution to Algorithms, 2nd Edition, MIT Press
0030  *   [2] See finite automation theory
0031  */
0032 
0033 #include <linux/module.h>
0034 #include <linux/types.h>
0035 #include <linux/string.h>
0036 #include <linux/ctype.h>
0037 #include <linux/textsearch.h>
0038 
0039 struct ts_kmp
0040 {
0041     u8 *        pattern;
0042     unsigned int    pattern_len;
0043     unsigned int    prefix_tbl[0];
0044 };
0045 
0046 static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
0047 {
0048     struct ts_kmp *kmp = ts_config_priv(conf);
0049     unsigned int i, q = 0, text_len, consumed = state->offset;
0050     const u8 *text;
0051     const int icase = conf->flags & TS_IGNORECASE;
0052 
0053     for (;;) {
0054         text_len = conf->get_next_block(consumed, &text, conf, state);
0055 
0056         if (unlikely(text_len == 0))
0057             break;
0058 
0059         for (i = 0; i < text_len; i++) {
0060             while (q > 0 && kmp->pattern[q]
0061                 != (icase ? toupper(text[i]) : text[i]))
0062                 q = kmp->prefix_tbl[q - 1];
0063             if (kmp->pattern[q]
0064                 == (icase ? toupper(text[i]) : text[i]))
0065                 q++;
0066             if (unlikely(q == kmp->pattern_len)) {
0067                 state->offset = consumed + i + 1;
0068                 return state->offset - kmp->pattern_len;
0069             }
0070         }
0071 
0072         consumed += text_len;
0073     }
0074 
0075     return UINT_MAX;
0076 }
0077 
0078 static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
0079                       unsigned int *prefix_tbl, int flags)
0080 {
0081     unsigned int k, q;
0082     const u8 icase = flags & TS_IGNORECASE;
0083 
0084     for (k = 0, q = 1; q < len; q++) {
0085         while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
0086             != (icase ? toupper(pattern[q]) : pattern[q]))
0087             k = prefix_tbl[k-1];
0088         if ((icase ? toupper(pattern[k]) : pattern[k])
0089             == (icase ? toupper(pattern[q]) : pattern[q]))
0090             k++;
0091         prefix_tbl[q] = k;
0092     }
0093 }
0094 
0095 static struct ts_config *kmp_init(const void *pattern, unsigned int len,
0096                   gfp_t gfp_mask, int flags)
0097 {
0098     struct ts_config *conf;
0099     struct ts_kmp *kmp;
0100     int i;
0101     unsigned int prefix_tbl_len = len * sizeof(unsigned int);
0102     size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
0103 
0104     conf = alloc_ts_config(priv_size, gfp_mask);
0105     if (IS_ERR(conf))
0106         return conf;
0107 
0108     conf->flags = flags;
0109     kmp = ts_config_priv(conf);
0110     kmp->pattern_len = len;
0111     compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
0112     kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
0113     if (flags & TS_IGNORECASE)
0114         for (i = 0; i < len; i++)
0115             kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
0116     else
0117         memcpy(kmp->pattern, pattern, len);
0118 
0119     return conf;
0120 }
0121 
0122 static void *kmp_get_pattern(struct ts_config *conf)
0123 {
0124     struct ts_kmp *kmp = ts_config_priv(conf);
0125     return kmp->pattern;
0126 }
0127 
0128 static unsigned int kmp_get_pattern_len(struct ts_config *conf)
0129 {
0130     struct ts_kmp *kmp = ts_config_priv(conf);
0131     return kmp->pattern_len;
0132 }
0133 
0134 static struct ts_ops kmp_ops = {
0135     .name         = "kmp",
0136     .find         = kmp_find,
0137     .init         = kmp_init,
0138     .get_pattern      = kmp_get_pattern,
0139     .get_pattern_len  = kmp_get_pattern_len,
0140     .owner        = THIS_MODULE,
0141     .list         = LIST_HEAD_INIT(kmp_ops.list)
0142 };
0143 
0144 static int __init init_kmp(void)
0145 {
0146     return textsearch_register(&kmp_ops);
0147 }
0148 
0149 static void __exit exit_kmp(void)
0150 {
0151     textsearch_unregister(&kmp_ops);
0152 }
0153 
0154 MODULE_LICENSE("GPL");
0155 
0156 module_init(init_kmp);
0157 module_exit(exit_kmp);