0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025 #include <linux/module.h>
0026 #include <linux/types.h>
0027 #include <linux/string.h>
0028 #include <linux/ctype.h>
0029 #include <linux/textsearch.h>
0030 #include <linux/textsearch_fsm.h>
0031
0032 struct ts_fsm
0033 {
0034 unsigned int ntokens;
0035 struct ts_fsm_token tokens[];
0036 };
0037
0038
0039 #define _A 0x100
0040 #define _W 0x200
0041
0042
0043 static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
0044 [TS_FSM_SPECIFIC] = 0,
0045 [TS_FSM_WILDCARD] = _W,
0046 [TS_FSM_CNTRL] = _C,
0047 [TS_FSM_LOWER] = _L,
0048 [TS_FSM_UPPER] = _U,
0049 [TS_FSM_PUNCT] = _P,
0050 [TS_FSM_SPACE] = _S,
0051 [TS_FSM_DIGIT] = _D,
0052 [TS_FSM_XDIGIT] = _D | _X,
0053 [TS_FSM_ALPHA] = _U | _L,
0054 [TS_FSM_ALNUM] = _U | _L | _D,
0055 [TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
0056 [TS_FSM_GRAPH] = _P | _U | _L | _D,
0057 [TS_FSM_ASCII] = _A,
0058 };
0059
0060 static const u16 token_lookup_tbl[256] = {
0061 _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
0062 _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
0063 _W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S,
0064 _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C,
0065 _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
0066 _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
0067 _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
0068 _W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C,
0069 _W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P,
0070 _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
0071 _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
0072 _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
0073 _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D,
0074 _W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D,
0075 _W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P,
0076 _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
0077 _W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X,
0078 _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U,
0079 _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
0080 _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
0081 _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
0082 _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U,
0083 _W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P,
0084 _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P,
0085 _W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X,
0086 _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L,
0087 _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
0088 _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
0089 _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
0090 _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L,
0091 _W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P,
0092 _W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C,
0093 _W, _W, _W, _W,
0094 _W, _W, _W, _W,
0095 _W, _W, _W, _W,
0096 _W, _W, _W, _W,
0097 _W, _W, _W, _W,
0098 _W, _W, _W, _W,
0099 _W, _W, _W, _W,
0100 _W, _W, _W, _W,
0101 _W|_S|_SP, _W|_P, _W|_P, _W|_P,
0102 _W|_P, _W|_P, _W|_P, _W|_P,
0103 _W|_P, _W|_P, _W|_P, _W|_P,
0104 _W|_P, _W|_P, _W|_P, _W|_P,
0105 _W|_P, _W|_P, _W|_P, _W|_P,
0106 _W|_P, _W|_P, _W|_P, _W|_P,
0107 _W|_P, _W|_P, _W|_P, _W|_P,
0108 _W|_P, _W|_P, _W|_P, _W|_P,
0109 _W|_U, _W|_U, _W|_U, _W|_U,
0110 _W|_U, _W|_U, _W|_U, _W|_U,
0111 _W|_U, _W|_U, _W|_U, _W|_U,
0112 _W|_U, _W|_U, _W|_U, _W|_U,
0113 _W|_U, _W|_U, _W|_U, _W|_U,
0114 _W|_U, _W|_U, _W|_U, _W|_P,
0115 _W|_U, _W|_U, _W|_U, _W|_U,
0116 _W|_U, _W|_U, _W|_U, _W|_L,
0117 _W|_L, _W|_L, _W|_L, _W|_L,
0118 _W|_L, _W|_L, _W|_L, _W|_L,
0119 _W|_L, _W|_L, _W|_L, _W|_L,
0120 _W|_L, _W|_L, _W|_L, _W|_L,
0121 _W|_L, _W|_L, _W|_L, _W|_L,
0122 _W|_L, _W|_L, _W|_L, _W|_P,
0123 _W|_L, _W|_L, _W|_L, _W|_L,
0124 _W|_L, _W|_L, _W|_L, _W|_L};
0125
0126 static inline int match_token(struct ts_fsm_token *t, u8 d)
0127 {
0128 if (t->type)
0129 return (token_lookup_tbl[d] & t->type) != 0;
0130 else
0131 return t->value == d;
0132 }
0133
0134 static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state)
0135 {
0136 struct ts_fsm *fsm = ts_config_priv(conf);
0137 struct ts_fsm_token *cur = NULL, *next;
0138 unsigned int match_start, block_idx = 0, tok_idx;
0139 unsigned block_len = 0, strict, consumed = state->offset;
0140 const u8 *data;
0141
0142 #define GET_NEXT_BLOCK() \
0143 ({ consumed += block_idx; \
0144 block_idx = 0; \
0145 block_len = conf->get_next_block(consumed, &data, conf, state); })
0146
0147 #define TOKEN_MISMATCH() \
0148 do { \
0149 if (strict) \
0150 goto no_match; \
0151 block_idx++; \
0152 goto startover; \
0153 } while(0)
0154
0155 #define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK())
0156
0157 if (end_of_data())
0158 goto no_match;
0159
0160 strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE;
0161
0162 startover:
0163 match_start = consumed + block_idx;
0164
0165 for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) {
0166 cur = &fsm->tokens[tok_idx];
0167
0168 if (likely(tok_idx < (fsm->ntokens - 1)))
0169 next = &fsm->tokens[tok_idx + 1];
0170 else
0171 next = NULL;
0172
0173 switch (cur->recur) {
0174 case TS_FSM_SINGLE:
0175 if (end_of_data())
0176 goto no_match;
0177
0178 if (!match_token(cur, data[block_idx]))
0179 TOKEN_MISMATCH();
0180 break;
0181
0182 case TS_FSM_PERHAPS:
0183 if (end_of_data() ||
0184 !match_token(cur, data[block_idx]))
0185 continue;
0186 break;
0187
0188 case TS_FSM_MULTI:
0189 if (end_of_data())
0190 goto no_match;
0191
0192 if (!match_token(cur, data[block_idx]))
0193 TOKEN_MISMATCH();
0194
0195 block_idx++;
0196 fallthrough;
0197
0198 case TS_FSM_ANY:
0199 if (next == NULL)
0200 goto found_match;
0201
0202 if (end_of_data())
0203 continue;
0204
0205 while (!match_token(next, data[block_idx])) {
0206 if (!match_token(cur, data[block_idx]))
0207 TOKEN_MISMATCH();
0208 block_idx++;
0209 if (end_of_data())
0210 goto no_match;
0211 }
0212 continue;
0213
0214
0215
0216
0217
0218 case TS_FSM_HEAD_IGNORE:
0219 if (end_of_data())
0220 continue;
0221
0222 while (!match_token(next, data[block_idx])) {
0223
0224
0225
0226
0227
0228
0229 if (!match_token(cur, data[block_idx]))
0230 goto no_match;
0231
0232 block_idx++;
0233 if (end_of_data())
0234 goto no_match;
0235 }
0236
0237 match_start = consumed + block_idx;
0238 continue;
0239 }
0240
0241 block_idx++;
0242 }
0243
0244 if (end_of_data())
0245 goto found_match;
0246
0247 no_match:
0248 return UINT_MAX;
0249
0250 found_match:
0251 state->offset = consumed + block_idx;
0252 return match_start;
0253 }
0254
0255 static struct ts_config *fsm_init(const void *pattern, unsigned int len,
0256 gfp_t gfp_mask, int flags)
0257 {
0258 int i, err = -EINVAL;
0259 struct ts_config *conf;
0260 struct ts_fsm *fsm;
0261 struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern;
0262 unsigned int ntokens = len / sizeof(*tokens);
0263 size_t priv_size = sizeof(*fsm) + len;
0264
0265 if (len % sizeof(struct ts_fsm_token) || ntokens < 1)
0266 goto errout;
0267
0268 if (flags & TS_IGNORECASE)
0269 goto errout;
0270
0271 for (i = 0; i < ntokens; i++) {
0272 struct ts_fsm_token *t = &tokens[i];
0273
0274 if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX)
0275 goto errout;
0276
0277 if (t->recur == TS_FSM_HEAD_IGNORE &&
0278 (i != 0 || i == (ntokens - 1)))
0279 goto errout;
0280 }
0281
0282 conf = alloc_ts_config(priv_size, gfp_mask);
0283 if (IS_ERR(conf))
0284 return conf;
0285
0286 conf->flags = flags;
0287 fsm = ts_config_priv(conf);
0288 fsm->ntokens = ntokens;
0289 memcpy(fsm->tokens, pattern, len);
0290
0291 for (i = 0; i < fsm->ntokens; i++) {
0292 struct ts_fsm_token *t = &fsm->tokens[i];
0293 t->type = token_map[t->type];
0294 }
0295
0296 return conf;
0297
0298 errout:
0299 return ERR_PTR(err);
0300 }
0301
0302 static void *fsm_get_pattern(struct ts_config *conf)
0303 {
0304 struct ts_fsm *fsm = ts_config_priv(conf);
0305 return fsm->tokens;
0306 }
0307
0308 static unsigned int fsm_get_pattern_len(struct ts_config *conf)
0309 {
0310 struct ts_fsm *fsm = ts_config_priv(conf);
0311 return fsm->ntokens * sizeof(struct ts_fsm_token);
0312 }
0313
0314 static struct ts_ops fsm_ops = {
0315 .name = "fsm",
0316 .find = fsm_find,
0317 .init = fsm_init,
0318 .get_pattern = fsm_get_pattern,
0319 .get_pattern_len = fsm_get_pattern_len,
0320 .owner = THIS_MODULE,
0321 .list = LIST_HEAD_INIT(fsm_ops.list)
0322 };
0323
0324 static int __init init_fsm(void)
0325 {
0326 return textsearch_register(&fsm_ops);
0327 }
0328
0329 static void __exit exit_fsm(void)
0330 {
0331 textsearch_unregister(&fsm_ops);
0332 }
0333
0334 MODULE_LICENSE("GPL");
0335
0336 module_init(init_fsm);
0337 module_exit(exit_fsm);