Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * net/sched/ematch.c       Extended Match API
0004  *
0005  * Authors: Thomas Graf <tgraf@suug.ch>
0006  *
0007  * ==========================================================================
0008  *
0009  * An extended match (ematch) is a small classification tool not worth
0010  * writing a full classifier for. Ematches can be interconnected to form
0011  * a logic expression and get attached to classifiers to extend their
0012  * functionality.
0013  *
0014  * The userspace part transforms the logic expressions into an array
0015  * consisting of multiple sequences of interconnected ematches separated
0016  * by markers. Precedence is implemented by a special ematch kind
0017  * referencing a sequence beyond the marker of the current sequence
0018  * causing the current position in the sequence to be pushed onto a stack
0019  * to allow the current position to be overwritten by the position referenced
0020  * in the special ematch. Matching continues in the new sequence until a
0021  * marker is reached causing the position to be restored from the stack.
0022  *
0023  * Example:
0024  *          A AND (B1 OR B2) AND C AND D
0025  *
0026  *              ------->-PUSH-------
0027  *    -->--    /         -->--      \   -->--
0028  *   /     \  /         /     \      \ /     \
0029  * +-------+-------+-------+-------+-------+--------+
0030  * | A AND | B AND | C AND | D END | B1 OR | B2 END |
0031  * +-------+-------+-------+-------+-------+--------+
0032  *                    \                      /
0033  *                     --------<-POP---------
0034  *
0035  * where B is a virtual ematch referencing to sequence starting with B1.
0036  *
0037  * ==========================================================================
0038  *
0039  * How to write an ematch in 60 seconds
0040  * ------------------------------------
0041  *
0042  *   1) Provide a matcher function:
0043  *      static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
0044  *                          struct tcf_pkt_info *info)
0045  *      {
0046  *          struct mydata *d = (struct mydata *) m->data;
0047  *
0048  *          if (...matching goes here...)
0049  *              return 1;
0050  *          else
0051  *              return 0;
0052  *      }
0053  *
0054  *   2) Fill out a struct tcf_ematch_ops:
0055  *      static struct tcf_ematch_ops my_ops = {
0056  *          .kind = unique id,
0057  *          .datalen = sizeof(struct mydata),
0058  *          .match = my_match,
0059  *          .owner = THIS_MODULE,
0060  *      };
0061  *
0062  *   3) Register/Unregister your ematch:
0063  *      static int __init init_my_ematch(void)
0064  *      {
0065  *          return tcf_em_register(&my_ops);
0066  *      }
0067  *
0068  *      static void __exit exit_my_ematch(void)
0069  *      {
0070  *          tcf_em_unregister(&my_ops);
0071  *      }
0072  *
0073  *      module_init(init_my_ematch);
0074  *      module_exit(exit_my_ematch);
0075  *
0076  *   4) By now you should have two more seconds left, barely enough to
0077  *      open up a beer to watch the compilation going.
0078  */
0079 
0080 #include <linux/module.h>
0081 #include <linux/slab.h>
0082 #include <linux/types.h>
0083 #include <linux/kernel.h>
0084 #include <linux/errno.h>
0085 #include <linux/rtnetlink.h>
0086 #include <linux/skbuff.h>
0087 #include <net/pkt_cls.h>
0088 
/* Registry of every ematch kind announced through tcf_em_register(). */
static LIST_HEAD(ematch_ops);

/* Protects ematch_ops: lookups take it for reading, (un)registration
 * takes it for writing.
 */
static DEFINE_RWLOCK(ematch_mod_lock);
0091 
0092 static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
0093 {
0094     struct tcf_ematch_ops *e = NULL;
0095 
0096     read_lock(&ematch_mod_lock);
0097     list_for_each_entry(e, &ematch_ops, link) {
0098         if (kind == e->kind) {
0099             if (!try_module_get(e->owner))
0100                 e = NULL;
0101             read_unlock(&ematch_mod_lock);
0102             return e;
0103         }
0104     }
0105     read_unlock(&ematch_mod_lock);
0106 
0107     return NULL;
0108 }
0109 
0110 /**
0111  * tcf_em_register - register an extended match
0112  *
0113  * @ops: ematch operations lookup table
0114  *
0115  * This function must be called by ematches to announce their presence.
0116  * The given @ops must have kind set to a unique identifier and the
0117  * callback match() must be implemented. All other callbacks are optional
0118  * and a fallback implementation is used instead.
0119  *
0120  * Returns -EEXISTS if an ematch of the same kind has already registered.
0121  */
0122 int tcf_em_register(struct tcf_ematch_ops *ops)
0123 {
0124     int err = -EEXIST;
0125     struct tcf_ematch_ops *e;
0126 
0127     if (ops->match == NULL)
0128         return -EINVAL;
0129 
0130     write_lock(&ematch_mod_lock);
0131     list_for_each_entry(e, &ematch_ops, link)
0132         if (ops->kind == e->kind)
0133             goto errout;
0134 
0135     list_add_tail(&ops->link, &ematch_ops);
0136     err = 0;
0137 errout:
0138     write_unlock(&ematch_mod_lock);
0139     return err;
0140 }
0141 EXPORT_SYMBOL(tcf_em_register);
0142 
0143 /**
0144  * tcf_em_unregister - unregister and extended match
0145  *
0146  * @ops: ematch operations lookup table
0147  *
0148  * This function must be called by ematches to announce their disappearance
0149  * for examples when the module gets unloaded. The @ops parameter must be
0150  * the same as the one used for registration.
0151  *
0152  * Returns -ENOENT if no matching ematch was found.
0153  */
0154 void tcf_em_unregister(struct tcf_ematch_ops *ops)
0155 {
0156     write_lock(&ematch_mod_lock);
0157     list_del(&ops->link);
0158     write_unlock(&ematch_mod_lock);
0159 }
0160 EXPORT_SYMBOL(tcf_em_unregister);
0161 
0162 static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
0163                           int index)
0164 {
0165     return &tree->matches[index];
0166 }
0167 
0168 
0169 static int tcf_em_validate(struct tcf_proto *tp,
0170                struct tcf_ematch_tree_hdr *tree_hdr,
0171                struct tcf_ematch *em, struct nlattr *nla, int idx)
0172 {
0173     int err = -EINVAL;
0174     struct tcf_ematch_hdr *em_hdr = nla_data(nla);
0175     int data_len = nla_len(nla) - sizeof(*em_hdr);
0176     void *data = (void *) em_hdr + sizeof(*em_hdr);
0177     struct net *net = tp->chain->block->net;
0178 
0179     if (!TCF_EM_REL_VALID(em_hdr->flags))
0180         goto errout;
0181 
0182     if (em_hdr->kind == TCF_EM_CONTAINER) {
0183         /* Special ematch called "container", carries an index
0184          * referencing an external ematch sequence.
0185          */
0186         u32 ref;
0187 
0188         if (data_len < sizeof(ref))
0189             goto errout;
0190         ref = *(u32 *) data;
0191 
0192         if (ref >= tree_hdr->nmatches)
0193             goto errout;
0194 
0195         /* We do not allow backward jumps to avoid loops and jumps
0196          * to our own position are of course illegal.
0197          */
0198         if (ref <= idx)
0199             goto errout;
0200 
0201 
0202         em->data = ref;
0203     } else {
0204         /* Note: This lookup will increase the module refcnt
0205          * of the ematch module referenced. In case of a failure,
0206          * a destroy function is called by the underlying layer
0207          * which automatically releases the reference again, therefore
0208          * the module MUST not be given back under any circumstances
0209          * here. Be aware, the destroy function assumes that the
0210          * module is held if the ops field is non zero.
0211          */
0212         em->ops = tcf_em_lookup(em_hdr->kind);
0213 
0214         if (em->ops == NULL) {
0215             err = -ENOENT;
0216 #ifdef CONFIG_MODULES
0217             __rtnl_unlock();
0218             request_module("ematch-kind-%u", em_hdr->kind);
0219             rtnl_lock();
0220             em->ops = tcf_em_lookup(em_hdr->kind);
0221             if (em->ops) {
0222                 /* We dropped the RTNL mutex in order to
0223                  * perform the module load. Tell the caller
0224                  * to replay the request.
0225                  */
0226                 module_put(em->ops->owner);
0227                 em->ops = NULL;
0228                 err = -EAGAIN;
0229             }
0230 #endif
0231             goto errout;
0232         }
0233 
0234         /* ematch module provides expected length of data, so we
0235          * can do a basic sanity check.
0236          */
0237         if (em->ops->datalen && data_len < em->ops->datalen)
0238             goto errout;
0239 
0240         if (em->ops->change) {
0241             err = -EINVAL;
0242             if (em_hdr->flags & TCF_EM_SIMPLE)
0243                 goto errout;
0244             err = em->ops->change(net, data, data_len, em);
0245             if (err < 0)
0246                 goto errout;
0247         } else if (data_len > 0) {
0248             /* ematch module doesn't provide an own change
0249              * procedure and expects us to allocate and copy
0250              * the ematch data.
0251              *
0252              * TCF_EM_SIMPLE may be specified stating that the
0253              * data only consists of a u32 integer and the module
0254              * does not expected a memory reference but rather
0255              * the value carried.
0256              */
0257             if (em_hdr->flags & TCF_EM_SIMPLE) {
0258                 if (data_len < sizeof(u32))
0259                     goto errout;
0260                 em->data = *(u32 *) data;
0261             } else {
0262                 void *v = kmemdup(data, data_len, GFP_KERNEL);
0263                 if (v == NULL) {
0264                     err = -ENOBUFS;
0265                     goto errout;
0266                 }
0267                 em->data = (unsigned long) v;
0268             }
0269             em->datalen = data_len;
0270         }
0271     }
0272 
0273     em->matchid = em_hdr->matchid;
0274     em->flags = em_hdr->flags;
0275     em->net = net;
0276 
0277     err = 0;
0278 errout:
0279     return err;
0280 }
0281 
0282 static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {
0283     [TCA_EMATCH_TREE_HDR]   = { .len = sizeof(struct tcf_ematch_tree_hdr) },
0284     [TCA_EMATCH_TREE_LIST]  = { .type = NLA_NESTED },
0285 };
0286 
0287 /**
0288  * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
0289  *
0290  * @tp: classifier kind handle
0291  * @nla: ematch tree configuration TLV
0292  * @tree: destination ematch tree variable to store the resulting
0293  *        ematch tree.
0294  *
0295  * This function validates the given configuration TLV @nla and builds an
0296  * ematch tree in @tree. The resulting tree must later be copied into
0297  * the private classifier data using tcf_em_tree_change(). You MUST NOT
0298  * provide the ematch tree variable of the private classifier data directly,
0299  * the changes would not be locked properly.
0300  *
0301  * Returns a negative error code if the configuration TLV contains errors.
0302  */
0303 int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
0304              struct tcf_ematch_tree *tree)
0305 {
0306     int idx, list_len, matches_len, err;
0307     struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
0308     struct nlattr *rt_match, *rt_hdr, *rt_list;
0309     struct tcf_ematch_tree_hdr *tree_hdr;
0310     struct tcf_ematch *em;
0311 
0312     memset(tree, 0, sizeof(*tree));
0313     if (!nla)
0314         return 0;
0315 
0316     err = nla_parse_nested_deprecated(tb, TCA_EMATCH_TREE_MAX, nla,
0317                       em_policy, NULL);
0318     if (err < 0)
0319         goto errout;
0320 
0321     err = -EINVAL;
0322     rt_hdr = tb[TCA_EMATCH_TREE_HDR];
0323     rt_list = tb[TCA_EMATCH_TREE_LIST];
0324 
0325     if (rt_hdr == NULL || rt_list == NULL)
0326         goto errout;
0327 
0328     tree_hdr = nla_data(rt_hdr);
0329     memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
0330 
0331     rt_match = nla_data(rt_list);
0332     list_len = nla_len(rt_list);
0333     matches_len = tree_hdr->nmatches * sizeof(*em);
0334 
0335     tree->matches = kzalloc(matches_len, GFP_KERNEL);
0336     if (tree->matches == NULL)
0337         goto errout;
0338 
0339     /* We do not use nla_parse_nested here because the maximum
0340      * number of attributes is unknown. This saves us the allocation
0341      * for a tb buffer which would serve no purpose at all.
0342      *
0343      * The array of rt attributes is parsed in the order as they are
0344      * provided, their type must be incremental from 1 to n. Even
0345      * if it does not serve any real purpose, a failure of sticking
0346      * to this policy will result in parsing failure.
0347      */
0348     for (idx = 0; nla_ok(rt_match, list_len); idx++) {
0349         err = -EINVAL;
0350 
0351         if (rt_match->nla_type != (idx + 1))
0352             goto errout_abort;
0353 
0354         if (idx >= tree_hdr->nmatches)
0355             goto errout_abort;
0356 
0357         if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))
0358             goto errout_abort;
0359 
0360         em = tcf_em_get_match(tree, idx);
0361 
0362         err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
0363         if (err < 0)
0364             goto errout_abort;
0365 
0366         rt_match = nla_next(rt_match, &list_len);
0367     }
0368 
0369     /* Check if the number of matches provided by userspace actually
0370      * complies with the array of matches. The number was used for
0371      * the validation of references and a mismatch could lead to
0372      * undefined references during the matching process.
0373      */
0374     if (idx != tree_hdr->nmatches) {
0375         err = -EINVAL;
0376         goto errout_abort;
0377     }
0378 
0379     err = 0;
0380 errout:
0381     return err;
0382 
0383 errout_abort:
0384     tcf_em_tree_destroy(tree);
0385     return err;
0386 }
0387 EXPORT_SYMBOL(tcf_em_tree_validate);
0388 
0389 /**
0390  * tcf_em_tree_destroy - destroy an ematch tree
0391  *
0392  * @tree: ematch tree to be deleted
0393  *
0394  * This functions destroys an ematch tree previously created by
0395  * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
0396  * the ematch tree is not in use before calling this function.
0397  */
0398 void tcf_em_tree_destroy(struct tcf_ematch_tree *tree)
0399 {
0400     int i;
0401 
0402     if (tree->matches == NULL)
0403         return;
0404 
0405     for (i = 0; i < tree->hdr.nmatches; i++) {
0406         struct tcf_ematch *em = tcf_em_get_match(tree, i);
0407 
0408         if (em->ops) {
0409             if (em->ops->destroy)
0410                 em->ops->destroy(em);
0411             else if (!tcf_em_is_simple(em))
0412                 kfree((void *) em->data);
0413             module_put(em->ops->owner);
0414         }
0415     }
0416 
0417     tree->hdr.nmatches = 0;
0418     kfree(tree->matches);
0419     tree->matches = NULL;
0420 }
0421 EXPORT_SYMBOL(tcf_em_tree_destroy);
0422 
0423 /**
0424  * tcf_em_tree_dump - dump ematch tree into a rtnl message
0425  *
0426  * @skb: skb holding the rtnl message
0427  * @tree: ematch tree to be dumped
0428  * @tlv: TLV type to be used to encapsulate the tree
0429  *
0430  * This function dumps a ematch tree into a rtnl message. It is valid to
0431  * call this function while the ematch tree is in use.
0432  *
0433  * Returns -1 if the skb tailroom is insufficient.
0434  */
0435 int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
0436 {
0437     int i;
0438     u8 *tail;
0439     struct nlattr *top_start;
0440     struct nlattr *list_start;
0441 
0442     top_start = nla_nest_start_noflag(skb, tlv);
0443     if (top_start == NULL)
0444         goto nla_put_failure;
0445 
0446     if (nla_put(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr))
0447         goto nla_put_failure;
0448 
0449     list_start = nla_nest_start_noflag(skb, TCA_EMATCH_TREE_LIST);
0450     if (list_start == NULL)
0451         goto nla_put_failure;
0452 
0453     tail = skb_tail_pointer(skb);
0454     for (i = 0; i < tree->hdr.nmatches; i++) {
0455         struct nlattr *match_start = (struct nlattr *)tail;
0456         struct tcf_ematch *em = tcf_em_get_match(tree, i);
0457         struct tcf_ematch_hdr em_hdr = {
0458             .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
0459             .matchid = em->matchid,
0460             .flags = em->flags
0461         };
0462 
0463         if (nla_put(skb, i + 1, sizeof(em_hdr), &em_hdr))
0464             goto nla_put_failure;
0465 
0466         if (em->ops && em->ops->dump) {
0467             if (em->ops->dump(skb, em) < 0)
0468                 goto nla_put_failure;
0469         } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
0470             u32 u = em->data;
0471             nla_put_nohdr(skb, sizeof(u), &u);
0472         } else if (em->datalen > 0)
0473             nla_put_nohdr(skb, em->datalen, (void *) em->data);
0474 
0475         tail = skb_tail_pointer(skb);
0476         match_start->nla_len = tail - (u8 *)match_start;
0477     }
0478 
0479     nla_nest_end(skb, list_start);
0480     nla_nest_end(skb, top_start);
0481 
0482     return 0;
0483 
0484 nla_put_failure:
0485     return -1;
0486 }
0487 EXPORT_SYMBOL(tcf_em_tree_dump);
0488 
0489 static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
0490                    struct tcf_pkt_info *info)
0491 {
0492     int r = em->ops->match(skb, em, info);
0493 
0494     return tcf_em_is_inverted(em) ? !r : r;
0495 }
0496 
0497 /* Do not use this function directly, use tcf_em_tree_match instead */
0498 int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
0499             struct tcf_pkt_info *info)
0500 {
0501     int stackp = 0, match_idx = 0, res = 0;
0502     struct tcf_ematch *cur_match;
0503     int stack[CONFIG_NET_EMATCH_STACK];
0504 
0505 proceed:
0506     while (match_idx < tree->hdr.nmatches) {
0507         cur_match = tcf_em_get_match(tree, match_idx);
0508 
0509         if (tcf_em_is_container(cur_match)) {
0510             if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
0511                 goto stack_overflow;
0512 
0513             stack[stackp++] = match_idx;
0514             match_idx = cur_match->data;
0515             goto proceed;
0516         }
0517 
0518         res = tcf_em_match(skb, cur_match, info);
0519 
0520         if (tcf_em_early_end(cur_match, res))
0521             break;
0522 
0523         match_idx++;
0524     }
0525 
0526 pop_stack:
0527     if (stackp > 0) {
0528         match_idx = stack[--stackp];
0529         cur_match = tcf_em_get_match(tree, match_idx);
0530 
0531         if (tcf_em_is_inverted(cur_match))
0532             res = !res;
0533 
0534         if (tcf_em_early_end(cur_match, res)) {
0535             goto pop_stack;
0536         } else {
0537             match_idx++;
0538             goto proceed;
0539         }
0540     }
0541 
0542     return res;
0543 
0544 stack_overflow:
0545     net_warn_ratelimited("tc ematch: local stack overflow, increase NET_EMATCH_STACK\n");
0546     return -1;
0547 }
0548 EXPORT_SYMBOL(__tcf_em_tree_match);