0001 /*
0002  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *        copyright notice, this list of conditions and the following
0016  *        disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *        copyright notice, this list of conditions and the following
0020  *        disclaimer in the documentation and/or other materials
0021  *        provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 
0033 #include <net/flow_dissector.h>
0034 #include <net/flow_offload.h>
0035 #include <net/sch_generic.h>
0036 #include <net/pkt_cls.h>
0037 #include <linux/mlx5/fs.h>
0038 #include <linux/mlx5/device.h>
0039 #include <linux/rhashtable.h>
0040 #include <linux/refcount.h>
0041 #include <linux/completion.h>
0042 #include <net/arp.h>
0043 #include <net/ipv6_stubs.h>
0044 #include <net/bareudp.h>
0045 #include <net/bonding.h>
0046 #include "en.h"
0047 #include "en/tc/post_act.h"
0048 #include "en_rep.h"
0049 #include "en/rep/tc.h"
0050 #include "en/rep/neigh.h"
0051 #include "en_tc.h"
0052 #include "eswitch.h"
0053 #include "fs_core.h"
0054 #include "en/port.h"
0055 #include "en/tc_tun.h"
0056 #include "en/mapping.h"
0057 #include "en/tc_ct.h"
0058 #include "en/mod_hdr.h"
0059 #include "en/tc_tun_encap.h"
0060 #include "en/tc/sample.h"
0061 #include "en/tc/act/act.h"
0062 #include "en/tc/post_meter.h"
0063 #include "lib/devcom.h"
0064 #include "lib/geneve.h"
0065 #include "lib/fs_chains.h"
0066 #include "diag/en_tc_tracepoint.h"
0067 #include <asm/div64.h>
0068 #include "lag/lag.h"
0069 #include "lag/mp.h"
0070 
0071 #define MLX5E_TC_TABLE_NUM_GROUPS 4
0072 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
0073 
0074 struct mlx5e_tc_table {
0075     /* Protects the dynamic assignment of the t parameter
0076      * which is the nic tc root table.
0077      */
0078     struct mutex            t_lock;
0079     struct mlx5e_priv       *priv;
0080     struct mlx5_flow_table      *t;
0081     struct mlx5_flow_table      *miss_t;
0082     struct mlx5_fs_chains           *chains;
0083     struct mlx5e_post_act       *post_act;
0084 
0085     struct rhashtable               ht;
0086 
0087     struct mod_hdr_tbl mod_hdr;
0088     struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
0089     DECLARE_HASHTABLE(hairpin_tbl, 8);
0090 
0091     struct notifier_block     netdevice_nb;
0092     struct netdev_net_notifier  netdevice_nn;
0093 
0094     struct mlx5_tc_ct_priv         *ct;
0095     struct mapping_ctx             *mapping;
0096 };
0097 
0098 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
0099     [CHAIN_TO_REG] = {
0100         .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
0101         .moffset = 0,
0102         .mlen = 16,
0103     },
0104     [VPORT_TO_REG] = {
0105         .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
0106         .moffset = 16,
0107         .mlen = 16,
0108     },
0109     [TUNNEL_TO_REG] = {
0110         .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
0111         .moffset = 8,
0112         .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
0113         .soffset = MLX5_BYTE_OFF(fte_match_param,
0114                      misc_parameters_2.metadata_reg_c_1),
0115     },
0116     [ZONE_TO_REG] = zone_to_reg_ct,
0117     [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
0118     [CTSTATE_TO_REG] = ctstate_to_reg_ct,
0119     [MARK_TO_REG] = mark_to_reg_ct,
0120     [LABELS_TO_REG] = labels_to_reg_ct,
0121     [FTEID_TO_REG] = fteid_to_reg_ct,
0122     /* For NIC rules we store the restore metadata directly
0123      * into reg_b that is passed to SW since we don't
0124      * jump between steering domains.
0125      */
0126     [NIC_CHAIN_TO_REG] = {
0127         .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
0128         .moffset = 0,
0129         .mlen = 16,
0130     },
0131     [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
0132     [PACKET_COLOR_TO_REG] = packet_color_to_reg,
0133 };
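/* Quick summary of the layout the table above implies (descriptive only):
 * on the eswitch/FDB path reg_c_0 carries the chain id in bits 0..15 and
 * the source vport in bits 16..31, while reg_c_1 carries the tunnel
 * id/options mapping starting at bit 8; NIC rules instead keep the chain
 * id in reg_b, as the comment above NIC_CHAIN_TO_REG notes.
 */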
0134 
0135 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
0136 {
0137     struct mlx5e_tc_table *tc;
0138 
0139     tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
0140     return tc ? tc : ERR_PTR(-ENOMEM);
0141 }
0142 
0143 void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
0144 {
0145     kvfree(tc);
0146 }
0147 
0148 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
0149 {
0150     return tc->chains;
0151 }
0152 
0153 /* To avoid a false lock dependency warning, set the tc_ht lock class to be
0154  * different from that of the hash table used when deleting the last flow
0155  * from a group and then deleting the group: that path reaches
0156  * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash and
0157  * takes that table's ht->mutex, which is different from the ht->mutex here.
0158  */
0159 static struct lock_class_key tc_ht_lock_key;
0160 
0161 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
0162 static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
0163 
0164 void
0165 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
0166                 enum mlx5e_tc_attr_to_reg type,
0167                 u32 val,
0168                 u32 mask)
0169 {
0170     void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
0171     int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
0172     int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
0173     int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
0174     u32 max_mask = GENMASK(match_len - 1, 0);
0175     __be32 curr_mask_be, curr_val_be;
0176     u32 curr_mask, curr_val;
0177 
0178     fmask = headers_c + soffset;
0179     fval = headers_v + soffset;
0180 
0181     memcpy(&curr_mask_be, fmask, 4);
0182     memcpy(&curr_val_be, fval, 4);
0183 
0184     curr_mask = be32_to_cpu(curr_mask_be);
0185     curr_val = be32_to_cpu(curr_val_be);
0186 
0187     /* move to the correct offset */
0188     WARN_ON(mask > max_mask);
0189     mask <<= moffset;
0190     val <<= moffset;
0191     max_mask <<= moffset;
0192 
0193     /* zero val and mask at that offset */
0194     curr_mask &= ~max_mask;
0195     curr_val &= ~max_mask;
0196 
0197     /* merge the new val and mask into the current ones */
0198     curr_mask |= mask;
0199     curr_val |= val;
0200 
0201     /* back to be32 and write */
0202     curr_mask_be = cpu_to_be32(curr_mask);
0203     curr_val_be = cpu_to_be32(curr_val);
0204 
0205     memcpy(fmask, &curr_mask_be, 4);
0206     memcpy(fval, &curr_val_be, 4);
0207 
0208     spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
0209 }
0210 
0211 void
0212 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
0213                 enum mlx5e_tc_attr_to_reg type,
0214                 u32 *val,
0215                 u32 *mask)
0216 {
0217     void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
0218     int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
0219     int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
0220     int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
0221     u32 max_mask = GENMASK(match_len - 1, 0);
0222     __be32 curr_mask_be, curr_val_be;
0223     u32 curr_mask, curr_val;
0224 
0225     fmask = headers_c + soffset;
0226     fval = headers_v + soffset;
0227 
0228     memcpy(&curr_mask_be, fmask, 4);
0229     memcpy(&curr_val_be, fval, 4);
0230 
0231     curr_mask = be32_to_cpu(curr_mask_be);
0232     curr_val = be32_to_cpu(curr_val_be);
0233 
0234     *mask = (curr_mask >> moffset) & max_mask;
0235     *val = (curr_val >> moffset) & max_mask;
0236 }
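/* Usage sketch for the two helpers above (illustrative, not a call site in
 * this file; tun_id and tun_mask are placeholders). The mask must fit in
 * the mapping's mlen bits, otherwise the WARN_ON in the setter fires:
 *
 *	mlx5e_tc_match_to_reg_match(spec, TUNNEL_TO_REG, tun_id, tun_mask);
 *	mlx5e_tc_match_to_reg_get_match(spec, TUNNEL_TO_REG, &val, &mask);
 *	(val/mask now hold tun_id/tun_mask read back from the spec)
 */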
0237 
0238 int
0239 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
0240                      struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
0241                      enum mlx5_flow_namespace_type ns,
0242                      enum mlx5e_tc_attr_to_reg type,
0243                      u32 data)
0244 {
0245     int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
0246     int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
0247     int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
0248     char *modact;
0249     int err;
0250 
0251     modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
0252     if (IS_ERR(modact))
0253         return PTR_ERR(modact);
0254 
0255     /* Firmware has a 5-bit length field, and 0 means 32 bits */
0256     if (mlen == 32)
0257         mlen = 0;
0258 
0259     MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
0260     MLX5_SET(set_action_in, modact, field, mfield);
0261     MLX5_SET(set_action_in, modact, offset, moffset);
0262     MLX5_SET(set_action_in, modact, length, mlen);
0263     MLX5_SET(set_action_in, modact, data, data);
0264     err = mod_hdr_acts->num_actions;
0265     mod_hdr_acts->num_actions++;
0266 
0267     return err;
0268 }
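/* Usage sketch (illustrative; chain_id and new_chain_id are placeholders):
 * program the chain id into reg_c_0 and keep the returned action index so
 * the same action can later be rewritten in place with
 * mlx5e_tc_match_to_reg_mod_hdr_change() defined further below:
 *
 *	act_id = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts,
 *						      MLX5_FLOW_NAMESPACE_FDB,
 *						      CHAIN_TO_REG, chain_id);
 *	if (act_id < 0)
 *		return act_id;
 *	...
 *	mlx5e_tc_match_to_reg_mod_hdr_change(mdev, mod_hdr_acts, CHAIN_TO_REG,
 *					     act_id, new_chain_id);
 */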
0269 
0270 struct mlx5e_tc_int_port_priv *
0271 mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
0272 {
0273     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0274     struct mlx5_rep_uplink_priv *uplink_priv;
0275     struct mlx5e_rep_priv *uplink_rpriv;
0276 
0277     if (is_mdev_switchdev_mode(priv->mdev)) {
0278         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
0279         uplink_priv = &uplink_rpriv->uplink_priv;
0280 
0281         return uplink_priv->int_port_priv;
0282     }
0283 
0284     return NULL;
0285 }
0286 
0287 struct mlx5e_flow_meters *
0288 mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
0289 {
0290     struct mlx5_eswitch *esw = dev->priv.eswitch;
0291     struct mlx5_rep_uplink_priv *uplink_priv;
0292     struct mlx5e_rep_priv *uplink_rpriv;
0293     struct mlx5e_priv *priv;
0294 
0295     if (is_mdev_switchdev_mode(dev)) {
0296         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
0297         uplink_priv = &uplink_rpriv->uplink_priv;
0298         priv = netdev_priv(uplink_rpriv->netdev);
0299         if (!uplink_priv->flow_meters)
0300             uplink_priv->flow_meters =
0301                 mlx5e_flow_meters_init(priv,
0302                                MLX5_FLOW_NAMESPACE_FDB,
0303                                uplink_priv->post_act);
0304         if (!IS_ERR(uplink_priv->flow_meters))
0305             return uplink_priv->flow_meters;
0306     }
0307 
0308     return NULL;
0309 }
0310 
0311 static struct mlx5_tc_ct_priv *
0312 get_ct_priv(struct mlx5e_priv *priv)
0313 {
0314     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0315     struct mlx5_rep_uplink_priv *uplink_priv;
0316     struct mlx5e_rep_priv *uplink_rpriv;
0317 
0318     if (is_mdev_switchdev_mode(priv->mdev)) {
0319         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
0320         uplink_priv = &uplink_rpriv->uplink_priv;
0321 
0322         return uplink_priv->ct_priv;
0323     }
0324 
0325     return priv->fs->tc->ct;
0326 }
0327 
0328 static struct mlx5e_tc_psample *
0329 get_sample_priv(struct mlx5e_priv *priv)
0330 {
0331     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0332     struct mlx5_rep_uplink_priv *uplink_priv;
0333     struct mlx5e_rep_priv *uplink_rpriv;
0334 
0335     if (is_mdev_switchdev_mode(priv->mdev)) {
0336         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
0337         uplink_priv = &uplink_rpriv->uplink_priv;
0338 
0339         return uplink_priv->tc_psample;
0340     }
0341 
0342     return NULL;
0343 }
0344 
0345 static struct mlx5e_post_act *
0346 get_post_action(struct mlx5e_priv *priv)
0347 {
0348     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0349     struct mlx5_rep_uplink_priv *uplink_priv;
0350     struct mlx5e_rep_priv *uplink_rpriv;
0351 
0352     if (is_mdev_switchdev_mode(priv->mdev)) {
0353         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
0354         uplink_priv = &uplink_rpriv->uplink_priv;
0355 
0356         return uplink_priv->post_act;
0357     }
0358 
0359     return priv->fs->tc->post_act;
0360 }
0361 
0362 struct mlx5_flow_handle *
0363 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
0364             struct mlx5_flow_spec *spec,
0365             struct mlx5_flow_attr *attr)
0366 {
0367     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0368 
0369     if (is_mdev_switchdev_mode(priv->mdev))
0370         return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
0371 
0372     return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
0373 }
0374 
0375 void
0376 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
0377             struct mlx5_flow_handle *rule,
0378             struct mlx5_flow_attr *attr)
0379 {
0380     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0381 
0382     if (is_mdev_switchdev_mode(priv->mdev)) {
0383         mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
0384         return;
0385     }
0386 
0387     mlx5e_del_offloaded_nic_rule(priv, rule, attr);
0388 }
0389 
0390 static bool
0391 is_flow_meter_action(struct mlx5_flow_attr *attr)
0392 {
0393     return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
0394         (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER));
0395 }
0396 
0397 static int
0398 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
0399             struct mlx5_flow_attr *attr)
0400 {
0401     struct mlx5e_post_act *post_act = get_post_action(priv);
0402     struct mlx5e_post_meter_priv *post_meter;
0403     enum mlx5_flow_namespace_type ns_type;
0404     struct mlx5e_flow_meter_handle *meter;
0405 
0406     meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
0407     if (IS_ERR(meter)) {
0408         mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
0409         return PTR_ERR(meter);
0410     }
0411 
0412     ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
0413     post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter,
0414                        meter->red_counter);
0415     if (IS_ERR(post_meter)) {
0416         mlx5_core_err(priv->mdev, "Failed to init post meter\n");
0417         goto err_meter_init;
0418     }
0419 
0420     attr->meter_attr.meter = meter;
0421     attr->meter_attr.post_meter = post_meter;
0422     attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
0423     attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
0424 
0425     return 0;
0426 
0427 err_meter_init:
0428     mlx5e_tc_meter_put(meter);
0429     return PTR_ERR(post_meter);
0430 }
0431 
0432 static void
0433 mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr)
0434 {
0435     mlx5e_post_meter_cleanup(attr->meter_attr.post_meter);
0436     mlx5e_tc_meter_put(attr->meter_attr.meter);
0437 }
0438 
0439 struct mlx5_flow_handle *
0440 mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
0441               struct mlx5_flow_spec *spec,
0442               struct mlx5_flow_attr *attr)
0443 {
0444     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0445     int err;
0446 
0447     if (attr->flags & MLX5_ATTR_FLAG_CT) {
0448         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
0449             &attr->parse_attr->mod_hdr_acts;
0450 
0451         return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
0452                            spec, attr,
0453                            mod_hdr_acts);
0454     }
0455 
0456     if (!is_mdev_switchdev_mode(priv->mdev))
0457         return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
0458 
0459     if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
0460         return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
0461 
0462     if (is_flow_meter_action(attr)) {
0463         err = mlx5e_tc_add_flow_meter(priv, attr);
0464         if (err)
0465             return ERR_PTR(err);
0466     }
0467 
0468     return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
0469 }
0470 
0471 void
0472 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
0473             struct mlx5_flow_handle *rule,
0474             struct mlx5_flow_attr *attr)
0475 {
0476     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0477 
0478     if (attr->flags & MLX5_ATTR_FLAG_CT) {
0479         mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
0480         return;
0481     }
0482 
0483     if (!is_mdev_switchdev_mode(priv->mdev)) {
0484         mlx5e_del_offloaded_nic_rule(priv, rule, attr);
0485         return;
0486     }
0487 
0488     if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
0489         mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
0490         return;
0491     }
0492 
0493     mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
0494 
0495     if (attr->meter_attr.meter)
0496         mlx5e_tc_del_flow_meter(attr);
0497 }
0498 
0499 int
0500 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
0501               struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
0502               enum mlx5_flow_namespace_type ns,
0503               enum mlx5e_tc_attr_to_reg type,
0504               u32 data)
0505 {
0506     int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
0507 
0508     return ret < 0 ? ret : 0;
0509 }
0510 
0511 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
0512                       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
0513                       enum mlx5e_tc_attr_to_reg type,
0514                       int act_id, u32 data)
0515 {
0516     int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
0517     int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
0518     int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
0519     char *modact;
0520 
0521     modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
0522 
0523     /* Firmware has a 5-bit length field, and 0 means 32 bits */
0524     if (mlen == 32)
0525         mlen = 0;
0526 
0527     MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
0528     MLX5_SET(set_action_in, modact, field, mfield);
0529     MLX5_SET(set_action_in, modact, offset, moffset);
0530     MLX5_SET(set_action_in, modact, length, mlen);
0531     MLX5_SET(set_action_in, modact, data, data);
0532 }
0533 
0534 struct mlx5e_hairpin {
0535     struct mlx5_hairpin *pair;
0536 
0537     struct mlx5_core_dev *func_mdev;
0538     struct mlx5e_priv *func_priv;
0539     u32 tdn;
0540     struct mlx5e_tir direct_tir;
0541 
0542     int num_channels;
0543     struct mlx5e_rqt indir_rqt;
0544     struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
0545     struct mlx5_ttc_table *ttc;
0546 };
0547 
0548 struct mlx5e_hairpin_entry {
0549     /* a node of a hash table which keeps all the hairpin entries */
0550     struct hlist_node hairpin_hlist;
0551 
0552     /* protects flows list */
0553     spinlock_t flows_lock;
0554     /* flows sharing the same hairpin */
0555     struct list_head flows;
0556     /* hpe's that were not fully initialized when dead peer update event
0557      * function traversed them.
0558      */
0559     struct list_head dead_peer_wait_list;
0560 
0561     u16 peer_vhca_id;
0562     u8 prio;
0563     struct mlx5e_hairpin *hp;
0564     refcount_t refcnt;
0565     struct completion res_ready;
0566 };
0567 
0568 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
0569                   struct mlx5e_tc_flow *flow);
0570 
0571 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
0572 {
0573     if (!flow || !refcount_inc_not_zero(&flow->refcnt))
0574         return ERR_PTR(-EINVAL);
0575     return flow;
0576 }
0577 
0578 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
0579 {
0580     if (refcount_dec_and_test(&flow->refcnt)) {
0581         mlx5e_tc_del_flow(priv, flow);
0582         kfree_rcu(flow, rcu_head);
0583     }
0584 }
0585 
0586 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
0587 {
0588     return flow_flag_test(flow, ESWITCH);
0589 }
0590 
0591 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
0592 {
0593     return flow_flag_test(flow, FT);
0594 }
0595 
0596 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
0597 {
0598     return flow_flag_test(flow, OFFLOADED);
0599 }
0600 
0601 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
0602 {
0603     return mlx5e_is_eswitch_flow(flow) ?
0604         MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
0605 }
0606 
0607 static struct mod_hdr_tbl *
0608 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
0609 {
0610     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
0611 
0612     return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
0613         &esw->offloads.mod_hdr :
0614         &priv->fs->tc->mod_hdr;
0615 }
0616 
0617 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
0618                 struct mlx5e_tc_flow *flow,
0619                 struct mlx5e_tc_flow_parse_attr *parse_attr)
0620 {
0621     struct mlx5_modify_hdr *modify_hdr;
0622     struct mlx5e_mod_hdr_handle *mh;
0623 
0624     mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
0625                   mlx5e_get_flow_namespace(flow),
0626                   &parse_attr->mod_hdr_acts);
0627     if (IS_ERR(mh))
0628         return PTR_ERR(mh);
0629 
0630     modify_hdr = mlx5e_mod_hdr_get(mh);
0631     flow->attr->modify_hdr = modify_hdr;
0632     flow->mh = mh;
0633 
0634     return 0;
0635 }
0636 
0637 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
0638                  struct mlx5e_tc_flow *flow)
0639 {
0640     /* flow wasn't fully initialized */
0641     if (!flow->mh)
0642         return;
0643 
0644     mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
0645                  flow->mh);
0646     flow->mh = NULL;
0647 }
0648 
0649 static
0650 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
0651 {
0652     struct mlx5_core_dev *mdev;
0653     struct net_device *netdev;
0654     struct mlx5e_priv *priv;
0655 
0656     netdev = dev_get_by_index(net, ifindex);
0657     if (!netdev)
0658         return ERR_PTR(-ENODEV);
0659 
0660     priv = netdev_priv(netdev);
0661     mdev = priv->mdev;
0662     dev_put(netdev);
0663 
0664     /* Mirred tc action holds a refcount on the ifindex net_device (see
0665      * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
0666      * after dev_put(netdev), while we're in the context of adding a tc flow.
0667      *
0668      * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
0669      * stored in a hairpin object, which exists until all flows, that refer to it, get
0670      * removed.
0671      *
0672      * On the other hand, after a hairpin object has been created, the peer net_device may
0673      * be removed/unbound while there are still some hairpin flows that are using it. This
0674      * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
0675      * NETDEV_UNREGISTER event of the peer net_device.
0676      */
0677     return mdev;
0678 }
0679 
0680 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
0681 {
0682     struct mlx5e_tir_builder *builder;
0683     int err;
0684 
0685     builder = mlx5e_tir_builder_alloc(false);
0686     if (!builder)
0687         return -ENOMEM;
0688 
0689     err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
0690     if (err)
0691         goto out;
0692 
0693     mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
0694     err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
0695     if (err)
0696         goto create_tir_err;
0697 
0698 out:
0699     mlx5e_tir_builder_free(builder);
0700     return err;
0701 
0702 create_tir_err:
0703     mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
0704 
0705     goto out;
0706 }
0707 
0708 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
0709 {
0710     mlx5e_tir_destroy(&hp->direct_tir);
0711     mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
0712 }
0713 
0714 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
0715 {
0716     struct mlx5e_priv *priv = hp->func_priv;
0717     struct mlx5_core_dev *mdev = priv->mdev;
0718     struct mlx5e_rss_params_indir *indir;
0719     int err;
0720 
0721     indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
0722     if (!indir)
0723         return -ENOMEM;
0724 
0725     mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
0726     err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
0727                    mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
0728                    indir);
0729 
0730     kvfree(indir);
0731     return err;
0732 }
0733 
0734 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
0735 {
0736     struct mlx5e_priv *priv = hp->func_priv;
0737     struct mlx5e_rss_params_hash rss_hash;
0738     enum mlx5_traffic_types tt, max_tt;
0739     struct mlx5e_tir_builder *builder;
0740     int err = 0;
0741 
0742     builder = mlx5e_tir_builder_alloc(false);
0743     if (!builder)
0744         return -ENOMEM;
0745 
0746     rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
0747 
0748     for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
0749         struct mlx5e_rss_params_traffic_type rss_tt;
0750 
0751         rss_tt = mlx5e_rss_get_default_tt_config(tt);
0752 
0753         mlx5e_tir_builder_build_rqt(builder, hp->tdn,
0754                         mlx5e_rqt_get_rqtn(&hp->indir_rqt),
0755                         false);
0756         mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
0757 
0758         err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
0759         if (err) {
0760             mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
0761             goto err_destroy_tirs;
0762         }
0763 
0764         mlx5e_tir_builder_clear(builder);
0765     }
0766 
0767 out:
0768     mlx5e_tir_builder_free(builder);
0769     return err;
0770 
0771 err_destroy_tirs:
0772     max_tt = tt;
0773     for (tt = 0; tt < max_tt; tt++)
0774         mlx5e_tir_destroy(&hp->indir_tir[tt]);
0775 
0776     goto out;
0777 }
0778 
0779 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
0780 {
0781     int tt;
0782 
0783     for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
0784         mlx5e_tir_destroy(&hp->indir_tir[tt]);
0785 }
0786 
0787 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
0788                      struct ttc_params *ttc_params)
0789 {
0790     struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
0791     int tt;
0792 
0793     memset(ttc_params, 0, sizeof(*ttc_params));
0794 
0795     ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
0796                          MLX5_FLOW_NAMESPACE_KERNEL);
0797     for (tt = 0; tt < MLX5_NUM_TT; tt++) {
0798         ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
0799         ttc_params->dests[tt].tir_num =
0800             tt == MLX5_TT_ANY ?
0801                 mlx5e_tir_get_tirn(&hp->direct_tir) :
0802                 mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
0803     }
0804 
0805     ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
0806     ft_attr->prio = MLX5E_TC_PRIO;
0807 }
0808 
0809 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
0810 {
0811     struct mlx5e_priv *priv = hp->func_priv;
0812     struct ttc_params ttc_params;
0813     int err;
0814 
0815     err = mlx5e_hairpin_create_indirect_rqt(hp);
0816     if (err)
0817         return err;
0818 
0819     err = mlx5e_hairpin_create_indirect_tirs(hp);
0820     if (err)
0821         goto err_create_indirect_tirs;
0822 
0823     mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
0824     hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
0825     if (IS_ERR(hp->ttc)) {
0826         err = PTR_ERR(hp->ttc);
0827         goto err_create_ttc_table;
0828     }
0829 
0830     netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
0831            hp->num_channels,
0832            mlx5_get_ttc_flow_table(priv->fs->ttc)->id);
0833 
0834     return 0;
0835 
0836 err_create_ttc_table:
0837     mlx5e_hairpin_destroy_indirect_tirs(hp);
0838 err_create_indirect_tirs:
0839     mlx5e_rqt_destroy(&hp->indir_rqt);
0840 
0841     return err;
0842 }
0843 
0844 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
0845 {
0846     mlx5_destroy_ttc_table(hp->ttc);
0847     mlx5e_hairpin_destroy_indirect_tirs(hp);
0848     mlx5e_rqt_destroy(&hp->indir_rqt);
0849 }
0850 
0851 static struct mlx5e_hairpin *
0852 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
0853              int peer_ifindex)
0854 {
0855     struct mlx5_core_dev *func_mdev, *peer_mdev;
0856     struct mlx5e_hairpin *hp;
0857     struct mlx5_hairpin *pair;
0858     int err;
0859 
0860     hp = kzalloc(sizeof(*hp), GFP_KERNEL);
0861     if (!hp)
0862         return ERR_PTR(-ENOMEM);
0863 
0864     func_mdev = priv->mdev;
0865     peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
0866     if (IS_ERR(peer_mdev)) {
0867         err = PTR_ERR(peer_mdev);
0868         goto create_pair_err;
0869     }
0870 
0871     pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
0872     if (IS_ERR(pair)) {
0873         err = PTR_ERR(pair);
0874         goto create_pair_err;
0875     }
0876     hp->pair = pair;
0877     hp->func_mdev = func_mdev;
0878     hp->func_priv = priv;
0879     hp->num_channels = params->num_channels;
0880 
0881     err = mlx5e_hairpin_create_transport(hp);
0882     if (err)
0883         goto create_transport_err;
0884 
0885     if (hp->num_channels > 1) {
0886         err = mlx5e_hairpin_rss_init(hp);
0887         if (err)
0888             goto rss_init_err;
0889     }
0890 
0891     return hp;
0892 
0893 rss_init_err:
0894     mlx5e_hairpin_destroy_transport(hp);
0895 create_transport_err:
0896     mlx5_core_hairpin_destroy(hp->pair);
0897 create_pair_err:
0898     kfree(hp);
0899     return ERR_PTR(err);
0900 }
0901 
0902 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
0903 {
0904     if (hp->num_channels > 1)
0905         mlx5e_hairpin_rss_cleanup(hp);
0906     mlx5e_hairpin_destroy_transport(hp);
0907     mlx5_core_hairpin_destroy(hp->pair);
0908     kvfree(hp);
0909 }
0910 
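/* The hash key below packs the peer vhca id into the upper 16 bits and the
 * matched VLAN prio (or UNKNOWN_MATCH_PRIO) into the low bits, so a hairpin
 * instance is shared per (peer device, prio) pair.
 */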
0911 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
0912 {
0913     return (peer_vhca_id << 16 | prio);
0914 }
0915 
0916 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
0917                              u16 peer_vhca_id, u8 prio)
0918 {
0919     struct mlx5e_hairpin_entry *hpe;
0920     u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
0921 
0922     hash_for_each_possible(priv->fs->tc->hairpin_tbl, hpe,
0923                    hairpin_hlist, hash_key) {
0924         if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
0925             refcount_inc(&hpe->refcnt);
0926             return hpe;
0927         }
0928     }
0929 
0930     return NULL;
0931 }
0932 
0933 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
0934                   struct mlx5e_hairpin_entry *hpe)
0935 {
0936     /* no more hairpin flows for us, release the hairpin pair */
0937     if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs->tc->hairpin_tbl_lock))
0938         return;
0939     hash_del(&hpe->hairpin_hlist);
0940     mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
0941 
0942     if (!IS_ERR_OR_NULL(hpe->hp)) {
0943         netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
0944                dev_name(hpe->hp->pair->peer_mdev->device));
0945 
0946         mlx5e_hairpin_destroy(hpe->hp);
0947     }
0948 
0949     WARN_ON(!list_empty(&hpe->flows));
0950     kfree(hpe);
0951 }
0952 
0953 #define UNKNOWN_MATCH_PRIO 8
0954 
0955 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
0956                   struct mlx5_flow_spec *spec, u8 *match_prio,
0957                   struct netlink_ext_ack *extack)
0958 {
0959     void *headers_c, *headers_v;
0960     u8 prio_val, prio_mask = 0;
0961     bool vlan_present;
0962 
0963 #ifdef CONFIG_MLX5_CORE_EN_DCB
0964     if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
0965         NL_SET_ERR_MSG_MOD(extack,
0966                    "only PCP trust state supported for hairpin");
0967         return -EOPNOTSUPP;
0968     }
0969 #endif
0970     headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
0971     headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
0972 
0973     vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
0974     if (vlan_present) {
0975         prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
0976         prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
0977     }
0978 
0979     if (!vlan_present || !prio_mask) {
0980         prio_val = UNKNOWN_MATCH_PRIO;
0981     } else if (prio_mask != 0x7) {
0982         NL_SET_ERR_MSG_MOD(extack,
0983                    "masked priority match not supported for hairpin");
0984         return -EOPNOTSUPP;
0985     }
0986 
0987     *match_prio = prio_val;
0988     return 0;
0989 }
0990 
0991 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
0992                   struct mlx5e_tc_flow *flow,
0993                   struct mlx5e_tc_flow_parse_attr *parse_attr,
0994                   struct netlink_ext_ack *extack)
0995 {
0996     int peer_ifindex = parse_attr->mirred_ifindex[0];
0997     struct mlx5_hairpin_params params;
0998     struct mlx5_core_dev *peer_mdev;
0999     struct mlx5e_hairpin_entry *hpe;
1000     struct mlx5e_hairpin *hp;
1001     u64 link_speed64;
1002     u32 link_speed;
1003     u8 match_prio;
1004     u16 peer_id;
1005     int err;
1006 
1007     peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
1008     if (IS_ERR(peer_mdev)) {
1009         NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
1010         return PTR_ERR(peer_mdev);
1011     }
1012 
1013     if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
1014         NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
1015         return -EOPNOTSUPP;
1016     }
1017 
1018     peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
1019     err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
1020                      extack);
1021     if (err)
1022         return err;
1023 
1024     mutex_lock(&priv->fs->tc->hairpin_tbl_lock);
1025     hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
1026     if (hpe) {
1027         mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
1028         wait_for_completion(&hpe->res_ready);
1029 
1030         if (IS_ERR(hpe->hp)) {
1031             err = -EREMOTEIO;
1032             goto out_err;
1033         }
1034         goto attach_flow;
1035     }
1036 
1037     hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
1038     if (!hpe) {
1039         mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
1040         return -ENOMEM;
1041     }
1042 
1043     spin_lock_init(&hpe->flows_lock);
1044     INIT_LIST_HEAD(&hpe->flows);
1045     INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
1046     hpe->peer_vhca_id = peer_id;
1047     hpe->prio = match_prio;
1048     refcount_set(&hpe->refcnt, 1);
1049     init_completion(&hpe->res_ready);
1050 
1051     hash_add(priv->fs->tc->hairpin_tbl, &hpe->hairpin_hlist,
1052          hash_hairpin_info(peer_id, match_prio));
1053     mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
1054 
1055     params.log_data_size = 16;
1056     params.log_data_size = min_t(u8, params.log_data_size,
1057                      MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
1058     params.log_data_size = max_t(u8, params.log_data_size,
1059                      MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));
1060 
1061     params.log_num_packets = params.log_data_size -
1062                  MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
1063     params.log_num_packets = min_t(u8, params.log_num_packets,
1064                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));
1065 
1066     params.q_counter = priv->q_counter;
1067     /* allocate one hairpin channel for each 50 Gbps share of the link speed */
1068     mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
1069     link_speed = max_t(u32, link_speed, 50000);
1070     link_speed64 = link_speed;
1071     do_div(link_speed64, 50000);
1072     params.num_channels = link_speed64;
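    /* Rough example, assuming mlx5e_port_max_linkspeed() reports Mbps: a
     * 200000 Mbps (200 Gbps) port yields num_channels = 4, while anything
     * at or below 50 Gbps gets a single channel.
     */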
1073 
1074     hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
1075     hpe->hp = hp;
1076     complete_all(&hpe->res_ready);
1077     if (IS_ERR(hp)) {
1078         err = PTR_ERR(hp);
1079         goto out_err;
1080     }
1081 
1082     netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
1083            mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
1084            dev_name(hp->pair->peer_mdev->device),
1085            hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
1086 
1087 attach_flow:
1088     if (hpe->hp->num_channels > 1) {
1089         flow_flag_set(flow, HAIRPIN_RSS);
1090         flow->attr->nic_attr->hairpin_ft =
1091             mlx5_get_ttc_flow_table(hpe->hp->ttc);
1092     } else {
1093         flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
1094     }
1095 
1096     flow->hpe = hpe;
1097     spin_lock(&hpe->flows_lock);
1098     list_add(&flow->hairpin, &hpe->flows);
1099     spin_unlock(&hpe->flows_lock);
1100 
1101     return 0;
1102 
1103 out_err:
1104     mlx5e_hairpin_put(priv, hpe);
1105     return err;
1106 }
1107 
1108 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
1109                    struct mlx5e_tc_flow *flow)
1110 {
1111     /* flow wasn't fully initialized */
1112     if (!flow->hpe)
1113         return;
1114 
1115     spin_lock(&flow->hpe->flows_lock);
1116     list_del(&flow->hairpin);
1117     spin_unlock(&flow->hpe->flows_lock);
1118 
1119     mlx5e_hairpin_put(priv, flow->hpe);
1120     flow->hpe = NULL;
1121 }
1122 
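/* Destination selection in mlx5e_add_offloaded_nic_rule() below, summarized
 * (descriptive only): an explicit attr->dest_ft wins, then a hairpin RSS
 * (TTC) table, then a hairpin TIR, and only otherwise does FWD_DEST fall
 * through to the destination chain or the VLAN flow table; a flow counter
 * is appended as an extra destination when the COUNT action is set.
 */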
1123 struct mlx5_flow_handle *
1124 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
1125                  struct mlx5_flow_spec *spec,
1126                  struct mlx5_flow_attr *attr)
1127 {
1128     struct mlx5_flow_context *flow_context = &spec->flow_context;
1129     struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
1130     struct mlx5e_tc_table *tc = priv->fs->tc;
1131     struct mlx5_flow_destination dest[2] = {};
1132     struct mlx5_fs_chains *nic_chains;
1133     struct mlx5_flow_act flow_act = {
1134         .action = attr->action,
1135         .flags    = FLOW_ACT_NO_APPEND,
1136     };
1137     struct mlx5_flow_handle *rule;
1138     struct mlx5_flow_table *ft;
1139     int dest_ix = 0;
1140 
1141     nic_chains = mlx5e_nic_chains(tc);
1142     flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1143     flow_context->flow_tag = nic_attr->flow_tag;
1144 
1145     if (attr->dest_ft) {
1146         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1147         dest[dest_ix].ft = attr->dest_ft;
1148         dest_ix++;
1149     } else if (nic_attr->hairpin_ft) {
1150         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1151         dest[dest_ix].ft = nic_attr->hairpin_ft;
1152         dest_ix++;
1153     } else if (nic_attr->hairpin_tirn) {
1154         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1155         dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
1156         dest_ix++;
1157     } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1158         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1159         if (attr->dest_chain) {
1160             dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1161                                  attr->dest_chain, 1,
1162                                  MLX5E_TC_FT_LEVEL);
1163             if (IS_ERR(dest[dest_ix].ft))
1164                 return ERR_CAST(dest[dest_ix].ft);
1165         } else {
1166             dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs->vlan);
1167         }
1168         dest_ix++;
1169     }
1170 
1171     if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1172         MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1173         flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1174 
1175     if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1176         dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1177         dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1178         dest_ix++;
1179     }
1180 
1181     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1182         flow_act.modify_hdr = attr->modify_hdr;
1183 
1184     mutex_lock(&tc->t_lock);
1185     if (IS_ERR_OR_NULL(tc->t)) {
1186         /* Create the root table here if it doesn't exist yet */
1187         tc->t =
1188             mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1189 
1190         if (IS_ERR(tc->t)) {
1191             mutex_unlock(&tc->t_lock);
1192             netdev_err(priv->netdev,
1193                    "Failed to create tc offload table\n");
1194             rule = ERR_CAST(priv->fs->tc->t);
1195             goto err_ft_get;
1196         }
1197     }
1198     mutex_unlock(&tc->t_lock);
1199 
1200     if (attr->chain || attr->prio)
1201         ft = mlx5_chains_get_table(nic_chains,
1202                        attr->chain, attr->prio,
1203                        MLX5E_TC_FT_LEVEL);
1204     else
1205         ft = attr->ft;
1206 
1207     if (IS_ERR(ft)) {
1208         rule = ERR_CAST(ft);
1209         goto err_ft_get;
1210     }
1211 
1212     if (attr->outer_match_level != MLX5_MATCH_NONE)
1213         spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1214 
1215     rule = mlx5_add_flow_rules(ft, spec,
1216                    &flow_act, dest, dest_ix);
1217     if (IS_ERR(rule))
1218         goto err_rule;
1219 
1220     return rule;
1221 
1222 err_rule:
1223     if (attr->chain || attr->prio)
1224         mlx5_chains_put_table(nic_chains,
1225                       attr->chain, attr->prio,
1226                       MLX5E_TC_FT_LEVEL);
1227 err_ft_get:
1228     if (attr->dest_chain)
1229         mlx5_chains_put_table(nic_chains,
1230                       attr->dest_chain, 1,
1231                       MLX5E_TC_FT_LEVEL);
1232 
1233     return ERR_CAST(rule);
1234 }
1235 
1236 static int
1237 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
1238             struct mlx5_flow_attr *attr)
1239 
1240 {
1241     struct mlx5_fc *counter;
1242 
1243     counter = mlx5_fc_create(counter_dev, true);
1244     if (IS_ERR(counter))
1245         return PTR_ERR(counter);
1246 
1247     attr->counter = counter;
1248     return 0;
1249 }
1250 
1251 static int
1252 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1253               struct mlx5e_tc_flow *flow,
1254               struct netlink_ext_ack *extack)
1255 {
1256     struct mlx5e_tc_flow_parse_attr *parse_attr;
1257     struct mlx5_flow_attr *attr = flow->attr;
1258     struct mlx5_core_dev *dev = priv->mdev;
1259     int err;
1260 
1261     parse_attr = attr->parse_attr;
1262 
1263     if (flow_flag_test(flow, HAIRPIN)) {
1264         err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1265         if (err)
1266             return err;
1267     }
1268 
1269     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1270         err = alloc_flow_attr_counter(dev, attr);
1271         if (err)
1272             return err;
1273     }
1274 
1275     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1276         err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1277         mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
1278         if (err)
1279             return err;
1280     }
1281 
1282     if (attr->flags & MLX5_ATTR_FLAG_CT)
1283         flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
1284                             attr, &parse_attr->mod_hdr_acts);
1285     else
1286         flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1287                                  attr);
1288 
1289     return PTR_ERR_OR_ZERO(flow->rule[0]);
1290 }
1291 
1292 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1293                   struct mlx5_flow_handle *rule,
1294                   struct mlx5_flow_attr *attr)
1295 {
1296     struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv->fs->tc);
1297 
1298     mlx5_del_flow_rules(rule);
1299 
1300     if (attr->chain || attr->prio)
1301         mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1302                       MLX5E_TC_FT_LEVEL);
1303 
1304     if (attr->dest_chain)
1305         mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1306                       MLX5E_TC_FT_LEVEL);
1307 }
1308 
1309 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1310                   struct mlx5e_tc_flow *flow)
1311 {
1312     struct mlx5_flow_attr *attr = flow->attr;
1313     struct mlx5e_tc_table *tc = priv->fs->tc;
1314 
1315     flow_flag_clear(flow, OFFLOADED);
1316 
1317     if (attr->flags & MLX5_ATTR_FLAG_CT)
1318         mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
1319     else if (!IS_ERR_OR_NULL(flow->rule[0]))
1320         mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1321 
1322     /* Remove root table if no rules are left to avoid
1323      * extra steering hops.
1324      */
1325     mutex_lock(&priv->fs->tc->t_lock);
1326     if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1327         !IS_ERR_OR_NULL(tc->t)) {
1328         mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
1329         priv->fs->tc->t = NULL;
1330     }
1331     mutex_unlock(&priv->fs->tc->t_lock);
1332 
1333     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1334         mlx5e_detach_mod_hdr(priv, flow);
1335 
1336     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1337         mlx5_fc_destroy(priv->mdev, attr->counter);
1338 
1339     if (flow_flag_test(flow, HAIRPIN))
1340         mlx5e_hairpin_flow_del(priv, flow);
1341 
1342     free_flow_post_acts(flow);
1343 
1344     kvfree(attr->parse_attr);
1345     kfree(flow->attr);
1346 }
1347 
1348 struct mlx5_flow_handle *
1349 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1350                struct mlx5e_tc_flow *flow,
1351                struct mlx5_flow_spec *spec,
1352                struct mlx5_flow_attr *attr)
1353 {
1354     struct mlx5_flow_handle *rule;
1355 
1356     if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1357         return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1358 
1359     rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
1360 
1361     if (IS_ERR(rule))
1362         return rule;
1363 
1364     if (attr->esw_attr->split_count) {
1365         flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1366         if (IS_ERR(flow->rule[1]))
1367             goto err_rule1;
1368     }
1369 
1370     return rule;
1371 
1372 err_rule1:
1373     mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
1374     return flow->rule[1];
1375 }
1376 
1377 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1378                   struct mlx5e_tc_flow *flow,
1379                   struct mlx5_flow_attr *attr)
1380 {
1381     flow_flag_clear(flow, OFFLOADED);
1382 
1383     if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1384         return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1385 
1386     if (attr->esw_attr->split_count)
1387         mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1388 
1389     mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
1390 }
1391 
1392 struct mlx5_flow_handle *
1393 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1394                   struct mlx5e_tc_flow *flow,
1395                   struct mlx5_flow_spec *spec)
1396 {
1397     struct mlx5_flow_attr *slow_attr;
1398     struct mlx5_flow_handle *rule;
1399 
1400     slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1401     if (!slow_attr)
1402         return ERR_PTR(-ENOMEM);
1403 
1404     memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1405     slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1406     slow_attr->esw_attr->split_count = 0;
1407     slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1408 
1409     rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1410     if (!IS_ERR(rule))
1411         flow_flag_set(flow, SLOW);
1412 
1413     kfree(slow_attr);
1414 
1415     return rule;
1416 }
1417 
1418 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1419                        struct mlx5e_tc_flow *flow)
1420 {
1421     struct mlx5_flow_attr *slow_attr;
1422 
1423     slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1424     if (!slow_attr) {
1425         mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1426         return;
1427     }
1428 
1429     memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1430     slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1431     slow_attr->esw_attr->split_count = 0;
1432     slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1433     mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1434     flow_flag_clear(flow, SLOW);
1435     kfree(slow_attr);
1436 }
1437 
1438 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1439  * function.
1440  */
1441 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1442                  struct list_head *unready_flows)
1443 {
1444     flow_flag_set(flow, NOT_READY);
1445     list_add_tail(&flow->unready, unready_flows);
1446 }
1447 
1448 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1449  * function.
1450  */
1451 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1452 {
1453     list_del(&flow->unready);
1454     flow_flag_clear(flow, NOT_READY);
1455 }
1456 
1457 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1458 {
1459     struct mlx5_rep_uplink_priv *uplink_priv;
1460     struct mlx5e_rep_priv *rpriv;
1461     struct mlx5_eswitch *esw;
1462 
1463     esw = flow->priv->mdev->priv.eswitch;
1464     rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1465     uplink_priv = &rpriv->uplink_priv;
1466 
1467     mutex_lock(&uplink_priv->unready_flows_lock);
1468     unready_flow_add(flow, &uplink_priv->unready_flows);
1469     mutex_unlock(&uplink_priv->unready_flows_lock);
1470 }
1471 
1472 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1473 {
1474     struct mlx5_rep_uplink_priv *uplink_priv;
1475     struct mlx5e_rep_priv *rpriv;
1476     struct mlx5_eswitch *esw;
1477 
1478     esw = flow->priv->mdev->priv.eswitch;
1479     rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1480     uplink_priv = &rpriv->uplink_priv;
1481 
1482     mutex_lock(&uplink_priv->unready_flows_lock);
1483     unready_flow_del(flow);
1484     mutex_unlock(&uplink_priv->unready_flows_lock);
1485 }
1486 
1487 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1488 {
1489     struct mlx5_core_dev *out_mdev, *route_mdev;
1490     struct mlx5e_priv *out_priv, *route_priv;
1491 
1492     out_priv = netdev_priv(out_dev);
1493     out_mdev = out_priv->mdev;
1494     route_priv = netdev_priv(route_dev);
1495     route_mdev = route_priv->mdev;
1496 
1497     if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
1498         route_mdev->coredev_type != MLX5_COREDEV_VF)
1499         return false;
1500 
1501     return mlx5e_same_hw_devs(out_priv, route_priv);
1502 }
1503 
1504 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1505 {
1506     struct mlx5e_priv *out_priv, *route_priv;
1507     struct mlx5_devcom *devcom = NULL;
1508     struct mlx5_core_dev *route_mdev;
1509     struct mlx5_eswitch *esw;
1510     u16 vhca_id;
1511     int err;
1512 
1513     out_priv = netdev_priv(out_dev);
1514     esw = out_priv->mdev->priv.eswitch;
1515     route_priv = netdev_priv(route_dev);
1516     route_mdev = route_priv->mdev;
1517 
1518     vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1519     if (mlx5_lag_is_active(out_priv->mdev)) {
1520         /* In the LAG case we may get devices from different eswitch instances.
1521          * If we failed to get the vport num, it most likely means we are on the
1522          * wrong eswitch.
1523          */
1524         err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1525         if (err != -ENOENT)
1526             return err;
1527 
1528         devcom = out_priv->mdev->priv.devcom;
1529         esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1530         if (!esw)
1531             return -ENODEV;
1532     }
1533 
1534     err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1535     if (devcom)
1536         mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1537     return err;
1538 }
1539 
1540 int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
1541                   struct mlx5e_tc_flow *flow,
1542                   struct mlx5_flow_attr *attr)
1543 {
1544     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1545     struct mlx5_modify_hdr *mod_hdr;
1546 
1547     mod_hdr = mlx5_modify_header_alloc(priv->mdev,
1548                        mlx5e_get_flow_namespace(flow),
1549                        mod_hdr_acts->num_actions,
1550                        mod_hdr_acts->actions);
1551     if (IS_ERR(mod_hdr))
1552         return PTR_ERR(mod_hdr);
1553 
1554     WARN_ON(attr->modify_hdr);
1555     attr->modify_hdr = mod_hdr;
1556 
1557     return 0;
1558 }
1559 
1560 static int
1561 set_encap_dests(struct mlx5e_priv *priv,
1562         struct mlx5e_tc_flow *flow,
1563         struct mlx5_flow_attr *attr,
1564         struct netlink_ext_ack *extack,
1565         bool *encap_valid,
1566         bool *vf_tun)
1567 {
1568     struct mlx5e_tc_flow_parse_attr *parse_attr;
1569     struct mlx5_esw_flow_attr *esw_attr;
1570     struct net_device *encap_dev = NULL;
1571     struct mlx5e_rep_priv *rpriv;
1572     struct mlx5e_priv *out_priv;
1573     int out_index;
1574     int err = 0;
1575 
1576     if (!mlx5e_is_eswitch_flow(flow))
1577         return 0;
1578 
1579     parse_attr = attr->parse_attr;
1580     esw_attr = attr->esw_attr;
1581     *vf_tun = false;
1582     *encap_valid = true;
1583 
1584     for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1585         struct net_device *out_dev;
1586         int mirred_ifindex;
1587 
1588         if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1589             continue;
1590 
1591         mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1592         out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1593         if (!out_dev) {
1594             NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1595             err = -ENODEV;
1596             goto out;
1597         }
1598         err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1599                      extack, &encap_dev, encap_valid);
1600         dev_put(out_dev);
1601         if (err)
1602             goto out;
1603 
1604         if (esw_attr->dests[out_index].flags &
1605             MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1606             !esw_attr->dest_int_port)
1607             *vf_tun = true;
1608 
1609         out_priv = netdev_priv(encap_dev);
1610         rpriv = out_priv->ppriv;
1611         esw_attr->dests[out_index].rep = rpriv->rep;
1612         esw_attr->dests[out_index].mdev = out_priv->mdev;
1613     }
1614 
1615     if (*vf_tun && esw_attr->out_count > 1) {
1616         NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1617         err = -EOPNOTSUPP;
1618         goto out;
1619     }
1620 
1621 out:
1622     return err;
1623 }
1624 
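/* Counterpart of set_encap_dests(): detach the encap entry of every
 * MLX5_ESW_DEST_ENCAP destination and free its saved tunnel info, recomputing
 * *vf_tun the same way so the caller knows how the modify-header object was
 * allocated.
 */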
1625 static void
1626 clean_encap_dests(struct mlx5e_priv *priv,
1627           struct mlx5e_tc_flow *flow,
1628           struct mlx5_flow_attr *attr,
1629           bool *vf_tun)
1630 {
1631     struct mlx5_esw_flow_attr *esw_attr;
1632     int out_index;
1633 
1634     if (!mlx5e_is_eswitch_flow(flow))
1635         return;
1636 
1637     esw_attr = attr->esw_attr;
1638     *vf_tun = false;
1639 
1640     for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1641         if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1642             continue;
1643 
1644         if (esw_attr->dests[out_index].flags &
1645             MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1646             !esw_attr->dest_int_port)
1647             *vf_tun = true;
1648 
1649         mlx5e_detach_encap(priv, flow, attr, out_index);
1650         kfree(attr->parse_attr->tun_info[out_index]);
1651     }
1652 }
1653 
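/* Offload a flow to the FDB (eswitch) tables: validate the requested chain and
 * priority against the supported ranges, attach decap route / L3-to-L2 decap
 * state if needed, resolve internal (OVS master) ports, set up encap
 * destinations, vlan actions, the modify-header object and the flow counter,
 * then install the rule - in the slow path table while an encap neighbour is
 * not yet valid, otherwise as a regular FDB rule.
 */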
1654 static int
1655 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1656               struct mlx5e_tc_flow *flow,
1657               struct netlink_ext_ack *extack)
1658 {
1659     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1660     struct mlx5e_tc_flow_parse_attr *parse_attr;
1661     struct mlx5_flow_attr *attr = flow->attr;
1662     struct mlx5_esw_flow_attr *esw_attr;
1663     bool vf_tun, encap_valid;
1664     u32 max_prio, max_chain;
1665     int err = 0;
1666 
1667     parse_attr = attr->parse_attr;
1668     esw_attr = attr->esw_attr;
1669 
1670     /* We check chain range only for tc flows.
1671      * For ft flows, we checked attr->chain was originally 0 and set it to
1672      * FDB_FT_CHAIN which is outside tc range.
1673      * See mlx5e_rep_setup_ft_cb().
1674      */
1675     max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1676     if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1677         NL_SET_ERR_MSG_MOD(extack,
1678                    "Requested chain is out of supported range");
1679         err = -EOPNOTSUPP;
1680         goto err_out;
1681     }
1682 
1683     max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1684     if (attr->prio > max_prio) {
1685         NL_SET_ERR_MSG_MOD(extack,
1686                    "Requested priority is out of supported range");
1687         err = -EOPNOTSUPP;
1688         goto err_out;
1689     }
1690 
1691     if (flow_flag_test(flow, TUN_RX)) {
1692         err = mlx5e_attach_decap_route(priv, flow);
1693         if (err)
1694             goto err_out;
1695 
1696         if (!attr->chain && esw_attr->int_port &&
1697             attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1698             /* If decap route device is internal port, change the
1699              * source vport value in reg_c0 back to uplink just in
1700              * case the rule performs goto chain > 0. If we have a miss
1701              * on chain > 0 we want the metadata regs to hold the
1702              * chain id so SW will resume handling of this packet
1703              * from the proper chain.
1704              */
1705             u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1706                                     esw_attr->in_rep->vport);
1707 
1708             err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1709                             MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1710                             metadata);
1711             if (err)
1712                 goto err_out;
1713 
1714             attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1715         }
1716     }
1717 
1718     if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1719         err = mlx5e_attach_decap(priv, flow, extack);
1720         if (err)
1721             goto err_out;
1722     }
1723 
1724     if (netif_is_ovs_master(parse_attr->filter_dev)) {
1725         struct mlx5e_tc_int_port *int_port;
1726 
1727         if (attr->chain) {
1728             NL_SET_ERR_MSG_MOD(extack,
1729                        "Internal port rule is only supported on chain 0");
1730             err = -EOPNOTSUPP;
1731             goto err_out;
1732         }
1733 
1734         if (attr->dest_chain) {
1735             NL_SET_ERR_MSG_MOD(extack,
1736                        "Internal port rule offload doesn't support goto action");
1737             err = -EOPNOTSUPP;
1738             goto err_out;
1739         }
1740 
1741         int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1742                          parse_attr->filter_dev->ifindex,
1743                          flow_flag_test(flow, EGRESS) ?
1744                          MLX5E_TC_INT_PORT_EGRESS :
1745                          MLX5E_TC_INT_PORT_INGRESS);
1746         if (IS_ERR(int_port)) {
1747             err = PTR_ERR(int_port);
1748             goto err_out;
1749         }
1750 
1751         esw_attr->int_port = int_port;
1752     }
1753 
1754     err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
1755     if (err)
1756         goto err_out;
1757 
1758     err = mlx5_eswitch_add_vlan_action(esw, attr);
1759     if (err)
1760         goto err_out;
1761 
1762     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1763         if (vf_tun) {
1764             err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
1765             if (err)
1766                 goto err_out;
1767         } else {
1768             err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1769             if (err)
1770                 goto err_out;
1771         }
1772     }
1773 
1774     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1775         err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
1776         if (err)
1777             goto err_out;
1778     }
1779 
1780     /* We get here when either (1) there was no error, or (2) there is an
1781      * encap action but no valid neighbour yet, in which case the rule is
1782      * offloaded to the slow path table.
1783      */
1784     if (!encap_valid || flow_flag_test(flow, SLOW))
1785         flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1786     else
1787         flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1788 
1789     if (IS_ERR(flow->rule[0])) {
1790         err = PTR_ERR(flow->rule[0]);
1791         goto err_out;
1792     }
1793     flow_flag_set(flow, OFFLOADED);
1794 
1795     return 0;
1796 
1797 err_out:
1798     flow_flag_set(flow, FAILED);
1799     return err;
1800 }
1801 
1802 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1803 {
1804     struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1805     void *headers_v = MLX5_ADDR_OF(fte_match_param,
1806                        spec->match_value,
1807                        misc_parameters_3);
1808     u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1809                          headers_v,
1810                          geneve_tlv_option_0_data);
1811 
1812     return !!geneve_tlv_opt_0_data;
1813 }
1814 
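/* Undo mlx5e_tc_add_fdb_flow(): unoffload the rule (slow path or FDB), then
 * release the tunnel mapping id, geneve TLV option, vlan actions, decap route,
 * encap destinations, CT match state, modify-header object, counter, internal
 * ports and decap state, and finally free the attr structures.
 */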
1815 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1816                   struct mlx5e_tc_flow *flow)
1817 {
1818     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1819     struct mlx5_flow_attr *attr = flow->attr;
1820     struct mlx5_esw_flow_attr *esw_attr;
1821     bool vf_tun;
1822 
1823     esw_attr = attr->esw_attr;
1824     mlx5e_put_flow_tunnel_id(flow);
1825 
1826     if (flow_flag_test(flow, NOT_READY))
1827         remove_unready_flow(flow);
1828 
1829     if (mlx5e_is_offloaded_flow(flow)) {
1830         if (flow_flag_test(flow, SLOW))
1831             mlx5e_tc_unoffload_from_slow_path(esw, flow);
1832         else
1833             mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1834     }
1835     complete_all(&flow->del_hw_done);
1836 
1837     if (mlx5_flow_has_geneve_opt(flow))
1838         mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1839 
1840     mlx5_eswitch_del_vlan_action(esw, attr);
1841 
1842     if (flow->decap_route)
1843         mlx5e_detach_decap_route(priv, flow);
1844 
1845     clean_encap_dests(priv, flow, attr, &vf_tun);
1846 
1847     mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1848 
1849     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1850         mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1851         if (vf_tun && attr->modify_hdr)
1852             mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1853         else
1854             mlx5e_detach_mod_hdr(priv, flow);
1855     }
1856 
1857     if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1858         mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);
1859 
1860     if (esw_attr->int_port)
1861         mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
1862 
1863     if (esw_attr->dest_int_port)
1864         mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
1865 
1866     if (flow_flag_test(flow, L3_TO_L2_DECAP))
1867         mlx5e_detach_decap(priv, flow);
1868 
1869     free_flow_post_acts(flow);
1870 
1871     if (flow->attr->lag.count)
1872         mlx5_lag_del_mpesw_rule(esw->dev);
1873 
1874     kvfree(attr->esw_attr->rx_tun_attr);
1875     kvfree(attr->parse_attr);
1876     kfree(flow->attr);
1877 }
1878 
1879 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1880 {
1881     struct mlx5_flow_attr *attr;
1882 
1883     attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
1884     return attr->counter;
1885 }
1886 
1887 /* Iterate over tmp_list of flows attached to flow_list head. */
1888 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1889 {
1890     struct mlx5e_tc_flow *flow, *tmp;
1891 
1892     list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1893         mlx5e_flow_put(priv, flow);
1894 }
1895 
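/* If this eswitch flow was duplicated on the peer eswitch (DUP flag set),
 * unlink the peer copy under offloads.peer_mutex, clear the flag and drop the
 * peer flow's reference, deleting it when this was the last one.
 */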
1896 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1897 {
1898     struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1899 
1900     if (!flow_flag_test(flow, ESWITCH) ||
1901         !flow_flag_test(flow, DUP))
1902         return;
1903 
1904     mutex_lock(&esw->offloads.peer_mutex);
1905     list_del(&flow->peer);
1906     mutex_unlock(&esw->offloads.peer_mutex);
1907 
1908     flow_flag_clear(flow, DUP);
1909 
1910     if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1911         mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1912         kfree(flow->peer_flow);
1913     }
1914 
1915     flow->peer_flow = NULL;
1916 }
1917 
1918 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1919 {
1920     struct mlx5_core_dev *dev = flow->priv->mdev;
1921     struct mlx5_devcom *devcom = dev->priv.devcom;
1922     struct mlx5_eswitch *peer_esw;
1923 
1924     peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1925     if (!peer_esw)
1926         return;
1927 
1928     __mlx5e_tc_del_fdb_peer_flow(flow);
1929     mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1930 }
1931 
1932 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1933                   struct mlx5e_tc_flow *flow)
1934 {
1935     if (mlx5e_is_eswitch_flow(flow)) {
1936         mlx5e_tc_del_fdb_peer_flow(flow);
1937         mlx5e_tc_del_fdb_flow(priv, flow);
1938     } else {
1939         mlx5e_tc_del_nic_flow(priv, flow);
1940     }
1941 }
1942 
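/* Only chain-0 rules whose action list contains a goto or sample action need
 * their tunnel match registered in the tunnel mapping table; everything else
 * (chain > 0, or no such action) returns false.
 */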
1943 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
1944 {
1945     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1946     struct flow_action *flow_action = &rule->action;
1947     const struct flow_action_entry *act;
1948     int i;
1949 
1950     if (chain)
1951         return false;
1952 
1953     flow_action_for_each(i, act, flow_action) {
1954         switch (act->id) {
1955         case FLOW_ACTION_GOTO:
1956             return true;
1957         case FLOW_ACTION_SAMPLE:
1958             return true;
1959         default:
1960             continue;
1961         }
1962     }
1963 
1964     return false;
1965 }
1966 
1967 static int
1968 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1969                     struct flow_dissector_key_enc_opts *opts,
1970                     struct netlink_ext_ack *extack,
1971                     bool *dont_care)
1972 {
1973     struct geneve_opt *opt;
1974     int off = 0;
1975 
1976     *dont_care = true;
1977 
1978     while (opts->len > off) {
1979         opt = (struct geneve_opt *)&opts->data[off];
1980 
1981         if (!(*dont_care) || opt->opt_class || opt->type ||
1982             memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1983             *dont_care = false;
1984 
1985             if (opt->opt_class != htons(U16_MAX) ||
1986                 opt->type != U8_MAX) {
1987                 NL_SET_ERR_MSG_MOD(extack,
1988                            "Partial match of tunnel options in chain > 0 isn't supported");
1989                 netdev_warn(priv->netdev,
1990                         "Partial match of tunnel options in chain > 0 isn't supported");
1991                 return -EOPNOTSUPP;
1992             }
1993         }
1994 
1995         off += sizeof(struct geneve_opt) + opt->length * 4;
1996     }
1997 
1998     return 0;
1999 }
2000 
2001 #define COPY_DISSECTOR(rule, diss_key, dst)\
2002 ({ \
2003     struct flow_rule *__rule = (rule);\
2004     typeof(dst) __dst = dst;\
2005 \
2006     memcpy(__dst,\
2007            skb_flow_dissector_target(__rule->match.dissector,\
2008                      diss_key,\
2009                      __rule->match.key),\
2010            sizeof(*__dst));\
2011 })
2012 
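/* Register the rule's tunnel match in the uplink mapping tables and encode the
 * resulting ids into the flow as
 *     value = (tun_id << ENC_OPTS_BITS) | enc_opts_id,
 * where enc_opts_id stays 0 unless specific tunnel options are matched. On
 * chain > 0 the id is matched via TUNNEL_TO_REG; on chain 0 it is written to
 * that register with a modify-header action so the tunnel info can be
 * recovered later.
 */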
2013 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
2014                     struct mlx5e_tc_flow *flow,
2015                     struct flow_cls_offload *f,
2016                     struct net_device *filter_dev)
2017 {
2018     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2019     struct netlink_ext_ack *extack = f->common.extack;
2020     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2021     struct flow_match_enc_opts enc_opts_match;
2022     struct tunnel_match_enc_opts tun_enc_opts;
2023     struct mlx5_rep_uplink_priv *uplink_priv;
2024     struct mlx5_flow_attr *attr = flow->attr;
2025     struct mlx5e_rep_priv *uplink_rpriv;
2026     struct tunnel_match_key tunnel_key;
2027     bool enc_opts_is_dont_care = true;
2028     u32 tun_id, enc_opts_id = 0;
2029     struct mlx5_eswitch *esw;
2030     u32 value, mask;
2031     int err;
2032 
2033     esw = priv->mdev->priv.eswitch;
2034     uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2035     uplink_priv = &uplink_rpriv->uplink_priv;
2036 
2037     memset(&tunnel_key, 0, sizeof(tunnel_key));
2038     COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2039                &tunnel_key.enc_control);
2040     if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2041         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2042                    &tunnel_key.enc_ipv4);
2043     else
2044         COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2045                    &tunnel_key.enc_ipv6);
2046     COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2047     COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2048                &tunnel_key.enc_tp);
2049     COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2050                &tunnel_key.enc_key_id);
2051     tunnel_key.filter_ifindex = filter_dev->ifindex;
2052 
2053     err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2054     if (err)
2055         return err;
2056 
2057     flow_rule_match_enc_opts(rule, &enc_opts_match);
2058     err = enc_opts_is_dont_care_or_full_match(priv,
2059                           enc_opts_match.mask,
2060                           extack,
2061                           &enc_opts_is_dont_care);
2062     if (err)
2063         goto err_enc_opts;
2064 
2065     if (!enc_opts_is_dont_care) {
2066         memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2067         memcpy(&tun_enc_opts.key, enc_opts_match.key,
2068                sizeof(*enc_opts_match.key));
2069         memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2070                sizeof(*enc_opts_match.mask));
2071 
2072         err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2073                   &tun_enc_opts, &enc_opts_id);
2074         if (err)
2075             goto err_enc_opts;
2076     }
2077 
2078     value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2079     mask = enc_opts_id ? TUNNEL_ID_MASK :
2080                  (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2081 
2082     if (attr->chain) {
2083         mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2084                         TUNNEL_TO_REG, value, mask);
2085     } else {
2086         mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2087         err = mlx5e_tc_match_to_reg_set(priv->mdev,
2088                         mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2089                         TUNNEL_TO_REG, value);
2090         if (err)
2091             goto err_set;
2092 
2093         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2094     }
2095 
2096     flow->attr->tunnel_id = value;
2097     return 0;
2098 
2099 err_set:
2100     if (enc_opts_id)
2101         mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2102                    enc_opts_id);
2103 err_enc_opts:
2104     mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2105     return err;
2106 }
2107 
2108 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2109 {
2110     u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2111     u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2112     struct mlx5_rep_uplink_priv *uplink_priv;
2113     struct mlx5e_rep_priv *uplink_rpriv;
2114     struct mlx5_eswitch *esw;
2115 
2116     esw = flow->priv->mdev->priv.eswitch;
2117     uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2118     uplink_priv = &uplink_rpriv->uplink_priv;
2119 
2120     if (tun_id)
2121         mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2122     if (enc_opts_id)
2123         mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2124                    enc_opts_id);
2125 }
2126 
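/* Prefer matching on ip_version over the raw ethertype when the device
 * supports it and the filter fully masks n_proto as IPv4 or IPv6; otherwise
 * fall back to an exact ethertype match.
 */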
2127 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2128                 struct flow_match_basic *match, bool outer,
2129                 void *headers_c, void *headers_v)
2130 {
2131     bool ip_version_cap;
2132 
2133     ip_version_cap = outer ?
2134         MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2135                       ft_field_support.outer_ip_version) :
2136         MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2137                       ft_field_support.inner_ip_version);
2138 
2139     if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2140         (match->key->n_proto == htons(ETH_P_IP) ||
2141          match->key->n_proto == htons(ETH_P_IPV6))) {
2142         MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2143         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2144              match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2145     } else {
2146         MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2147              ntohs(match->mask->n_proto));
2148         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2149              ntohs(match->key->n_proto));
2150     }
2151 }
2152 
2153 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2154 {
2155     void *headers_v;
2156     u16 ethertype;
2157     u8 ip_version;
2158 
2159     if (outer)
2160         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2161     else
2162         headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2163 
2164     ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2165     /* If ip_version is not set, fall back to deriving it from the ethertype */
2166     if (!ip_version) {
2167         ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2168         if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2169             ip_version = 4;
2170         else if (ethertype == ETH_P_IPV6)
2171             ip_version = 6;
2172     }
2173     return ip_version;
2174 }
2175 
2176 /* The tunnel device follows RFC 6040 (see include/net/inet_ecn.h) and changes
2177  * the inner ip_ecn depending on the inner and outer ip_ecn as follows:
2178  *      +---------+----------------------------------------+
2179  *      |Arriving |         Arriving Outer Header          |
2180  *      |   Inner +---------+---------+---------+----------+
2181  *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
2182  *      +---------+---------+---------+---------+----------+
2183  *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
2184  *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
2185  *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
2186  *      |    CE   |   CE    |  CE     | CE      |   CE     |
2187  *      +---------+---------+---------+---------+----------+
2188  *
2189  * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
2190  * the inner ip_ecn value before hardware decap action.
2191  *
2192  * Cells marked with * are changed from the original inner packet ip_ecn value
2193  * during decap, so matching those values on inner ip_ecn before decap will fail.
2194  *
2195  * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
2196  * except for outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
2197  * and as such we can drop the inner ip_ecn = CE match.
2198  */
2199 
2200 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2201                       struct flow_cls_offload *f,
2202                       bool *match_inner_ecn)
2203 {
2204     u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2205     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2206     struct netlink_ext_ack *extack = f->common.extack;
2207     struct flow_match_ip match;
2208 
2209     *match_inner_ecn = true;
2210 
2211     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2212         flow_rule_match_enc_ip(rule, &match);
2213         outer_ecn_key = match.key->tos & INET_ECN_MASK;
2214         outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2215     }
2216 
2217     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2218         flow_rule_match_ip(rule, &match);
2219         inner_ecn_key = match.key->tos & INET_ECN_MASK;
2220         inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2221     }
2222 
2223     if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2224         NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2225         netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2226         return -EOPNOTSUPP;
2227     }
2228 
2229     if (!outer_ecn_mask) {
2230         if (!inner_ecn_mask)
2231             return 0;
2232 
2233         NL_SET_ERR_MSG_MOD(extack,
2234                    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2235         netdev_warn(priv->netdev,
2236                 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2237         return -EOPNOTSUPP;
2238     }
2239 
2240     if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2241         NL_SET_ERR_MSG_MOD(extack,
2242                    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2243         netdev_warn(priv->netdev,
2244                 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2245         return -EOPNOTSUPP;
2246     }
2247 
2248     if (!inner_ecn_mask)
2249         return 0;
2250 
2251     /* Both inner and outer have full mask on ecn */
2252 
2253     if (outer_ecn_key == INET_ECN_ECT_1) {
2254         /* the inner ecn might be changed by the DECAP action */
2255 
2256         NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2257         netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2258         return -EOPNOTSUPP;
2259     }
2260 
2261     if (outer_ecn_key != INET_ECN_CE)
2262         return 0;
2263 
2264     if (inner_ecn_key != INET_ECN_CE) {
2265         /* Can't happen in software, as packet ecn will be changed to CE after decap */
2266         NL_SET_ERR_MSG_MOD(extack,
2267                    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2268         netdev_warn(priv->netdev,
2269                 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2270         return -EOPNOTSUPP;
2271     }
2272 
2273     /* outer ecn = CE, inner ecn = CE; as decap will change the inner ecn to CE
2274      * in any case, drop the match on inner ecn
2275      */
2276     *match_inner_ecn = false;
2277 
2278     return 0;
2279 }
2280 
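/* Parse the tunnel (enc_*) part of a flower match; only eswitch flows may
 * match on tunnels. For chain 0 the outer headers are parsed into the spec and
 * a DECAP action is added (except for bareudp/MPLS over UDP, which uses a
 * packet reformat object); for chain > 0 on VXLAN the headers are parsed into
 * a temporary spec only to fill the rx_tun attribute. A tunnel id is allocated
 * via mlx5e_get_flow_tunnel_id() when chain mapping or restoration is needed.
 */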
2281 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2282                  struct mlx5e_tc_flow *flow,
2283                  struct mlx5_flow_spec *spec,
2284                  struct flow_cls_offload *f,
2285                  struct net_device *filter_dev,
2286                  u8 *match_level,
2287                  bool *match_inner)
2288 {
2289     struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2290     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2291     struct netlink_ext_ack *extack = f->common.extack;
2292     bool needs_mapping, sets_mapping;
2293     int err;
2294 
2295     if (!mlx5e_is_eswitch_flow(flow)) {
2296         NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2297         return -EOPNOTSUPP;
2298     }
2299 
2300     needs_mapping = !!flow->attr->chain;
2301     sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2302     *match_inner = !needs_mapping;
2303 
2304     if ((needs_mapping || sets_mapping) &&
2305         !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2306         NL_SET_ERR_MSG_MOD(extack,
2307                    "Chains on tunnel devices isn't supported without register loopback support");
2308         netdev_warn(priv->netdev,
2309                 "Chains on tunnel devices isn't supported without register loopback support");
2310         return -EOPNOTSUPP;
2311     }
2312 
2313     if (!flow->attr->chain) {
2314         err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2315                      match_level);
2316         if (err) {
2317             NL_SET_ERR_MSG_MOD(extack,
2318                        "Failed to parse tunnel attributes");
2319             netdev_warn(priv->netdev,
2320                     "Failed to parse tunnel attributes");
2321             return err;
2322         }
2323 
2324         /* With mpls over udp we decapsulate using packet reformat
2325          * object
2326          */
2327         if (!netif_is_bareudp(filter_dev))
2328             flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2329         err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2330         if (err)
2331             return err;
2332     } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2333         struct mlx5_flow_spec *tmp_spec;
2334 
2335         tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2336         if (!tmp_spec) {
2337             NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2338             netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2339             return -ENOMEM;
2340         }
2341         memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2342 
2343         err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2344         if (err) {
2345             kvfree(tmp_spec);
2346             NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2347             netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2348             return err;
2349         }
2350         err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2351         kvfree(tmp_spec);
2352         if (err)
2353             return err;
2354     }
2355 
2356     if (!needs_mapping && !sets_mapping)
2357         return 0;
2358 
2359     return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2360 }
2361 
2362 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2363 {
2364     return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2365                 inner_headers);
2366 }
2367 
2368 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2369 {
2370     return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2371                 inner_headers);
2372 }
2373 
2374 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2375 {
2376     return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2377                 outer_headers);
2378 }
2379 
2380 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2381 {
2382     return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2383                 outer_headers);
2384 }
2385 
2386 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2387 {
2388     return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2389         get_match_inner_headers_value(spec) :
2390         get_match_outer_headers_value(spec);
2391 }
2392 
2393 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2394 {
2395     return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2396         get_match_inner_headers_criteria(spec) :
2397         get_match_outer_headers_criteria(spec);
2398 }
2399 
2400 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2401                    struct flow_cls_offload *f)
2402 {
2403     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2404     struct netlink_ext_ack *extack = f->common.extack;
2405     struct net_device *ingress_dev;
2406     struct flow_match_meta match;
2407 
2408     if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2409         return 0;
2410 
2411     flow_rule_match_meta(rule, &match);
2412     if (!match.mask->ingress_ifindex)
2413         return 0;
2414 
2415     if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2416         NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2417         return -EOPNOTSUPP;
2418     }
2419 
2420     ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2421                      match.key->ingress_ifindex);
2422     if (!ingress_dev) {
2423         NL_SET_ERR_MSG_MOD(extack,
2424                    "Can't find the ingress port to match on");
2425         return -ENOENT;
2426     }
2427 
2428     if (ingress_dev != filter_dev) {
2429         NL_SET_ERR_MSG_MOD(extack,
2430                    "Can't match on the ingress filter port");
2431         return -EOPNOTSUPP;
2432     }
2433 
2434     return 0;
2435 }
2436 
2437 static bool skip_key_basic(struct net_device *filter_dev,
2438                struct flow_cls_offload *f)
2439 {
2440     /* When doing mpls over udp decap, the user needs to provide
2441      * MPLS_UC as the protocol in order to be able to match on mpls
2442      * label fields.  However, the actual ethertype is IP so we want to
2443      * avoid matching on this, otherwise we'll fail the match.
2444      */
2445     if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2446         return true;
2447 
2448     return false;
2449 }
2450 
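/* Translate a flower classifier match into an mlx5 flow spec: reject
 * unsupported dissector keys, handle tunnel matches, then copy the L2 keys
 * (basic, vlan, cvlan, MAC addresses), L3 keys (control/fragmentation,
 * IPv4/IPv6 addresses, IP tos/ttl) and L4 keys (TCP/UDP ports, TCP flags,
 * ICMP) into the match criteria and value, raising the match level as deeper
 * layers are matched.
 */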
2451 static int __parse_cls_flower(struct mlx5e_priv *priv,
2452                   struct mlx5e_tc_flow *flow,
2453                   struct mlx5_flow_spec *spec,
2454                   struct flow_cls_offload *f,
2455                   struct net_device *filter_dev,
2456                   u8 *inner_match_level, u8 *outer_match_level)
2457 {
2458     struct netlink_ext_ack *extack = f->common.extack;
2459     void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2460                        outer_headers);
2461     void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2462                        outer_headers);
2463     void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2464                     misc_parameters);
2465     void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2466                     misc_parameters);
2467     void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2468                     misc_parameters_3);
2469     void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2470                     misc_parameters_3);
2471     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2472     struct flow_dissector *dissector = rule->match.dissector;
2473     enum fs_flow_table_type fs_type;
2474     bool match_inner_ecn = true;
2475     u16 addr_type = 0;
2476     u8 ip_proto = 0;
2477     u8 *match_level;
2478     int err;
2479 
2480     fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2481     match_level = outer_match_level;
2482 
2483     if (dissector->used_keys &
2484         ~(BIT(FLOW_DISSECTOR_KEY_META) |
2485           BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2486           BIT(FLOW_DISSECTOR_KEY_BASIC) |
2487           BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2488           BIT(FLOW_DISSECTOR_KEY_VLAN) |
2489           BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2490           BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2491           BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2492           BIT(FLOW_DISSECTOR_KEY_PORTS) |
2493           BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2494           BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2495           BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2496           BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2497           BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2498           BIT(FLOW_DISSECTOR_KEY_TCP) |
2499           BIT(FLOW_DISSECTOR_KEY_IP)  |
2500           BIT(FLOW_DISSECTOR_KEY_CT) |
2501           BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2502           BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2503           BIT(FLOW_DISSECTOR_KEY_ICMP) |
2504           BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2505         NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2506         netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2507                dissector->used_keys);
2508         return -EOPNOTSUPP;
2509     }
2510 
2511     if (mlx5e_get_tc_tun(filter_dev)) {
2512         bool match_inner = false;
2513 
2514         err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2515                     outer_match_level, &match_inner);
2516         if (err)
2517             return err;
2518 
2519         if (match_inner) {
2520             /* header pointers should point to the inner headers
2521              * if the packet was decapsulated already.
2522              * outer headers are set by parse_tunnel_attr.
2523              */
2524             match_level = inner_match_level;
2525             headers_c = get_match_inner_headers_criteria(spec);
2526             headers_v = get_match_inner_headers_value(spec);
2527         }
2528 
2529         err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2530         if (err)
2531             return err;
2532     }
2533 
2534     err = mlx5e_flower_parse_meta(filter_dev, f);
2535     if (err)
2536         return err;
2537 
2538     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2539         !skip_key_basic(filter_dev, f)) {
2540         struct flow_match_basic match;
2541 
2542         flow_rule_match_basic(rule, &match);
2543         mlx5e_tc_set_ethertype(priv->mdev, &match,
2544                        match_level == outer_match_level,
2545                        headers_c, headers_v);
2546 
2547         if (match.mask->n_proto)
2548             *match_level = MLX5_MATCH_L2;
2549     }
2550     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2551         is_vlan_dev(filter_dev)) {
2552         struct flow_dissector_key_vlan filter_dev_mask;
2553         struct flow_dissector_key_vlan filter_dev_key;
2554         struct flow_match_vlan match;
2555 
2556         if (is_vlan_dev(filter_dev)) {
2557             match.key = &filter_dev_key;
2558             match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2559             match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2560             match.key->vlan_priority = 0;
2561             match.mask = &filter_dev_mask;
2562             memset(match.mask, 0xff, sizeof(*match.mask));
2563             match.mask->vlan_priority = 0;
2564         } else {
2565             flow_rule_match_vlan(rule, &match);
2566         }
2567         if (match.mask->vlan_id ||
2568             match.mask->vlan_priority ||
2569             match.mask->vlan_tpid) {
2570             if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2571                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2572                      svlan_tag, 1);
2573                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2574                      svlan_tag, 1);
2575             } else {
2576                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2577                      cvlan_tag, 1);
2578                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2579                      cvlan_tag, 1);
2580             }
2581 
2582             MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2583                  match.mask->vlan_id);
2584             MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2585                  match.key->vlan_id);
2586 
2587             MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2588                  match.mask->vlan_priority);
2589             MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2590                  match.key->vlan_priority);
2591 
2592             *match_level = MLX5_MATCH_L2;
2593 
2594             if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2595                 match.mask->vlan_eth_type &&
2596                 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2597                             ft_field_support.outer_second_vid,
2598                             fs_type)) {
2599                 MLX5_SET(fte_match_set_misc, misc_c,
2600                      outer_second_cvlan_tag, 1);
2601                 spec->match_criteria_enable |=
2602                     MLX5_MATCH_MISC_PARAMETERS;
2603             }
2604         }
2605     } else if (*match_level != MLX5_MATCH_NONE) {
2606         /* cvlan_tag enabled in the match criteria and
2607          * disabled in the match value means that neither an S-tag
2608          * nor a C-tag exists (i.e. the packet is untagged)
2609          */
2610         MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2611         *match_level = MLX5_MATCH_L2;
2612     }
2613 
2614     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2615         struct flow_match_vlan match;
2616 
2617         flow_rule_match_cvlan(rule, &match);
2618         if (match.mask->vlan_id ||
2619             match.mask->vlan_priority ||
2620             match.mask->vlan_tpid) {
2621             if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2622                              fs_type)) {
2623                 NL_SET_ERR_MSG_MOD(extack,
2624                            "Matching on CVLAN is not supported");
2625                 return -EOPNOTSUPP;
2626             }
2627 
2628             if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2629                 MLX5_SET(fte_match_set_misc, misc_c,
2630                      outer_second_svlan_tag, 1);
2631                 MLX5_SET(fte_match_set_misc, misc_v,
2632                      outer_second_svlan_tag, 1);
2633             } else {
2634                 MLX5_SET(fte_match_set_misc, misc_c,
2635                      outer_second_cvlan_tag, 1);
2636                 MLX5_SET(fte_match_set_misc, misc_v,
2637                      outer_second_cvlan_tag, 1);
2638             }
2639 
2640             MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2641                  match.mask->vlan_id);
2642             MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2643                  match.key->vlan_id);
2644             MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2645                  match.mask->vlan_priority);
2646             MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2647                  match.key->vlan_priority);
2648 
2649             *match_level = MLX5_MATCH_L2;
2650             spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2651         }
2652     }
2653 
2654     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2655         struct flow_match_eth_addrs match;
2656 
2657         flow_rule_match_eth_addrs(rule, &match);
2658         ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2659                          dmac_47_16),
2660                 match.mask->dst);
2661         ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2662                          dmac_47_16),
2663                 match.key->dst);
2664 
2665         ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2666                          smac_47_16),
2667                 match.mask->src);
2668         ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2669                          smac_47_16),
2670                 match.key->src);
2671 
2672         if (!is_zero_ether_addr(match.mask->src) ||
2673             !is_zero_ether_addr(match.mask->dst))
2674             *match_level = MLX5_MATCH_L2;
2675     }
2676 
2677     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2678         struct flow_match_control match;
2679 
2680         flow_rule_match_control(rule, &match);
2681         addr_type = match.key->addr_type;
2682 
2683         /* the HW doesn't support frag first/later */
2684         if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2685             NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2686             return -EOPNOTSUPP;
2687         }
2688 
2689         if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2690             MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2691             MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2692                  match.key->flags & FLOW_DIS_IS_FRAGMENT);
2693 
2694             /* the HW doesn't need L3 inline to match on frag=no */
2695             if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2696                 *match_level = MLX5_MATCH_L2;
2697             else
2698                 *match_level = MLX5_MATCH_L3;
2699         }
2700     }
2701     /* ***  L2 attributes parsing up to here *** */
2702 
2703     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2704         struct flow_match_basic match;
2705 
2706         flow_rule_match_basic(rule, &match);
2707         ip_proto = match.key->ip_proto;
2708 
2709         MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2710              match.mask->ip_proto);
2711         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2712              match.key->ip_proto);
2713 
2714         if (match.mask->ip_proto)
2715             *match_level = MLX5_MATCH_L3;
2716     }
2717 
2718     if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2719         struct flow_match_ipv4_addrs match;
2720 
2721         flow_rule_match_ipv4_addrs(rule, &match);
2722         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2723                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2724                &match.mask->src, sizeof(match.mask->src));
2725         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2726                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2727                &match.key->src, sizeof(match.key->src));
2728         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2729                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2730                &match.mask->dst, sizeof(match.mask->dst));
2731         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2732                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2733                &match.key->dst, sizeof(match.key->dst));
2734 
2735         if (match.mask->src || match.mask->dst)
2736             *match_level = MLX5_MATCH_L3;
2737     }
2738 
2739     if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2740         struct flow_match_ipv6_addrs match;
2741 
2742         flow_rule_match_ipv6_addrs(rule, &match);
2743         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2744                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2745                &match.mask->src, sizeof(match.mask->src));
2746         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2747                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2748                &match.key->src, sizeof(match.key->src));
2749 
2750         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2751                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2752                &match.mask->dst, sizeof(match.mask->dst));
2753         memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2754                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2755                &match.key->dst, sizeof(match.key->dst));
2756 
2757         if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2758             ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2759             *match_level = MLX5_MATCH_L3;
2760     }
2761 
2762     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2763         struct flow_match_ip match;
2764 
2765         flow_rule_match_ip(rule, &match);
2766         if (match_inner_ecn) {
2767             MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2768                  match.mask->tos & 0x3);
2769             MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2770                  match.key->tos & 0x3);
2771         }
2772 
2773         MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2774              match.mask->tos >> 2);
2775         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2776              match.key->tos  >> 2);
2777 
2778         MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2779              match.mask->ttl);
2780         MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2781              match.key->ttl);
2782 
2783         if (match.mask->ttl &&
2784             !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2785                         ft_field_support.outer_ipv4_ttl)) {
2786             NL_SET_ERR_MSG_MOD(extack,
2787                        "Matching on TTL is not supported");
2788             return -EOPNOTSUPP;
2789         }
2790 
2791         if (match.mask->tos || match.mask->ttl)
2792             *match_level = MLX5_MATCH_L3;
2793     }
2794 
2795     /* ***  L3 attributes parsing up to here *** */
2796 
2797     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2798         struct flow_match_ports match;
2799 
2800         flow_rule_match_ports(rule, &match);
2801         switch (ip_proto) {
2802         case IPPROTO_TCP:
2803             MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2804                  tcp_sport, ntohs(match.mask->src));
2805             MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2806                  tcp_sport, ntohs(match.key->src));
2807 
2808             MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2809                  tcp_dport, ntohs(match.mask->dst));
2810             MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2811                  tcp_dport, ntohs(match.key->dst));
2812             break;
2813 
2814         case IPPROTO_UDP:
2815             MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2816                  udp_sport, ntohs(match.mask->src));
2817             MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2818                  udp_sport, ntohs(match.key->src));
2819 
2820             MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2821                  udp_dport, ntohs(match.mask->dst));
2822             MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2823                  udp_dport, ntohs(match.key->dst));
2824             break;
2825         default:
2826             NL_SET_ERR_MSG_MOD(extack,
2827                        "Only UDP and TCP transports are supported for L4 matching");
2828             netdev_err(priv->netdev,
2829                    "Only UDP and TCP transports are supported\n");
2830             return -EINVAL;
2831         }
2832 
2833         if (match.mask->src || match.mask->dst)
2834             *match_level = MLX5_MATCH_L4;
2835     }
2836 
2837     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2838         struct flow_match_tcp match;
2839 
2840         flow_rule_match_tcp(rule, &match);
2841         MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2842              ntohs(match.mask->flags));
2843         MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2844              ntohs(match.key->flags));
2845 
2846         if (match.mask->flags)
2847             *match_level = MLX5_MATCH_L4;
2848     }
2849     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2850         struct flow_match_icmp match;
2851 
2852         flow_rule_match_icmp(rule, &match);
2853         switch (ip_proto) {
2854         case IPPROTO_ICMP:
2855             if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2856                   MLX5_FLEX_PROTO_ICMP)) {
2857                 NL_SET_ERR_MSG_MOD(extack,
2858                            "Match on Flex protocols for ICMP is not supported");
2859                 return -EOPNOTSUPP;
2860             }
2861             MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2862                  match.mask->type);
2863             MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2864                  match.key->type);
2865             MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2866                  match.mask->code);
2867             MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2868                  match.key->code);
2869             break;
2870         case IPPROTO_ICMPV6:
2871             if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2872                   MLX5_FLEX_PROTO_ICMPV6)) {
2873                 NL_SET_ERR_MSG_MOD(extack,
2874                            "Match on Flex protocols for ICMPV6 is not supported");
2875                 return -EOPNOTSUPP;
2876             }
2877             MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2878                  match.mask->type);
2879             MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
2880                  match.key->type);
2881             MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
2882                  match.mask->code);
2883             MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
2884                  match.key->code);
2885             break;
2886         default:
2887             NL_SET_ERR_MSG_MOD(extack,
2888                        "Code and type matching only with ICMP and ICMPv6");
2889             netdev_err(priv->netdev,
2890                    "Code and type matching only with ICMP and ICMPv6\n");
2891             return -EINVAL;
2892         }
2893         if (match.mask->code || match.mask->type) {
2894             *match_level = MLX5_MATCH_L4;
2895             spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
2896         }
2897     }
2898     /* Currently supported only for MPLS over UDP */
2899     if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2900         !netif_is_bareudp(filter_dev)) {
2901         NL_SET_ERR_MSG_MOD(extack,
2902                    "Matching on MPLS is supported only for MPLS over UDP");
2903         netdev_err(priv->netdev,
2904                "Matching on MPLS is supported only for MPLS over UDP\n");
2905         return -EOPNOTSUPP;
2906     }
2907 
2908     return 0;
2909 }
2910 
2911 static int parse_cls_flower(struct mlx5e_priv *priv,
2912                 struct mlx5e_tc_flow *flow,
2913                 struct mlx5_flow_spec *spec,
2914                 struct flow_cls_offload *f,
2915                 struct net_device *filter_dev)
2916 {
2917     u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2918     struct netlink_ext_ack *extack = f->common.extack;
2919     struct mlx5_core_dev *dev = priv->mdev;
2920     struct mlx5_eswitch *esw = dev->priv.eswitch;
2921     struct mlx5e_rep_priv *rpriv = priv->ppriv;
2922     struct mlx5_eswitch_rep *rep;
2923     bool is_eswitch_flow;
2924     int err;
2925 
2926     inner_match_level = MLX5_MATCH_NONE;
2927     outer_match_level = MLX5_MATCH_NONE;
2928 
2929     err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2930                  &inner_match_level, &outer_match_level);
2931     non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2932                  outer_match_level : inner_match_level;
2933 
2934     is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2935     if (!err && is_eswitch_flow) {
2936         rep = rpriv->rep;
2937         if (rep->vport != MLX5_VPORT_UPLINK &&
2938             (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2939             esw->offloads.inline_mode < non_tunnel_match_level)) {
2940             NL_SET_ERR_MSG_MOD(extack,
2941                        "Flow is not offloaded due to min inline setting");
2942             netdev_warn(priv->netdev,
2943                     "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2944                     non_tunnel_match_level, esw->offloads.inline_mode);
2945             return -EOPNOTSUPP;
2946         }
2947     }
2948 
2949     flow->attr->inner_match_level = inner_match_level;
2950     flow->attr->outer_match_level = outer_match_level;
2951 
2952 
2953     return err;
2954 }
2955 
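/* Describes one header field that a pedit action may rewrite: the firmware
 * field id, its size and mask, its offset inside struct pedit_headers, and the
 * offset of the corresponding field in the flow match, which is used to verify
 * that rewrites agree with what the rule matches on.
 */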
2956 struct mlx5_fields {
2957     u8  field;
2958     u8  field_bsize;
2959     u32 field_mask;
2960     u32 offset;
2961     u32 match_offset;
2962 };
2963 
2964 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2965         {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2966          offsetof(struct pedit_headers, field) + (off), \
2967          MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2968 
2969 /* The rewrite value masked by the rewrite mask equals the match value masked
2970  * by the match mask, and the rewrite mask only covers bits that the match mask
2971  * also covers, i.e. there is no rewrite without a corresponding match. */
2972 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2973     type matchmaskx = *(type *)(matchmaskp); \
2974     type matchvalx = *(type *)(matchvalp); \
2975     type maskx = *(type *)(maskp); \
2976     type valx = *(type *)(valp); \
2977     \
2978     (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2979                                  matchmaskx)); \
2980 })
2981 
2982 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2983              void *matchmaskp, u8 bsize)
2984 {
2985     bool same = false;
2986 
2987     switch (bsize) {
2988     case 8:
2989         same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2990         break;
2991     case 16:
2992         same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2993         break;
2994     case 32:
2995         same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2996         break;
2997     }
2998 
2999     return same;
3000 }
3001 
3002 static struct mlx5_fields fields[] = {
3003     OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
3004     OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
3005     OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
3006     OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
3007     OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
3008     OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
3009 
3010     OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
3011     OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
3012     OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
3013     OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
3014 
3015     OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
3016         src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
3017     OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
3018         src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
3019     OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
3020         src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
3021     OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
3022         src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
3023     OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
3024         dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
3025     OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
3026         dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
3027     OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3028         dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3029     OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3030         dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3031     OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3032     OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
3033 
3034     OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
3035     OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
3036     /* in the linux tcphdr, tcp_flags is 8 bits long */
3037     OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
3038 
3039     OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3040     OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
3041 };
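
/* Each entry above ties a software pedit location (an offset into struct
 * pedit_headers) to the corresponding HW modify-header field id and to the
 * offset of that field in the match criteria, which is what lets
 * offload_pedit_fields() below both program the rewrite and compare it
 * against the existing match.
 */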
3042 
3043 static unsigned long mask_to_le(unsigned long mask, int size)
3044 {
3045     __be32 mask_be32;
3046     __be16 mask_be16;
3047 
3048     if (size == 32) {
3049         mask_be32 = (__force __be32)(mask);
3050         mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
3051     } else if (size == 16) {
3052         mask_be32 = (__force __be32)(mask);
3053         mask_be16 = *(__be16 *)&mask_be32;
3054         mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
3055     }
3056 
3057     return mask;
3058 }
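
/* The pedit masks are stored in network byte order; converting them here
 * keeps the bit positions reported by find_first_bit()/find_last_bit() in
 * offload_pedit_fields() consistent with the ntohl()/ntohs()-converted data
 * programmed into the modify-header action, regardless of host endianness.
 */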
3059 
3060 static int offload_pedit_fields(struct mlx5e_priv *priv,
3061                 int namespace,
3062                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3063                 u32 *action_flags,
3064                 struct netlink_ext_ack *extack)
3065 {
3066     struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3067     struct pedit_headers_action *hdrs = parse_attr->hdrs;
3068     void *headers_c, *headers_v, *action, *vals_p;
3069     u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
3070     struct mlx5e_tc_mod_hdr_acts *mod_acts;
3071     unsigned long mask, field_mask;
3072     int i, first, last, next_z;
3073     struct mlx5_fields *f;
3074     u8 cmd;
3075 
3076     mod_acts = &parse_attr->mod_hdr_acts;
3077     headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3078     headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3079 
3080     set_masks = &hdrs[0].masks;
3081     add_masks = &hdrs[1].masks;
3082     set_vals = &hdrs[0].vals;
3083     add_vals = &hdrs[1].vals;
3084 
3085     for (i = 0; i < ARRAY_SIZE(fields); i++) {
3086         bool skip;
3087 
3088         f = &fields[i];
3089         /* avoid seeing bits set from previous iterations */
3090         s_mask = 0;
3091         a_mask = 0;
3092 
3093         s_masks_p = (void *)set_masks + f->offset;
3094         a_masks_p = (void *)add_masks + f->offset;
3095 
3096         s_mask = *s_masks_p & f->field_mask;
3097         a_mask = *a_masks_p & f->field_mask;
3098 
3099         if (!s_mask && !a_mask) /* nothing to offload here */
3100             continue;
3101 
3102         if (s_mask && a_mask) {
3103             NL_SET_ERR_MSG_MOD(extack,
3104                        "can't set and add to the same HW field");
3105             netdev_warn(priv->netdev,
3106                     "mlx5: can't set and add to the same HW field (%x)\n",
3107                     f->field);
3108             return -EOPNOTSUPP;
3109         }
3110 
3111         skip = false;
3112         if (s_mask) {
3113             void *match_mask = headers_c + f->match_offset;
3114             void *match_val = headers_v + f->match_offset;
3115 
3116             cmd  = MLX5_ACTION_TYPE_SET;
3117             mask = s_mask;
3118             vals_p = (void *)set_vals + f->offset;
3119             /* don't rewrite if we have a match on the same value */
3120             if (cmp_val_mask(vals_p, s_masks_p, match_val,
3121                      match_mask, f->field_bsize))
3122                 skip = true;
3123             /* clear to denote we consumed this field */
3124             *s_masks_p &= ~f->field_mask;
3125         } else {
3126             cmd  = MLX5_ACTION_TYPE_ADD;
3127             mask = a_mask;
3128             vals_p = (void *)add_vals + f->offset;
3129             /* add 0 is no change */
3130             if ((*(u32 *)vals_p & f->field_mask) == 0)
3131                 skip = true;
3132             /* clear to denote we consumed this field */
3133             *a_masks_p &= ~f->field_mask;
3134         }
3135         if (skip)
3136             continue;
3137 
3138         mask = mask_to_le(mask, f->field_bsize);
3139 
3140         first = find_first_bit(&mask, f->field_bsize);
3141         next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3142         last  = find_last_bit(&mask, f->field_bsize);
3143         if (first < next_z && next_z < last) {
3144             NL_SET_ERR_MSG_MOD(extack,
3145                        "rewrite of few sub-fields isn't supported");
3146             netdev_warn(priv->netdev,
3147                     "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
3148                     mask);
3149             return -EOPNOTSUPP;
3150         }
3151 
3152         action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3153         if (IS_ERR(action)) {
3154             NL_SET_ERR_MSG_MOD(extack,
3155                        "too many pedit actions, can't offload");
3156             mlx5_core_warn(priv->mdev,
3157                        "mlx5: parsed %d pedit actions, can't do more\n",
3158                        mod_acts->num_actions);
3159             return PTR_ERR(action);
3160         }
3161 
3162         MLX5_SET(set_action_in, action, action_type, cmd);
3163         MLX5_SET(set_action_in, action, field, f->field);
3164 
3165         if (cmd == MLX5_ACTION_TYPE_SET) {
3166             int start;
3167 
3168             field_mask = mask_to_le(f->field_mask, f->field_bsize);
3169 
3170             /* a field narrower than its container may not start at bit 0 */
3171             start = find_first_bit(&field_mask, f->field_bsize);
3172 
3173             MLX5_SET(set_action_in, action, offset, first - start);
3174             /* length is num of bits to be written, zero means length of 32 */
3175             MLX5_SET(set_action_in, action, length, (last - first + 1));
3176         }
3177 
3178         if (f->field_bsize == 32)
3179             MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3180         else if (f->field_bsize == 16)
3181             MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3182         else if (f->field_bsize == 8)
3183             MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3184 
3185         ++mod_acts->num_actions;
3186     }
3187 
3188     return 0;
3189 }
3190 
3191 static const struct pedit_headers zero_masks = {};
3192 
3193 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3194                        struct mlx5e_tc_flow_parse_attr *parse_attr,
3195                        struct netlink_ext_ack *extack)
3196 {
3197     struct pedit_headers *cmd_masks;
3198     u8 cmd;
3199 
3200     for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3201         cmd_masks = &parse_attr->hdrs[cmd].masks;
3202         if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3203             NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3204             netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3205             print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3206                        16, 1, cmd_masks, sizeof(zero_masks), true);
3207             return -EOPNOTSUPP;
3208         }
3209     }
3210 
3211     return 0;
3212 }
3213 
3214 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3215                  struct mlx5e_tc_flow_parse_attr *parse_attr,
3216                  u32 *action_flags,
3217                  struct netlink_ext_ack *extack)
3218 {
3219     int err;
3220 
3221     err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3222     if (err)
3223         goto out_dealloc_parsed_actions;
3224 
3225     err = verify_offload_pedit_fields(priv, parse_attr, extack);
3226     if (err)
3227         goto out_dealloc_parsed_actions;
3228 
3229     return 0;
3230 
3231 out_dealloc_parsed_actions:
3232     mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3233     return err;
3234 }
3235 
3236 struct ip_ttl_word {
3237     __u8    ttl;
3238     __u8    protocol;
3239     __sum16 check;
3240 };
3241 
3242 struct ipv6_hoplimit_word {
3243     __be16  payload_len;
3244     __u8    nexthdr;
3245     __u8    hop_limit;
3246 };
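
/* Both structs above overlay the 32-bit word that a single pedit mangle
 * touches, so is_action_keys_supported() can tell whether a rewrite is
 * limited to ttl/hop_limit or also modifies neighbouring header fields.
 */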
3247 
3248 static bool
3249 is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
3250              bool *modify_ip_header, bool *modify_tuple,
3251              struct netlink_ext_ack *extack)
3252 {
3253     u32 mask, offset;
3254     u8 htype;
3255 
3256     htype = act->mangle.htype;
3257     offset = act->mangle.offset;
3258     mask = ~act->mangle.mask;
3259     /* For the IPv4 & IPv6 headers, check the mangled 4-byte word
3260      * to determine whether the modified fields
3261      * are anything other than ttl & hop_limit.
3262      */
3263     if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3264         struct ip_ttl_word *ttl_word =
3265             (struct ip_ttl_word *)&mask;
3266 
3267         if (offset != offsetof(struct iphdr, ttl) ||
3268             ttl_word->protocol ||
3269             ttl_word->check) {
3270             *modify_ip_header = true;
3271         }
3272 
3273         if (offset >= offsetof(struct iphdr, saddr))
3274             *modify_tuple = true;
3275 
3276         if (ct_flow && *modify_tuple) {
3277             NL_SET_ERR_MSG_MOD(extack,
3278                        "can't offload re-write of ipv4 address with action ct");
3279             return false;
3280         }
3281     } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3282         struct ipv6_hoplimit_word *hoplimit_word =
3283             (struct ipv6_hoplimit_word *)&mask;
3284 
3285         if (offset != offsetof(struct ipv6hdr, payload_len) ||
3286             hoplimit_word->payload_len ||
3287             hoplimit_word->nexthdr) {
3288             *modify_ip_header = true;
3289         }
3290 
3291         if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3292             *modify_tuple = true;
3293 
3294         if (ct_flow && *modify_tuple) {
3295             NL_SET_ERR_MSG_MOD(extack,
3296                        "can't offload re-write of ipv6 address with action ct");
3297             return false;
3298         }
3299     } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3300            htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3301         *modify_tuple = true;
3302         if (ct_flow) {
3303             NL_SET_ERR_MSG_MOD(extack,
3304                        "can't offload re-write of transport header ports with action ct");
3305             return false;
3306         }
3307     }
3308 
3309     return true;
3310 }
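
/* For example, a pedit that only rewrites the IPv4 ttl byte (offset 8 with a
 * mask covering just the ttl field) leaves both flags clear; a rewrite that
 * also touches protocol/check sets modify_ip_header, and one at or beyond
 * saddr sets modify_tuple, which cannot be offloaded together with a ct
 * action.
 */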
3311 
3312 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3313                    bool ct_flow, struct netlink_ext_ack *extack,
3314                    struct mlx5e_priv *priv,
3315                    struct mlx5_flow_spec *spec)
3316 {
3317     if (!modify_tuple || ct_clear)
3318         return true;
3319 
3320     if (ct_flow) {
3321         NL_SET_ERR_MSG_MOD(extack,
3322                    "can't offload tuple modification with non-clear ct()");
3323         netdev_info(priv->netdev,
3324                 "can't offload tuple modification with non-clear ct()");
3325         return false;
3326     }
3327 
3328     /* Add ct_state=-trk match so it will be offloaded for non ct flows
3329      * (or after clear action), as otherwise, since the tuple is changed,
3330      * we can't restore ct state
3331      */
3332     if (mlx5_tc_ct_add_no_trk_match(spec)) {
3333         NL_SET_ERR_MSG_MOD(extack,
3334                    "can't offload tuple modification with ct matches and no ct(clear) action");
3335         netdev_info(priv->netdev,
3336                 "can't offload tuple modification with ct matches and no ct(clear) action");
3337         return false;
3338     }
3339 
3340     return true;
3341 }
3342 
3343 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3344                       struct mlx5_flow_spec *spec,
3345                       struct flow_action *flow_action,
3346                       u32 actions, bool ct_flow,
3347                       bool ct_clear,
3348                       struct netlink_ext_ack *extack)
3349 {
3350     const struct flow_action_entry *act;
3351     bool modify_ip_header, modify_tuple;
3352     void *headers_c;
3353     void *headers_v;
3354     u16 ethertype;
3355     u8 ip_proto;
3356     int i;
3357 
3358     headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3359     headers_v = mlx5e_get_match_headers_value(actions, spec);
3360     ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3361 
3362     /* for non-IP we only re-write MACs, so we're okay */
3363     if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3364         ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3365         goto out_ok;
3366 
3367     modify_ip_header = false;
3368     modify_tuple = false;
3369     flow_action_for_each(i, act, flow_action) {
3370         if (act->id != FLOW_ACTION_MANGLE &&
3371             act->id != FLOW_ACTION_ADD)
3372             continue;
3373 
3374         if (!is_action_keys_supported(act, ct_flow,
3375                           &modify_ip_header,
3376                           &modify_tuple, extack))
3377             return false;
3378     }
3379 
3380     if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3381                     priv, spec))
3382         return false;
3383 
3384     ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3385     if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3386         ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3387         NL_SET_ERR_MSG_MOD(extack,
3388                    "can't offload re-write of non TCP/UDP");
3389         netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3390                 ip_proto);
3391         return false;
3392     }
3393 
3394 out_ok:
3395     return true;
3396 }
3397 
3398 static bool
3399 actions_match_supported_fdb(struct mlx5e_priv *priv,
3400                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3401                 struct mlx5e_tc_flow *flow,
3402                 struct netlink_ext_ack *extack)
3403 {
3404     struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3405     bool ct_flow, ct_clear;
3406 
3407     ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3408     ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3409 
3410     if (esw_attr->split_count && ct_flow &&
3411         !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
3412         /* All registers used by ct are cleared when using
3413          * split rules.
3414          */
3415         NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3416         return false;
3417     }
3418 
3419     if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3420         NL_SET_ERR_MSG_MOD(extack,
3421                    "current firmware doesn't support split rule for port mirroring");
3422         netdev_warn_once(priv->netdev,
3423                  "current firmware doesn't support split rule for port mirroring\n");
3424         return false;
3425     }
3426 
3427     return true;
3428 }
3429 
3430 static bool
3431 actions_match_supported(struct mlx5e_priv *priv,
3432             struct flow_action *flow_action,
3433             u32 actions,
3434             struct mlx5e_tc_flow_parse_attr *parse_attr,
3435             struct mlx5e_tc_flow *flow,
3436             struct netlink_ext_ack *extack)
3437 {
3438     bool ct_flow, ct_clear;
3439 
3440     ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3441     ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3442 
3443     if (!(actions &
3444           (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3445         NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
3446         return false;
3447     }
3448 
3449     if (!(~actions &
3450           (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3451         NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3452         return false;
3453     }
3454 
3455     if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3456         actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3457         NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3458         return false;
3459     }
3460 
3473     if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3474         !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3475                        actions, ct_flow, ct_clear, extack))
3476         return false;
3477 
3478     if (mlx5e_is_eswitch_flow(flow) &&
3479         !actions_match_supported_fdb(priv, parse_attr, flow, extack))
3480         return false;
3481 
3482     return true;
3483 }
3484 
3485 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3486 {
3487     return priv->mdev == peer_priv->mdev;
3488 }
3489 
3490 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3491 {
3492     struct mlx5_core_dev *fmdev, *pmdev;
3493     u64 fsystem_guid, psystem_guid;
3494 
3495     fmdev = priv->mdev;
3496     pmdev = peer_priv->mdev;
3497 
3498     fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3499     psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3500 
3501     return (fsystem_guid == psystem_guid);
3502 }
3503 
3504 static int
3505 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3506                 struct mlx5e_tc_flow *flow,
3507                 struct mlx5_flow_attr *attr,
3508                 struct netlink_ext_ack *extack)
3509 {
3510     struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3511     struct pedit_headers_action *hdrs = parse_attr->hdrs;
3512     enum mlx5_flow_namespace_type ns_type;
3513     int err;
3514 
3515     if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3516         !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3517         return 0;
3518 
3519     ns_type = mlx5e_get_flow_namespace(flow);
3520 
3521     err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3522     if (err)
3523         return err;
3524 
3525     if (parse_attr->mod_hdr_acts.num_actions > 0)
3526         return 0;
3527 
3528     /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3529     attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3530     mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3531 
3532     if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3533         return 0;
3534 
3535     if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3536           (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3537         attr->esw_attr->split_count = 0;
3538 
3539     return 0;
3540 }
3541 
3542 static struct mlx5_flow_attr*
3543 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3544                    enum mlx5_flow_namespace_type ns_type)
3545 {
3546     struct mlx5e_tc_flow_parse_attr *parse_attr;
3547     u32 attr_sz = ns_to_attr_sz(ns_type);
3548     struct mlx5_flow_attr *attr2;
3549 
3550     attr2 = mlx5_alloc_flow_attr(ns_type);
3551     parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3552     if (!attr2 || !parse_attr) {
3553         kvfree(parse_attr);
3554         kfree(attr2);
3555         return NULL;
3556     }
3557 
3558     memcpy(attr2, attr, attr_sz);
3559     INIT_LIST_HEAD(&attr2->list);
3560     parse_attr->filter_dev = attr->parse_attr->filter_dev;
3561     attr2->action = 0;
3562     attr2->flags = 0;
3563     attr2->parse_attr = parse_attr;
3564     return attr2;
3565 }
3566 
3567 static struct mlx5_core_dev *
3568 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
3569 {
3570     return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
3571 }
3572 
3573 struct mlx5_flow_attr *
3574 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3575 {
3576     struct mlx5_esw_flow_attr *esw_attr;
3577     struct mlx5_flow_attr *attr;
3578     int i;
3579 
3580     list_for_each_entry(attr, &flow->attrs, list) {
3581         esw_attr = attr->esw_attr;
3582         for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3583             if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3584                 return attr;
3585         }
3586     }
3587 
3588     return NULL;
3589 }
3590 
3591 void
3592 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3593 {
3594     struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3595     struct mlx5_flow_attr *attr;
3596 
3597     list_for_each_entry(attr, &flow->attrs, list) {
3598         if (list_is_last(&attr->list, &flow->attrs))
3599             break;
3600 
3601         mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3602     }
3603 }
3604 
3605 static void
3606 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3607 {
3608     struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
3609     struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3610     struct mlx5_flow_attr *attr, *tmp;
3611     bool vf_tun;
3612 
3613     list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3614         if (list_is_last(&attr->list, &flow->attrs))
3615             break;
3616 
3617         if (attr->post_act_handle)
3618             mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
3619 
3620         clean_encap_dests(flow->priv, flow, attr, &vf_tun);
3621 
3622         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
3623             mlx5_fc_destroy(counter_dev, attr->counter);
3624 
3625         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3626             mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
3627             if (attr->modify_hdr)
3628                 mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
3629         }
3630 
3631         list_del(&attr->list);
3632         kvfree(attr->parse_attr);
3633         kfree(attr);
3634     }
3635 }
3636 
3637 int
3638 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3639 {
3640     struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3641     struct mlx5_flow_attr *attr;
3642     int err = 0;
3643 
3644     list_for_each_entry(attr, &flow->attrs, list) {
3645         if (list_is_last(&attr->list, &flow->attrs))
3646             break;
3647 
3648         err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3649         if (err)
3650             break;
3651     }
3652 
3653     return err;
3654 }
3655 
3656 /* TC filter rule HW translation:
3657  *
3658  * +---------------------+
3659  * + ft prio (tc chain)  +
3660  * + original match      +
3661  * +---------------------+
3662  *           |
3663  *           | if multi table action
3664  *           |
3665  *           v
3666  * +---------------------+
3667  * + post act ft         |<----.
3668  * + match fte id        |     | split on multi table action
3669  * + do actions          |-----'
3670  * +---------------------+
3671  *           |
3672  *           |
3673  *           v
3674  * Do rest of the actions after last multi table action.
3675  */
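/* As an illustration (interface names are placeholders), a flower rule such
 * as:
 *
 *   tc filter add dev REP0 ingress protocol ip flower ct_state -trk \
 *       action ct action mirred egress redirect dev REP1
 *
 * carries a multi table action (ct), so the ct part is executed in the
 * original table and the redirect is split out into a post act rule matched
 * by fte id, as built by alloc_flow_post_acts() below.
 */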
3676 static int
3677 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3678 {
3679     struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3680     struct mlx5_flow_attr *attr, *next_attr = NULL;
3681     struct mlx5e_post_act_handle *handle;
3682     bool vf_tun, encap_valid = true;
3683     int err;
3684 
3685     /* This is going in reverse order as needed.
3686      * The first entry is the last attribute.
3687      */
3688     list_for_each_entry(attr, &flow->attrs, list) {
3689         if (!next_attr) {
3690             /* Set counter action on last post act rule. */
3691             attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3692         } else {
3693             err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3694             if (err)
3695                 goto out_free;
3696         }
3697 
3698         /* Don't add post_act rule for first attr (last in the list).
3699          * It's being handled by the caller.
3700          */
3701         if (list_is_last(&attr->list, &flow->attrs))
3702             break;
3703 
3704         err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
3705         if (err)
3706             goto out_free;
3707 
3708         if (!encap_valid)
3709             flow_flag_set(flow, SLOW);
3710 
3711         err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3712         if (err)
3713             goto out_free;
3714 
3715         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3716             err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
3717             if (err)
3718                 goto out_free;
3719         }
3720 
3721         if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
3722             err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
3723             if (err)
3724                 goto out_free;
3725         }
3726 
3727         handle = mlx5e_tc_post_act_add(post_act, attr);
3728         if (IS_ERR(handle)) {
3729             err = PTR_ERR(handle);
3730             goto out_free;
3731         }
3732 
3733         attr->post_act_handle = handle;
3734         next_attr = attr;
3735     }
3736 
3737     if (flow_flag_test(flow, SLOW))
3738         goto out;
3739 
3740     err = mlx5e_tc_offload_flow_post_acts(flow);
3741     if (err)
3742         goto out_free;
3743 
3744 out:
3745     return 0;
3746 
3747 out_free:
3748     free_flow_post_acts(flow);
3749     return err;
3750 }
3751 
3752 static int
3753 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
3754          struct flow_action *flow_action)
3755 {
3756     struct netlink_ext_ack *extack = parse_state->extack;
3757     struct mlx5e_tc_flow_action flow_action_reorder;
3758     struct mlx5e_tc_flow *flow = parse_state->flow;
3759     struct mlx5_flow_attr *attr = flow->attr;
3760     enum mlx5_flow_namespace_type ns_type;
3761     struct mlx5e_priv *priv = flow->priv;
3762     struct flow_action_entry *act, **_act;
3763     struct mlx5e_tc_act *tc_act;
3764     int err, i;
3765 
3766     flow_action_reorder.num_entries = flow_action->num_entries;
3767     flow_action_reorder.entries = kcalloc(flow_action->num_entries,
3768                           sizeof(flow_action), GFP_KERNEL);
3769     if (!flow_action_reorder.entries)
3770         return -ENOMEM;
3771 
3772     mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
3773 
3774     ns_type = mlx5e_get_flow_namespace(flow);
3775     list_add(&attr->list, &flow->attrs);
3776 
3777     flow_action_for_each(i, _act, &flow_action_reorder) {
3778         act = *_act;
3779         tc_act = mlx5e_tc_act_get(act->id, ns_type);
3780         if (!tc_act) {
3781             NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
3782             err = -EOPNOTSUPP;
3783             goto out_free;
3784         }
3785 
3786         if (!tc_act->can_offload(parse_state, act, i, attr)) {
3787             err = -EOPNOTSUPP;
3788             goto out_free;
3789         }
3790 
3791         err = tc_act->parse_action(parse_state, act, priv, attr);
3792         if (err)
3793             goto out_free;
3794 
3795         parse_state->actions |= attr->action;
3796 
3797         /* Split attr for multi table act if not the last act. */
3798         if (tc_act->is_multi_table_act &&
3799             tc_act->is_multi_table_act(priv, act, attr) &&
3800             i < flow_action_reorder.num_entries - 1) {
3801             err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3802             if (err)
3803                 goto out_free;
3804 
3805             attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
3806             if (!attr) {
3807                 err = -ENOMEM;
3808                 goto out_free;
3809             }
3810 
3811             list_add(&attr->list, &flow->attrs);
3812         }
3813     }
3814 
3815     kfree(flow_action_reorder.entries);
3816 
3817     err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3818     if (err)
3819         goto out_free_post_acts;
3820 
3821     err = alloc_flow_post_acts(flow, extack);
3822     if (err)
3823         goto out_free_post_acts;
3824 
3825     return 0;
3826 
3827 out_free:
3828     kfree(flow_action_reorder.entries);
3829 out_free_post_acts:
3830     free_flow_post_acts(flow);
3831 
3832     return err;
3833 }
3834 
3835 static int
3836 flow_action_supported(struct flow_action *flow_action,
3837               struct netlink_ext_ack *extack)
3838 {
3839     if (!flow_action_has_entries(flow_action)) {
3840         NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
3841         return -EINVAL;
3842     }
3843 
3844     if (!flow_action_hw_stats_check(flow_action, extack,
3845                     FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
3846         NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
3847         return -EOPNOTSUPP;
3848     }
3849 
3850     return 0;
3851 }
3852 
3853 static int
3854 parse_tc_nic_actions(struct mlx5e_priv *priv,
3855              struct flow_action *flow_action,
3856              struct mlx5e_tc_flow *flow,
3857              struct netlink_ext_ack *extack)
3858 {
3859     struct mlx5e_tc_act_parse_state *parse_state;
3860     struct mlx5e_tc_flow_parse_attr *parse_attr;
3861     struct mlx5_flow_attr *attr = flow->attr;
3862     int err;
3863 
3864     err = flow_action_supported(flow_action, extack);
3865     if (err)
3866         return err;
3867 
3868     attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3869     parse_attr = attr->parse_attr;
3870     parse_state = &parse_attr->parse_state;
3871     mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
3872     parse_state->ct_priv = get_ct_priv(priv);
3873 
3874     err = parse_tc_actions(parse_state, flow_action);
3875     if (err)
3876         return err;
3877 
3878     err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
3879     if (err)
3880         return err;
3881 
3882     if (!actions_match_supported(priv, flow_action, parse_state->actions,
3883                      parse_attr, flow, extack))
3884         return -EOPNOTSUPP;
3885 
3886     return 0;
3887 }
3888 
3889 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3890                   struct net_device *peer_netdev)
3891 {
3892     struct mlx5e_priv *peer_priv;
3893 
3894     peer_priv = netdev_priv(peer_netdev);
3895 
3896     return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3897         mlx5e_eswitch_vf_rep(priv->netdev) &&
3898         mlx5e_eswitch_vf_rep(peer_netdev) &&
3899         mlx5e_same_hw_devs(priv, peer_priv));
3900 }
3901 
3902 static bool same_hw_reps(struct mlx5e_priv *priv,
3903              struct net_device *peer_netdev)
3904 {
3905     struct mlx5e_priv *peer_priv;
3906 
3907     peer_priv = netdev_priv(peer_netdev);
3908 
3909     return mlx5e_eswitch_rep(priv->netdev) &&
3910            mlx5e_eswitch_rep(peer_netdev) &&
3911            mlx5e_same_hw_devs(priv, peer_priv);
3912 }
3913 
3914 static bool is_lag_dev(struct mlx5e_priv *priv,
3915                struct net_device *peer_netdev)
3916 {
3917     return ((mlx5_lag_is_sriov(priv->mdev) ||
3918          mlx5_lag_is_multipath(priv->mdev)) &&
3919          same_hw_reps(priv, peer_netdev));
3920 }
3921 
3922 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
3923 {
3924     if (same_hw_reps(priv, out_dev) &&
3925         MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
3926         MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
3927         return true;
3928 
3929     return false;
3930 }
3931 
3932 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3933                     struct net_device *out_dev)
3934 {
3935     if (is_merged_eswitch_vfs(priv, out_dev))
3936         return true;
3937 
3938     if (is_multiport_eligible(priv, out_dev))
3939         return true;
3940 
3941     if (is_lag_dev(priv, out_dev))
3942         return true;
3943 
3944     return mlx5e_eswitch_rep(out_dev) &&
3945            same_port_devs(priv, netdev_priv(out_dev));
3946 }
3947 
3948 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
3949                       struct mlx5_flow_attr *attr,
3950                       int ifindex,
3951                       enum mlx5e_tc_int_port_type type,
3952                       u32 *action,
3953                       int out_index)
3954 {
3955     struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
3956     struct mlx5e_tc_int_port_priv *int_port_priv;
3957     struct mlx5e_tc_flow_parse_attr *parse_attr;
3958     struct mlx5e_tc_int_port *dest_int_port;
3959     int err;
3960 
3961     parse_attr = attr->parse_attr;
3962     int_port_priv = mlx5e_get_int_port_priv(priv);
3963 
3964     dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
3965     if (IS_ERR(dest_int_port))
3966         return PTR_ERR(dest_int_port);
3967 
3968     err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
3969                     MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
3970                     mlx5e_tc_int_port_get_metadata(dest_int_port));
3971     if (err) {
3972         mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
3973         return err;
3974     }
3975 
3976     *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3977 
3978     esw_attr->dest_int_port = dest_int_port;
3979     esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
3980 
3981     /* Forward to root fdb for matching against the new source vport */
3982     attr->dest_chain = 0;
3983 
3984     return 0;
3985 }
3986 
3987 static int
3988 parse_tc_fdb_actions(struct mlx5e_priv *priv,
3989              struct flow_action *flow_action,
3990              struct mlx5e_tc_flow *flow,
3991              struct netlink_ext_ack *extack)
3992 {
3993     struct mlx5e_tc_act_parse_state *parse_state;
3994     struct mlx5e_tc_flow_parse_attr *parse_attr;
3995     struct mlx5_flow_attr *attr = flow->attr;
3996     struct mlx5_esw_flow_attr *esw_attr;
3997     int err;
3998 
3999     err = flow_action_supported(flow_action, extack);
4000     if (err)
4001         return err;
4002 
4003     esw_attr = attr->esw_attr;
4004     parse_attr = attr->parse_attr;
4005     parse_state = &parse_attr->parse_state;
4006     mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4007     parse_state->ct_priv = get_ct_priv(priv);
4008 
4009     err = parse_tc_actions(parse_state, flow_action);
4010     if (err)
4011         return err;
4012 
4013     /* Forward to/from internal port can only have 1 dest */
4014     if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) &&
4015         esw_attr->out_count > 1) {
4016         NL_SET_ERR_MSG_MOD(extack,
4017                    "Rules with internal port can have only one destination");
4018         return -EOPNOTSUPP;
4019     }
4020 
4021     err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4022     if (err)
4023         return err;
4024 
4025     if (!actions_match_supported(priv, flow_action, parse_state->actions,
4026                      parse_attr, flow, extack))
4027         return -EOPNOTSUPP;
4028 
4029     return 0;
4030 }
4031 
4032 static void get_flags(int flags, unsigned long *flow_flags)
4033 {
4034     unsigned long __flow_flags = 0;
4035 
4036     if (flags & MLX5_TC_FLAG(INGRESS))
4037         __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4038     if (flags & MLX5_TC_FLAG(EGRESS))
4039         __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4040 
4041     if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4042         __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4043     if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4044         __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4045     if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4046         __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4047 
4048     *flow_flags = __flow_flags;
4049 }
4050 
4051 static const struct rhashtable_params tc_ht_params = {
4052     .head_offset = offsetof(struct mlx5e_tc_flow, node),
4053     .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4054     .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4055     .automatic_shrinking = true,
4056 };
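
/* Flows are hashed by the unmodified TC cookie, so the configure/delete/stats
 * callbacks below can resolve a struct mlx5e_tc_flow directly from f->cookie
 * with rhashtable_lookup().
 */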
4057 
4058 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4059                     unsigned long flags)
4060 {
4061     struct mlx5e_rep_priv *rpriv;
4062 
4063     if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4064         rpriv = priv->ppriv;
4065         return &rpriv->tc_ht;
4066     } else /* NIC offload */
4067         return &priv->fs->tc->ht;
4068 }
4069 
4070 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4071 {
4072     struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4073     struct mlx5_flow_attr *attr = flow->attr;
4074     bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4075         flow_flag_test(flow, INGRESS);
4076     bool act_is_encap = !!(attr->action &
4077                    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4078     bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4079                         MLX5_DEVCOM_ESW_OFFLOADS);
4080 
4081     if (!esw_paired)
4082         return false;
4083 
4084     if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4085          mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4086         (is_rep_ingress || act_is_encap))
4087         return true;
4088 
4089     return false;
4090 }
4091 
4092 struct mlx5_flow_attr *
4093 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4094 {
4095     u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
4096                 sizeof(struct mlx5_esw_flow_attr) :
4097                 sizeof(struct mlx5_nic_flow_attr);
4098     struct mlx5_flow_attr *attr;
4099 
4100     attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4101     if (!attr)
4102         return attr;
4103 
4104     INIT_LIST_HEAD(&attr->list);
4105     return attr;
4106 }
4107 
4108 static int
4109 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4110          struct flow_cls_offload *f, unsigned long flow_flags,
4111          struct mlx5e_tc_flow_parse_attr **__parse_attr,
4112          struct mlx5e_tc_flow **__flow)
4113 {
4114     struct mlx5e_tc_flow_parse_attr *parse_attr;
4115     struct mlx5_flow_attr *attr;
4116     struct mlx5e_tc_flow *flow;
4117     int err = -ENOMEM;
4118     int out_index;
4119 
4120     flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4121     parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4122     if (!parse_attr || !flow)
4123         goto err_free;
4124 
4125     flow->flags = flow_flags;
4126     flow->cookie = f->cookie;
4127     flow->priv = priv;
4128 
4129     attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4130     if (!attr)
4131         goto err_free;
4132 
4133     flow->attr = attr;
4134 
4135     for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4136         INIT_LIST_HEAD(&flow->encaps[out_index].list);
4137     INIT_LIST_HEAD(&flow->hairpin);
4138     INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4139     INIT_LIST_HEAD(&flow->attrs);
4140     refcount_set(&flow->refcnt, 1);
4141     init_completion(&flow->init_done);
4142     init_completion(&flow->del_hw_done);
4143 
4144     *__flow = flow;
4145     *__parse_attr = parse_attr;
4146 
4147     return 0;
4148 
4149 err_free:
4150     kfree(flow);
4151     kvfree(parse_attr);
4152     return err;
4153 }
4154 
4155 static void
4156 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4157              struct mlx5e_tc_flow_parse_attr *parse_attr,
4158              struct flow_cls_offload *f)
4159 {
4160     attr->parse_attr = parse_attr;
4161     attr->chain = f->common.chain_index;
4162     attr->prio = f->common.prio;
4163 }
4164 
4165 static void
4166 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4167              struct mlx5e_priv *priv,
4168              struct mlx5e_tc_flow_parse_attr *parse_attr,
4169              struct flow_cls_offload *f,
4170              struct mlx5_eswitch_rep *in_rep,
4171              struct mlx5_core_dev *in_mdev)
4172 {
4173     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4174     struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4175 
4176     mlx5e_flow_attr_init(attr, parse_attr, f);
4177 
4178     esw_attr->in_rep = in_rep;
4179     esw_attr->in_mdev = in_mdev;
4180 
4181     if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4182         MLX5_COUNTER_SOURCE_ESWITCH)
4183         esw_attr->counter_dev = in_mdev;
4184     else
4185         esw_attr->counter_dev = priv->mdev;
4186 }
4187 
4188 static struct mlx5e_tc_flow *
4189 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4190              struct flow_cls_offload *f,
4191              unsigned long flow_flags,
4192              struct net_device *filter_dev,
4193              struct mlx5_eswitch_rep *in_rep,
4194              struct mlx5_core_dev *in_mdev)
4195 {
4196     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4197     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4198     struct netlink_ext_ack *extack = f->common.extack;
4199     struct mlx5e_tc_flow_parse_attr *parse_attr;
4200     struct mlx5e_tc_flow *flow;
4201     int attr_size, err;
4202 
4203     flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4204     attr_size  = sizeof(struct mlx5_esw_flow_attr);
4205     err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4206                    &parse_attr, &flow);
4207     if (err)
4208         goto out;
4209 
4210     parse_attr->filter_dev = filter_dev;
4211     mlx5e_flow_esw_attr_init(flow->attr,
4212                  priv, parse_attr,
4213                  f, in_rep, in_mdev);
4214 
4215     err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4216                    f, filter_dev);
4217     if (err)
4218         goto err_free;
4219 
4220     /* actions validation depends on parsing the ct matches first */
4221     err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4222                    &flow->attr->ct_attr, extack);
4223     if (err)
4224         goto err_free;
4225 
4226     /* always set IP version for indirect table handling */
4227     flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4228 
4229     err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4230     if (err)
4231         goto err_free;
4232 
4233     if (flow->attr->lag.count) {
4234         err = mlx5_lag_add_mpesw_rule(esw->dev);
4235         if (err)
4236             goto err_free;
4237     }
4238 
4239     err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4240     complete_all(&flow->init_done);
4241     if (err) {
4242         if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4243             goto err_lag;
4244 
4245         add_unready_flow(flow);
4246     }
4247 
4248     return flow;
4249 
4250 err_lag:
4251     if (flow->attr->lag.count)
4252         mlx5_lag_del_mpesw_rule(esw->dev);
4253 err_free:
4254     mlx5e_flow_put(priv, flow);
4255 out:
4256     return ERR_PTR(err);
4257 }
4258 
4259 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4260                       struct mlx5e_tc_flow *flow,
4261                       unsigned long flow_flags)
4262 {
4263     struct mlx5e_priv *priv = flow->priv, *peer_priv;
4264     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4265     struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4266     struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4267     struct mlx5e_tc_flow_parse_attr *parse_attr;
4268     struct mlx5e_rep_priv *peer_urpriv;
4269     struct mlx5e_tc_flow *peer_flow;
4270     struct mlx5_core_dev *in_mdev;
4271     int err = 0;
4272 
4273     peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4274     if (!peer_esw)
4275         return -ENODEV;
4276 
4277     peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4278     peer_priv = netdev_priv(peer_urpriv->netdev);
4279 
4280     /* in_mdev is the mdev of the port the packet originated from.
4281      * Packets redirected to the uplink therefore use the same mdev
4282      * as the original flow, while packets redirected from the
4283      * uplink use the peer mdev.
4284      */
4285     if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4286         in_mdev = peer_priv->mdev;
4287     else
4288         in_mdev = priv->mdev;
4289 
4290     parse_attr = flow->attr->parse_attr;
4291     peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4292                      parse_attr->filter_dev,
4293                      attr->in_rep, in_mdev);
4294     if (IS_ERR(peer_flow)) {
4295         err = PTR_ERR(peer_flow);
4296         goto out;
4297     }
4298 
4299     flow->peer_flow = peer_flow;
4300     flow_flag_set(flow, DUP);
4301     mutex_lock(&esw->offloads.peer_mutex);
4302     list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4303     mutex_unlock(&esw->offloads.peer_mutex);
4304 
4305 out:
4306     mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4307     return err;
4308 }
4309 
4310 static int
4311 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4312            struct flow_cls_offload *f,
4313            unsigned long flow_flags,
4314            struct net_device *filter_dev,
4315            struct mlx5e_tc_flow **__flow)
4316 {
4317     struct mlx5e_rep_priv *rpriv = priv->ppriv;
4318     struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4319     struct mlx5_core_dev *in_mdev = priv->mdev;
4320     struct mlx5e_tc_flow *flow;
4321     int err;
4322 
4323     flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4324                     in_mdev);
4325     if (IS_ERR(flow))
4326         return PTR_ERR(flow);
4327 
4328     if (is_peer_flow_needed(flow)) {
4329         err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4330         if (err) {
4331             mlx5e_tc_del_fdb_flow(priv, flow);
4332             goto out;
4333         }
4334     }
4335 
4336     *__flow = flow;
4337 
4338     return 0;
4339 
4340 out:
4341     return err;
4342 }
4343 
4344 static int
4345 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4346            struct flow_cls_offload *f,
4347            unsigned long flow_flags,
4348            struct net_device *filter_dev,
4349            struct mlx5e_tc_flow **__flow)
4350 {
4351     struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4352     struct netlink_ext_ack *extack = f->common.extack;
4353     struct mlx5e_tc_flow_parse_attr *parse_attr;
4354     struct mlx5e_tc_flow *flow;
4355     int attr_size, err;
4356 
4357     if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4358         if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4359             return -EOPNOTSUPP;
4360     } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4361         return -EOPNOTSUPP;
4362     }
4363 
4364     flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4365     attr_size  = sizeof(struct mlx5_nic_flow_attr);
4366     err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4367                    &parse_attr, &flow);
4368     if (err)
4369         goto out;
4370 
4371     parse_attr->filter_dev = filter_dev;
4372     mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4373 
4374     err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4375                    f, filter_dev);
4376     if (err)
4377         goto err_free;
4378 
4379     err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4380                    &flow->attr->ct_attr, extack);
4381     if (err)
4382         goto err_free;
4383 
4384     err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4385     if (err)
4386         goto err_free;
4387 
4388     err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4389     if (err)
4390         goto err_free;
4391 
4392     flow_flag_set(flow, OFFLOADED);
4393     *__flow = flow;
4394 
4395     return 0;
4396 
4397 err_free:
4398     flow_flag_set(flow, FAILED);
4399     mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4400     mlx5e_flow_put(priv, flow);
4401 out:
4402     return err;
4403 }
4404 
4405 static int
4406 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4407           struct flow_cls_offload *f,
4408           unsigned long flags,
4409           struct net_device *filter_dev,
4410           struct mlx5e_tc_flow **flow)
4411 {
4412     struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4413     unsigned long flow_flags;
4414     int err;
4415 
4416     get_flags(flags, &flow_flags);
4417 
4418     if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4419         return -EOPNOTSUPP;
4420 
4421     if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4422         err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4423                      filter_dev, flow);
4424     else
4425         err = mlx5e_add_nic_flow(priv, f, flow_flags,
4426                      filter_dev, flow);
4427 
4428     return err;
4429 }
4430 
4431 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4432                        struct mlx5e_rep_priv *rpriv)
4433 {
4434     /* An offloaded flow rule may be duplicated on a non-uplink representor
4435      * that shares a tc block with other slaves of a lag device. rpriv can be
4436      * NULL if this function is called from NIC mode.
4437      */
4438     return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4439 }
4440 
4441 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4442                struct flow_cls_offload *f, unsigned long flags)
4443 {
4444     struct netlink_ext_ack *extack = f->common.extack;
4445     struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4446     struct mlx5e_rep_priv *rpriv = priv->ppriv;
4447     struct mlx5e_tc_flow *flow;
4448     int err = 0;
4449 
4450     if (!mlx5_esw_hold(priv->mdev))
4451         return -EAGAIN;
4452 
4453     mlx5_esw_get(priv->mdev);
4454 
4455     rcu_read_lock();
4456     flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4457     if (flow) {
4458         /* Same flow rule offloaded to non-uplink representor sharing tc block,
4459          * just return 0.
4460          */
4461         if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4462             goto rcu_unlock;
4463 
4464         NL_SET_ERR_MSG_MOD(extack,
4465                    "flow cookie already exists, ignoring");
4466         netdev_warn_once(priv->netdev,
4467                  "flow cookie %lx already exists, ignoring\n",
4468                  f->cookie);
4469         err = -EEXIST;
4470         goto rcu_unlock;
4471     }
4472 rcu_unlock:
4473     rcu_read_unlock();
4474     if (flow)
4475         goto out;
4476 
4477     trace_mlx5e_configure_flower(f);
4478     err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4479     if (err)
4480         goto out;
4481 
4482     /* Flow rule offloaded to non-uplink representor sharing tc block,
4483      * set the flow's owner dev.
4484      */
4485     if (is_flow_rule_duplicate_allowed(dev, rpriv))
4486         flow->orig_dev = dev;
4487 
4488     err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4489     if (err)
4490         goto err_free;
4491 
4492     mlx5_esw_release(priv->mdev);
4493     return 0;
4494 
4495 err_free:
4496     mlx5e_flow_put(priv, flow);
4497 out:
4498     mlx5_esw_put(priv->mdev);
4499     mlx5_esw_release(priv->mdev);
4500     return err;
4501 }
4502 
4503 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4504 {
4505     bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4506     bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4507 
4508     return flow_flag_test(flow, INGRESS) == dir_ingress &&
4509         flow_flag_test(flow, EGRESS) == dir_egress;
4510 }
4511 
4512 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4513             struct flow_cls_offload *f, unsigned long flags)
4514 {
4515     struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4516     struct mlx5e_tc_flow *flow;
4517     int err;
4518 
4519     rcu_read_lock();
4520     flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4521     if (!flow || !same_flow_direction(flow, flags)) {
4522         err = -EINVAL;
4523         goto errout;
4524     }
4525 
4526     /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4527      * set.
4528      */
4529     if (flow_flag_test_and_set(flow, DELETED)) {
4530         err = -EINVAL;
4531         goto errout;
4532     }
4533     rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4534     rcu_read_unlock();
4535 
4536     trace_mlx5e_delete_flower(f);
4537     mlx5e_flow_put(priv, flow);
4538 
4539     mlx5_esw_put(priv->mdev);
4540     return 0;
4541 
4542 errout:
4543     rcu_read_unlock();
4544     return err;
4545 }
4546 
4547 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4548                struct flow_cls_offload *f, unsigned long flags)
4549 {
4550     struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4551     struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4552     struct mlx5_eswitch *peer_esw;
4553     struct mlx5e_tc_flow *flow;
4554     struct mlx5_fc *counter;
4555     u64 lastuse = 0;
4556     u64 packets = 0;
4557     u64 bytes = 0;
4558     int err = 0;
4559 
4560     rcu_read_lock();
4561     flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4562                         tc_ht_params));
4563     rcu_read_unlock();
4564     if (IS_ERR(flow))
4565         return PTR_ERR(flow);
4566 
4567     if (!same_flow_direction(flow, flags)) {
4568         err = -EINVAL;
4569         goto errout;
4570     }
4571 
4572     if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4573         counter = mlx5e_tc_get_counter(flow);
4574         if (!counter)
4575             goto errout;
4576 
4577         mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4578     }
4579 
4580     /* Under multipath it's possible for one rule to be currently
4581      * un-offloaded while the other rule is offloaded.
4582      */
4583     peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4584     if (!peer_esw)
4585         goto out;
4586 
4587     if (flow_flag_test(flow, DUP) &&
4588         flow_flag_test(flow->peer_flow, OFFLOADED)) {
4589         u64 bytes2;
4590         u64 packets2;
4591         u64 lastuse2;
4592 
4593         counter = mlx5e_tc_get_counter(flow->peer_flow);
4594         if (!counter)
4595             goto no_peer_counter;
4596         mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4597 
4598         bytes += bytes2;
4599         packets += packets2;
4600         lastuse = max_t(u64, lastuse, lastuse2);
4601     }
4602 
4603 no_peer_counter:
4604     mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4605 out:
4606     flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4607               FLOW_ACTION_HW_STATS_DELAYED);
4608     trace_mlx5e_stats_flower(f);
4609 errout:
4610     mlx5e_flow_put(priv, flow);
4611     return err;
4612 }
4613 
4614 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4615                    struct netlink_ext_ack *extack)
4616 {
4617     struct mlx5e_rep_priv *rpriv = priv->ppriv;
4618     struct mlx5_eswitch *esw;
4619     u32 rate_mbps = 0;
4620     u16 vport_num;
4621     int err;
4622 
4623     vport_num = rpriv->rep->vport;
4624     if (vport_num >= MLX5_VPORT_ECPF) {
4625         NL_SET_ERR_MSG_MOD(extack,
4626                    "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4627         return -EOPNOTSUPP;
4628     }
4629 
4630     esw = priv->mdev->priv.eswitch;
4631     /* rate is given in bytes/sec.
4632      * First convert to bits/sec and then round to the nearest Mbit/sec
4633      * (mbit meaning a million bits).
4634      * Moreover, if rate is non-zero we choose to configure a minimum of
4635      * 1 Mbit/sec.
4636      */
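    /* Worked example with illustrative numbers: a police rate of 1,500,000
     * bytes/sec is 12,000,000 bits/sec; adding 500,000 and dividing by
     * 1,000,000 rounds to the nearest whole Mbit, giving rate_mbps = 12.
     * A tiny non-zero rate (e.g. 10 bytes/sec) divides down to 0 and is then
     * clamped to the 1 Mbit/sec minimum by max_t().
     */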
4637     if (rate) {
4638         rate = (rate * BITS_PER_BYTE) + 500000;
4639         do_div(rate, 1000000);
4640         rate_mbps = max_t(u32, rate, 1);
4641     }
4642 
4643     err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
4644     if (err)
4645         NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4646 
4647     return err;
4648 }
4649 
4650 int mlx5e_policer_validate(const struct flow_action *action,
4651                const struct flow_action_entry *act,
4652                struct netlink_ext_ack *extack)
4653 {
4654     if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
4655         NL_SET_ERR_MSG_MOD(extack,
4656                    "Offload not supported when exceed action is not drop");
4657         return -EOPNOTSUPP;
4658     }
4659 
4660     if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
4661         !flow_action_is_last_entry(action, act)) {
4662         NL_SET_ERR_MSG_MOD(extack,
4663                    "Offload not supported when conform action is ok, but action is not last");
4664         return -EOPNOTSUPP;
4665     }
4666 
4667     if (act->police.peakrate_bytes_ps ||
4668         act->police.avrate || act->police.overhead) {
4669         NL_SET_ERR_MSG_MOD(extack,
4670                    "Offload not supported when peakrate/avrate/overhead is configured");
4671         return -EOPNOTSUPP;
4672     }
4673 
4674     if (act->police.rate_pkt_ps) {
4675         NL_SET_ERR_MSG_MOD(extack,
4676                    "QoS offload does not support packets per second");
4677         return -EOPNOTSUPP;
4678     }
4679 
4680     return 0;
4681 }
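
/* For illustration only (a plausible tc invocation, not taken from this file):
 * a byte-rate policer such as
 *
 *   tc filter add dev $DEV ingress protocol ip flower skip_sw \
 *       action police rate 100mbit burst 16k conform-exceed drop/pipe
 *
 * passes the checks above: the exceed action is drop, no peakrate, avrate or
 * overhead is configured, and no packets-per-second rate is requested.
 */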
4682 
4683 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4684                     struct flow_action *flow_action,
4685                     struct netlink_ext_ack *extack)
4686 {
4687     struct mlx5e_rep_priv *rpriv = priv->ppriv;
4688     const struct flow_action_entry *act;
4689     int err;
4690     int i;
4691 
4692     if (!flow_action_has_entries(flow_action)) {
4693         NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4694         return -EINVAL;
4695     }
4696 
4697     if (!flow_offload_has_one_action(flow_action)) {
4698         NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
4699         return -EOPNOTSUPP;
4700     }
4701 
4702     if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
4703         NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4704         return -EOPNOTSUPP;
4705     }
4706 
4707     flow_action_for_each(i, act, flow_action) {
4708         switch (act->id) {
4709         case FLOW_ACTION_POLICE:
4710             if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
4711                 NL_SET_ERR_MSG_MOD(extack,
4712                            "Offload not supported when conform action is not continue");
4713                 return -EOPNOTSUPP;
4714             }
4715 
4716             err = mlx5e_policer_validate(flow_action, act, extack);
4717             if (err)
4718                 return err;
4719 
4720             err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4721             if (err)
4722                 return err;
4723 
4724             rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4725             break;
4726         default:
4727             NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4728             return -EOPNOTSUPP;
4729         }
4730     }
4731 
4732     return 0;
4733 }
4734 
4735 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4736                 struct tc_cls_matchall_offload *ma)
4737 {
4738     struct netlink_ext_ack *extack = ma->common.extack;
4739 
4740     if (ma->common.prio != 1) {
4741         NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4742         return -EINVAL;
4743     }
4744 
4745     return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4746 }
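
/* Matchall policing is typically installed on a representor along the lines
 * of (an assumed example command, not from this file):
 *
 *   tc filter add dev $REP ingress prio 1 matchall \
 *       action police rate 50mbit burst 8k conform-exceed drop/continue
 *
 * i.e. a single police action whose conform action is "continue", matching
 * scan_tc_matchall_fdb_actions() above and the prio == 1 check here.
 */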
4747 
4748 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4749                  struct tc_cls_matchall_offload *ma)
4750 {
4751     struct netlink_ext_ack *extack = ma->common.extack;
4752 
4753     return apply_police_params(priv, 0, extack);
4754 }
4755 
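/* Matchall stats are reported as deltas of the VF vport rx counters relative
 * to the snapshot taken when the policer was (re)applied in
 * scan_tc_matchall_fdb_actions(); the snapshot is refreshed on every query.
 */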
4756 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4757                  struct tc_cls_matchall_offload *ma)
4758 {
4759     struct mlx5e_rep_priv *rpriv = priv->ppriv;
4760     struct rtnl_link_stats64 cur_stats;
4761     u64 dbytes;
4762     u64 dpkts;
4763 
4764     cur_stats = priv->stats.vf_vport;
4765     dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4766     dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4767     rpriv->prev_vf_vport_stats = cur_stats;
4768     flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
4769               FLOW_ACTION_HW_STATS_DELAYED);
4770 }
4771 
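/* Invoked when another netdev on the same HW is unregistering: hairpin
 * entries are collected under hairpin_tbl_lock while taking an extra
 * reference, then, outside the lock, each entry waits for its setup to
 * complete before the pair towards the departing vhca_id has its dead peer
 * cleared.
 */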
4772 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4773                           struct mlx5e_priv *peer_priv)
4774 {
4775     struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4776     struct mlx5e_hairpin_entry *hpe, *tmp;
4777     LIST_HEAD(init_wait_list);
4778     u16 peer_vhca_id;
4779     int bkt;
4780 
4781     if (!mlx5e_same_hw_devs(priv, peer_priv))
4782         return;
4783 
4784     peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4785 
4786     mutex_lock(&priv->fs->tc->hairpin_tbl_lock);
4787     hash_for_each(priv->fs->tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
4788         if (refcount_inc_not_zero(&hpe->refcnt))
4789             list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4790     mutex_unlock(&priv->fs->tc->hairpin_tbl_lock);
4791 
4792     list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4793         wait_for_completion(&hpe->res_ready);
4794         if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4795             mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
4796 
4797         mlx5e_hairpin_put(priv, hpe);
4798     }
4799 }
4800 
4801 static int mlx5e_tc_netdev_event(struct notifier_block *this,
4802                  unsigned long event, void *ptr)
4803 {
4804     struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4805     struct mlx5e_priv *peer_priv;
4806     struct mlx5e_tc_table *tc;
4807     struct mlx5e_priv *priv;
4808 
4809     if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4810         event != NETDEV_UNREGISTER ||
4811         ndev->reg_state == NETREG_REGISTERED)
4812         return NOTIFY_DONE;
4813 
4814     tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4815     priv = tc->priv;
4816     peer_priv = netdev_priv(ndev);
4817     if (priv == peer_priv ||
4818         !(priv->netdev->features & NETIF_F_HW_TC))
4819         return NOTIFY_DONE;
4820 
4821     mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4822 
4823     return NOTIFY_DONE;
4824 }
4825 
4826 static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
4827 {
4828     int tc_grp_size, tc_tbl_size;
4829     u32 max_flow_counter;
4830 
4831     max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
4832                 MLX5_CAP_GEN(dev, max_flow_counter_15_0);
4833 
4834     tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
4835 
4836     tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
4837                 BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
4838 
4839     return tc_tbl_size;
4840 }
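
/* Numerical sketch with hypothetical capability values: max_flow_counter_31_16
 * = 0x0001 and max_flow_counter_15_0 = 0 give 0x10000 (65536) counters, so
 * tc_grp_size = 65536 (below the BIT(18) cap) and the requested table size is
 * 65536 * MLX5E_TC_TABLE_NUM_GROUPS = 262144 entries, still subject to the
 * log_max_ft_size clamp.
 */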
4841 
4842 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
4843 {
4844     struct mlx5_flow_table **ft = &priv->fs->tc->miss_t;
4845     struct mlx5_flow_table_attr ft_attr = {};
4846     struct mlx5_flow_namespace *ns;
4847     int err = 0;
4848 
4849     ft_attr.max_fte = 1;
4850     ft_attr.autogroup.max_num_groups = 1;
4851     ft_attr.level = MLX5E_TC_MISS_LEVEL;
4852     ft_attr.prio = 0;
4853     ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
4854 
4855     *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
4856     if (IS_ERR(*ft)) {
4857         err = PTR_ERR(*ft);
4858         netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
4859     }
4860 
4861     return err;
4862 }
4863 
4864 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
4865 {
4866     mlx5_destroy_flow_table(priv->fs->tc->miss_t);
4867 }
4868 
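/* NIC tc offload bring-up, in order: the flow rhashtable, the chain-tag
 * mapping context, a single-FTE miss table used as the default destination of
 * the fs_chains instance, the post-action and CT contexts, and a per-net
 * netdev notifier that clears hairpins towards unregistering peer devices.
 * The error labels unwind in reverse order of creation.
 */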
4869 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4870 {
4871     struct mlx5e_tc_table *tc = priv->fs->tc;
4872     struct mlx5_core_dev *dev = priv->mdev;
4873     struct mapping_ctx *chains_mapping;
4874     struct mlx5_chains_attr attr = {};
4875     u64 mapping_id;
4876     int err;
4877 
4878     mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
4879     mutex_init(&tc->t_lock);
4880     mutex_init(&tc->hairpin_tbl_lock);
4881     hash_init(tc->hairpin_tbl);
4882     tc->priv = priv;
4883 
4884     err = rhashtable_init(&tc->ht, &tc_ht_params);
4885     if (err)
4886         return err;
4887 
4888     lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
4889 
4890     mapping_id = mlx5_query_nic_system_image_guid(dev);
4891 
4892     chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
4893                            sizeof(struct mlx5_mapped_obj),
4894                            MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
4895 
4896     if (IS_ERR(chains_mapping)) {
4897         err = PTR_ERR(chains_mapping);
4898         goto err_mapping;
4899     }
4900     tc->mapping = chains_mapping;
4901 
4902     err = mlx5e_tc_nic_create_miss_table(priv);
4903     if (err)
4904         goto err_chains;
4905 
4906     if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
4907         attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
4908             MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
4909     attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
4910     attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
4911     attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
4912     attr.default_ft = priv->fs->tc->miss_t;
4913     attr.mapping = chains_mapping;
4914 
4915     tc->chains = mlx5_chains_create(dev, &attr);
4916     if (IS_ERR(tc->chains)) {
4917         err = PTR_ERR(tc->chains);
4918         goto err_miss;
4919     }
4920 
4921     tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
4922     tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
4923                  MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
4924 
4925     tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
4926     err = register_netdevice_notifier_dev_net(priv->netdev,
4927                           &tc->netdevice_nb,
4928                           &tc->netdevice_nn);
4929     if (err) {
4930         tc->netdevice_nb.notifier_call = NULL;
4931         mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
4932         goto err_reg;
4933     }
4934 
4935     return 0;
4936 
4937 err_reg:
4938     mlx5_tc_ct_clean(tc->ct);
4939     mlx5e_tc_post_act_destroy(tc->post_act);
4940     mlx5_chains_destroy(tc->chains);
4941 err_miss:
4942     mlx5e_tc_nic_destroy_miss_table(priv);
4943 err_chains:
4944     mapping_destroy(chains_mapping);
4945 err_mapping:
4946     rhashtable_destroy(&tc->ht);
4947     return err;
4948 }
4949 
4950 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
4951 {
4952     struct mlx5e_tc_flow *flow = ptr;
4953     struct mlx5e_priv *priv = flow->priv;
4954 
4955     mlx5e_tc_del_flow(priv, flow);
4956     kfree(flow);
4957 }
4958 
4959 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
4960 {
4961     struct mlx5e_tc_table *tc = priv->fs->tc;
4962 
4963     if (tc->netdevice_nb.notifier_call)
4964         unregister_netdevice_notifier_dev_net(priv->netdev,
4965                               &tc->netdevice_nb,
4966                               &tc->netdevice_nn);
4967 
4968     mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
4969     mutex_destroy(&tc->hairpin_tbl_lock);
4970 
4971     rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
4972 
4973     if (!IS_ERR_OR_NULL(tc->t)) {
4974         mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
4975         tc->t = NULL;
4976     }
4977     mutex_destroy(&tc->t_lock);
4978 
4979     mlx5_tc_ct_clean(tc->ct);
4980     mlx5e_tc_post_act_destroy(tc->post_act);
4981     mapping_destroy(tc->mapping);
4982     mlx5_chains_destroy(tc->chains);
4983     mlx5e_tc_nic_destroy_miss_table(priv);
4984 }
4985 
4986 int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
4987 {
4988     int err;
4989 
4990     err = rhashtable_init(tc_ht, &tc_ht_params);
4991     if (err)
4992         return err;
4993 
4994     lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
4995 
4996     return 0;
4997 }
4998 
4999 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
5000 {
5001     rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5002 }
5003 
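/* Uplink representor tc init: post-action and CT contexts on the FDB chains,
 * internal-port and psample support, two mapping contexts that compress
 * tunnel match keys and tunnel encap options into register ids (the last two
 * encap-opts ids are kept reserved, see the comment below), and finally the
 * tunnel encap/decap infrastructure.
 */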
5004 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
5005 {
5006     const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5007     struct mlx5e_rep_priv *rpriv;
5008     struct mapping_ctx *mapping;
5009     struct mlx5_eswitch *esw;
5010     struct mlx5e_priv *priv;
5011     u64 mapping_id;
5012     int err = 0;
5013 
5014     rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5015     priv = netdev_priv(rpriv->netdev);
5016     esw = priv->mdev->priv.eswitch;
5017 
5018     uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
5019                                MLX5_FLOW_NAMESPACE_FDB);
5020     uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5021                            esw_chains(esw),
5022                            &esw->offloads.mod_hdr,
5023                            MLX5_FLOW_NAMESPACE_FDB,
5024                            uplink_priv->post_act);
5025 
5026     uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
5027 
5028     uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
5029 
5030     mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
5031 
5032     mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
5033                     sizeof(struct tunnel_match_key),
5034                     TUNNEL_INFO_BITS_MASK, true);
5035 
5036     if (IS_ERR(mapping)) {
5037         err = PTR_ERR(mapping);
5038         goto err_tun_mapping;
5039     }
5040     uplink_priv->tunnel_mapping = mapping;
5041 
5042     /* The last two values are reserved for the stack devices' slow path
5043      * table mark and the bridge ingress push mark.
5044      */
5045     mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
5046                     sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
5047     if (IS_ERR(mapping)) {
5048         err = PTR_ERR(mapping);
5049         goto err_enc_opts_mapping;
5050     }
5051     uplink_priv->tunnel_enc_opts_mapping = mapping;
5052 
5053     uplink_priv->encap = mlx5e_tc_tun_init(priv);
5054     if (IS_ERR(uplink_priv->encap)) {
5055         err = PTR_ERR(uplink_priv->encap);
5056         goto err_register_fib_notifier;
5057     }
5058 
5059     return 0;
5060 
5061 err_register_fib_notifier:
5062     mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5063 err_enc_opts_mapping:
5064     mapping_destroy(uplink_priv->tunnel_mapping);
5065 err_tun_mapping:
5066     mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5067     mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5068     mlx5_tc_ct_clean(uplink_priv->ct_priv);
5069     netdev_warn(priv->netdev,
5070             "Failed to initialize tc (eswitch), err: %d", err);
5071     mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5072     return err;
5073 }
5074 
5075 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
5076 {
5077     mlx5e_tc_tun_cleanup(uplink_priv->encap);
5078 
5079     mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5080     mapping_destroy(uplink_priv->tunnel_mapping);
5081 
5082     mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5083     mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5084     mlx5_tc_ct_clean(uplink_priv->ct_priv);
5085     mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
5086     mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5087 }
5088 
5089 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5090 {
5091     struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5092 
5093     return atomic_read(&tc_ht->nelems);
5094 }
5095 
5096 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5097 {
5098     struct mlx5e_tc_flow *flow, *tmp;
5099 
5100     list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5101         __mlx5e_tc_del_fdb_peer_flow(flow);
5102 }
5103 
5104 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5105 {
5106     struct mlx5_rep_uplink_priv *rpriv =
5107         container_of(work, struct mlx5_rep_uplink_priv,
5108                  reoffload_flows_work);
5109     struct mlx5e_tc_flow *flow, *tmp;
5110 
5111     mutex_lock(&rpriv->unready_flows_lock);
5112     list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5113         if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5114             unready_flow_del(flow);
5115     }
5116     mutex_unlock(&rpriv->unready_flows_lock);
5117 }
5118 
5119 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5120                      struct flow_cls_offload *cls_flower,
5121                      unsigned long flags)
5122 {
5123     switch (cls_flower->command) {
5124     case FLOW_CLS_REPLACE:
5125         return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5126                           flags);
5127     case FLOW_CLS_DESTROY:
5128         return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5129                        flags);
5130     case FLOW_CLS_STATS:
5131         return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5132                       flags);
5133     default:
5134         return -EOPNOTSUPP;
5135     }
5136 }
5137 
5138 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5139                 void *cb_priv)
5140 {
5141     unsigned long flags = MLX5_TC_FLAG(INGRESS);
5142     struct mlx5e_priv *priv = cb_priv;
5143 
5144     if (!priv->netdev || !netif_device_present(priv->netdev))
5145         return -EOPNOTSUPP;
5146 
5147     if (mlx5e_is_uplink_rep(priv))
5148         flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5149     else
5150         flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5151 
5152     switch (type) {
5153     case TC_SETUP_CLSFLOWER:
5154         return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5155     default:
5156         return -EOPNOTSUPP;
5157     }
5158 }
5159 
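/* On the NIC rx path the CQE flow-table metadata carries the mapped chain tag
 * in its low bits and the CT zone restore id above it; this helper resolves
 * the tag back to a tc chain, stores it in a tc skb extension and lets CT
 * restore the conntrack state before the skb re-enters the tc datapath.
 */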
5160 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5161              struct sk_buff *skb)
5162 {
5163 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5164     u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5165     struct mlx5e_priv *priv = netdev_priv(skb->dev);
5166     struct mlx5e_tc_table *tc = priv->fs->tc;
5167     struct mlx5_mapped_obj mapped_obj;
5168     struct tc_skb_ext *tc_skb_ext;
5169     int err;
5170 
5171     reg_b = be32_to_cpu(cqe->ft_metadata);
5172 
5173     chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5174 
5175     err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
5176     if (err) {
5177         netdev_dbg(priv->netdev,
5178                "Couldn't find chain for chain tag: %d, err: %d\n",
5179                chain_tag, err);
5180         return false;
5181     }
5182 
5183     if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
5184         chain = mapped_obj.chain;
5185         tc_skb_ext = tc_skb_ext_alloc(skb);
5186         if (WARN_ON(!tc_skb_ext))
5187             return false;
5188 
5189         tc_skb_ext->chain = chain;
5190 
5191         zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5192             ESW_ZONE_ID_MASK;
5193 
5194         if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5195                           zone_restore_id))
5196             return false;
5197     } else {
5198         netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5199         return false;
5200     }
5201 #endif /* CONFIG_NET_TC_SKB_EXT */
5202 
5203     return true;
5204 }