Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
0002 /*
0003  * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
0004  */
0005 
0006 #include <rdma/ib_user_verbs.h>
0007 #include <rdma/ib_verbs.h>
0008 #include <rdma/uverbs_types.h>
0009 #include <rdma/uverbs_ioctl.h>
0010 #include <rdma/uverbs_std_types.h>
0011 #include <rdma/mlx5_user_ioctl_cmds.h>
0012 #include <rdma/mlx5_user_ioctl_verbs.h>
0013 #include <rdma/ib_hdrs.h>
0014 #include <rdma/ib_umem.h>
0015 #include <linux/mlx5/driver.h>
0016 #include <linux/mlx5/fs.h>
0017 #include <linux/mlx5/fs_helpers.h>
0018 #include <linux/mlx5/eswitch.h>
0019 #include <net/inet_ecn.h>
0020 #include "mlx5_ib.h"
0021 #include "counters.h"
0022 #include "devx.h"
0023 #include "fs.h"
0024 
0025 #define UVERBS_MODULE_NAME mlx5_ib
0026 #include <rdma/uverbs_named_ioctl.h>
0027 
/*
 * Bit positions of the match_criteria_enable field of a flow table entry;
 * each bit flags one header group of the fte_match_param as in use.
 */
enum {
    MATCH_CRITERIA_ENABLE_OUTER_BIT,
    MATCH_CRITERIA_ENABLE_MISC_BIT,
    MATCH_CRITERIA_ENABLE_INNER_BIT,
    MATCH_CRITERIA_ENABLE_MISC2_BIT
};

/* True when the given header group of @match_criteria is all-zero. */
#define HEADER_IS_ZERO(match_criteria, headers)                    \
    !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
            0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))       \

/*
 * Build the match_criteria_enable bitmask for a flow entry: one bit is
 * set per header group (outer, misc, inner, misc2) that carries any
 * non-zero mask bits in @match_criteria.
 */
static u8 get_match_criteria_enable(u32 *match_criteria)
{
    u8 match_criteria_enable;

    match_criteria_enable =
        (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
        MATCH_CRITERIA_ENABLE_OUTER_BIT;
    match_criteria_enable |=
        (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
        MATCH_CRITERIA_ENABLE_MISC_BIT;
    match_criteria_enable |=
        (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
        MATCH_CRITERIA_ENABLE_INNER_BIT;
    match_criteria_enable |=
        (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
        MATCH_CRITERIA_ENABLE_MISC2_BIT;

    return match_criteria_enable;
}
0058 
/*
 * Set the ip_protocol match (mask + value) in the layer 2-4 headers.
 * Several spec types (IPv4/IPv6/TCP/UDP/GRE) may each imply a protocol;
 * the first caller wins and later callers must agree exactly, otherwise
 * -EINVAL is returned.  A zero @mask is a no-op.
 */
static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
{
    u8 entry_mask;
    u8 entry_val;
    int err = 0;

    if (!mask)
        goto out;

    entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
                  ip_protocol);
    entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
                 ip_protocol);
    if (!entry_mask) {
        MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
        MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
        goto out;
    }
    /* Don't override existing ip protocol */
    if (mask != entry_mask || val != entry_val)
        err = -EINVAL;
out:
    return err;
}
0083 
/*
 * Program the IPv6 flow-label match (mask + value) into the misc
 * parameters, selecting the inner or outer field per @inner.
 */
static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
               bool inner)
{
    if (inner) {
        MLX5_SET(fte_match_set_misc,
             misc_c, inner_ipv6_flow_label, mask);
        MLX5_SET(fte_match_set_misc,
             misc_v, inner_ipv6_flow_label, val);
    } else {
        MLX5_SET(fte_match_set_misc,
             misc_c, outer_ipv6_flow_label, mask);
        MLX5_SET(fte_match_set_misc,
             misc_v, outer_ipv6_flow_label, val);
    }
}
0099 
/*
 * Split an 8-bit IP TOS/traffic-class match into the HW's two fields:
 * the 2 low bits go to ip_ecn, the 6 high bits (shifted down) to ip_dscp.
 */
static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
{
    MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
    MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
    MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
    MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
}
0107 
/*
 * Verify that every MPLS field the user wants to match on (non-zero in
 * @set_mask) is advertised as supported in the @field_support capability
 * bitmask.  Returns 0 if all requested fields are supported, otherwise
 * -EOPNOTSUPP.
 */
static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
{
    if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
        !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
        return -EOPNOTSUPP;

    if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
        !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
        return -EOPNOTSUPP;

    if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
        !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
        return -EOPNOTSUPP;

    if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
        !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
        return -EOPNOTSUPP;

    return 0;
}
0128 
/*
 * For each uverbs spec struct, the last member this driver knows how to
 * translate into HW match criteria.  Anything past it must be zero.
 */
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
#define LAST_COUNTERS_FIELD counters

/* Field is the last supported field */
/* Non-NULL (i.e. "not supported") when any byte after @field is non-zero. */
#define FIELDS_NOT_SUPPORTED(filter, field)                                    \
    memchr_inv((void *)&filter.field + sizeof(filter.field), 0,            \
           sizeof(filter) - offsetofend(typeof(filter), field))
0143 
0144 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
0145                bool is_egress,
0146                struct mlx5_flow_act *action)
0147 {
0148 
0149     switch (maction->ib_action.type) {
0150     case IB_FLOW_ACTION_UNSPECIFIED:
0151         if (maction->flow_action_raw.sub_type ==
0152             MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
0153             if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
0154                 return -EINVAL;
0155             action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
0156             action->modify_hdr =
0157                 maction->flow_action_raw.modify_hdr;
0158             return 0;
0159         }
0160         if (maction->flow_action_raw.sub_type ==
0161             MLX5_IB_FLOW_ACTION_DECAP) {
0162             if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
0163                 return -EINVAL;
0164             action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
0165             return 0;
0166         }
0167         if (maction->flow_action_raw.sub_type ==
0168             MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
0169             if (action->action &
0170                 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
0171                 return -EINVAL;
0172             action->action |=
0173                 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
0174             action->pkt_reformat =
0175                 maction->flow_action_raw.pkt_reformat;
0176             return 0;
0177         }
0178         fallthrough;
0179     default:
0180         return -EOPNOTSUPP;
0181     }
0182 }
0183 
/*
 * parse_flow_attr() - translate one uverbs flow spec into mlx5 terms.
 *
 * Match-type specs (ETH/IPv4/IPv6/TCP/UDP/GRE/MPLS/VXLAN) are written
 * into @spec's match criteria and match value; action-type specs update
 * @spec's flow context or @action.  Inner specs (IB_FLOW_SPEC_INNER set)
 * target the inner_headers group, all others the outer_headers group.
 * @prev_type is the type of the previously parsed spec and disambiguates
 * which MPLS position (over UDP, over GRE, inner/outer first) to match.
 *
 * Returns 0 on success, -EOPNOTSUPP when the spec carries fields the
 * driver/device cannot match on, -EINVAL for malformed or conflicting
 * specs.
 */
static int parse_flow_attr(struct mlx5_core_dev *mdev,
               struct mlx5_flow_spec *spec,
               const union ib_flow_spec *ib_spec,
               const struct ib_flow_attr *flow_attr,
               struct mlx5_flow_act *action, u32 prev_type)
{
    struct mlx5_flow_context *flow_context = &spec->flow_context;
    u32 *match_c = spec->match_criteria;
    u32 *match_v = spec->match_value;
    void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                       misc_parameters);
    void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
                       misc_parameters);
    void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
                        misc_parameters_2);
    void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
                        misc_parameters_2);
    void *headers_c;
    void *headers_v;
    int match_ipv;
    int ret;

    /* Select inner vs outer header group and the matching ip_version
     * capability bit.
     */
    if (ib_spec->type & IB_FLOW_SPEC_INNER) {
        headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
                     inner_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                     inner_headers);
        match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                    ft_field_support.inner_ip_version);
    } else {
        headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
                     outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
                     outer_headers);
        match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                    ft_field_support.outer_ip_version);
    }

    switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
    case IB_FLOW_SPEC_ETH:
        if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
            return -EOPNOTSUPP;

        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                         dmac_47_16),
                ib_spec->eth.mask.dst_mac);
        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                         dmac_47_16),
                ib_spec->eth.val.dst_mac);

        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                         smac_47_16),
                ib_spec->eth.mask.src_mac);
        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                         smac_47_16),
                ib_spec->eth.val.src_mac);

        if (ib_spec->eth.mask.vlan_tag) {
            /* Split the 16-bit TCI into vid (11:0), cfi (12) and
             * prio (15:13) HW fields.
             */
            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 cvlan_tag, 1);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 cvlan_tag, 1);

            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 first_vid, ntohs(ib_spec->eth.val.vlan_tag));

            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 first_cfi,
                 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 first_cfi,
                 ntohs(ib_spec->eth.val.vlan_tag) >> 12);

            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 first_prio,
                 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 first_prio,
                 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
        }
        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
             ethertype, ntohs(ib_spec->eth.mask.ether_type));
        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
             ethertype, ntohs(ib_spec->eth.val.ether_type));
        break;
    case IB_FLOW_SPEC_IPV4:
        if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
            return -EOPNOTSUPP;

        /* Prefer the ip_version field when the device supports it,
         * otherwise fall back to matching the ethertype.
         */
        if (match_ipv) {
            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 ip_version, 0xf);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 ip_version, MLX5_FS_IPV4_VERSION);
        } else {
            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 ethertype, 0xffff);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 ethertype, ETH_P_IP);
        }

        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.mask.src_ip,
               sizeof(ib_spec->ipv4.mask.src_ip));
        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.val.src_ip,
               sizeof(ib_spec->ipv4.val.src_ip));
        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.mask.dst_ip,
               sizeof(ib_spec->ipv4.mask.dst_ip));
        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
               &ib_spec->ipv4.val.dst_ip,
               sizeof(ib_spec->ipv4.val.dst_ip));

        set_tos(headers_c, headers_v,
            ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);

        if (set_proto(headers_c, headers_v,
                  ib_spec->ipv4.mask.proto,
                  ib_spec->ipv4.val.proto))
            return -EINVAL;
        break;
    case IB_FLOW_SPEC_IPV6:
        if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
            return -EOPNOTSUPP;

        if (match_ipv) {
            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 ip_version, 0xf);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 ip_version, MLX5_FS_IPV6_VERSION);
        } else {
            MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                 ethertype, 0xffff);
            MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                 ethertype, ETH_P_IPV6);
        }

        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
               &ib_spec->ipv6.mask.src_ip,
               sizeof(ib_spec->ipv6.mask.src_ip));
        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
               &ib_spec->ipv6.val.src_ip,
               sizeof(ib_spec->ipv6.val.src_ip));
        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
               &ib_spec->ipv6.mask.dst_ip,
               sizeof(ib_spec->ipv6.mask.dst_ip));
        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
               &ib_spec->ipv6.val.dst_ip,
               sizeof(ib_spec->ipv6.val.dst_ip));

        set_tos(headers_c, headers_v,
            ib_spec->ipv6.mask.traffic_class,
            ib_spec->ipv6.val.traffic_class);

        if (set_proto(headers_c, headers_v,
                  ib_spec->ipv6.mask.next_hdr,
                  ib_spec->ipv6.val.next_hdr))
            return -EINVAL;

        set_flow_label(misc_params_c, misc_params_v,
                   ntohl(ib_spec->ipv6.mask.flow_label),
                   ntohl(ib_spec->ipv6.val.flow_label),
                   ib_spec->type & IB_FLOW_SPEC_INNER);
        break;
    case IB_FLOW_SPEC_ESP:
        /* ESP offload via this path is not supported. */
        return -EOPNOTSUPP;
    case IB_FLOW_SPEC_TCP:
        if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                     LAST_TCP_UDP_FIELD))
            return -EOPNOTSUPP;

        /* A TCP spec pins ip_protocol; conflicts are -EINVAL. */
        if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
            return -EINVAL;

        MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
             ntohs(ib_spec->tcp_udp.mask.src_port));
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
             ntohs(ib_spec->tcp_udp.val.src_port));

        MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
             ntohs(ib_spec->tcp_udp.mask.dst_port));
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
             ntohs(ib_spec->tcp_udp.val.dst_port));
        break;
    case IB_FLOW_SPEC_UDP:
        if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                     LAST_TCP_UDP_FIELD))
            return -EOPNOTSUPP;

        if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
            return -EINVAL;

        MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
             ntohs(ib_spec->tcp_udp.mask.src_port));
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
             ntohs(ib_spec->tcp_udp.val.src_port));

        MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
             ntohs(ib_spec->tcp_udp.mask.dst_port));
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
             ntohs(ib_spec->tcp_udp.val.dst_port));
        break;
    case IB_FLOW_SPEC_GRE:
        /* Matching on the GRE C/K/S/version bits is not supported. */
        if (ib_spec->gre.mask.c_ks_res0_ver)
            return -EOPNOTSUPP;

        if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
            return -EINVAL;

        MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
             0xff);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
             IPPROTO_GRE);

        MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
             ntohs(ib_spec->gre.mask.protocol));
        MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
             ntohs(ib_spec->gre.val.protocol));

        memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
                    gre_key.nvgre.hi),
               &ib_spec->gre.mask.key,
               sizeof(ib_spec->gre.mask.key));
        memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
                    gre_key.nvgre.hi),
               &ib_spec->gre.val.key,
               sizeof(ib_spec->gre.val.key));
        break;
    case IB_FLOW_SPEC_MPLS:
        /* The MPLS label position depends on what preceded it:
         * over UDP, over GRE, or a plain first label (inner/outer).
         */
        switch (prev_type) {
        case IB_FLOW_SPEC_UDP:
            if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                           ft_field_support.outer_first_mpls_over_udp),
                           &ib_spec->mpls.mask.tag))
                return -EOPNOTSUPP;

            memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
                        outer_first_mpls_over_udp),
                   &ib_spec->mpls.val.tag,
                   sizeof(ib_spec->mpls.val.tag));
            memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
                        outer_first_mpls_over_udp),
                   &ib_spec->mpls.mask.tag,
                   sizeof(ib_spec->mpls.mask.tag));
            break;
        case IB_FLOW_SPEC_GRE:
            if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                           ft_field_support.outer_first_mpls_over_gre),
                           &ib_spec->mpls.mask.tag))
                return -EOPNOTSUPP;

            memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
                        outer_first_mpls_over_gre),
                   &ib_spec->mpls.val.tag,
                   sizeof(ib_spec->mpls.val.tag));
            memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
                        outer_first_mpls_over_gre),
                   &ib_spec->mpls.mask.tag,
                   sizeof(ib_spec->mpls.mask.tag));
            break;
        default:
            if (ib_spec->type & IB_FLOW_SPEC_INNER) {
                if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                               ft_field_support.inner_first_mpls),
                               &ib_spec->mpls.mask.tag))
                    return -EOPNOTSUPP;

                memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
                            inner_first_mpls),
                       &ib_spec->mpls.val.tag,
                       sizeof(ib_spec->mpls.val.tag));
                memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
                            inner_first_mpls),
                       &ib_spec->mpls.mask.tag,
                       sizeof(ib_spec->mpls.mask.tag));
            } else {
                if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                               ft_field_support.outer_first_mpls),
                               &ib_spec->mpls.mask.tag))
                    return -EOPNOTSUPP;

                memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
                            outer_first_mpls),
                       &ib_spec->mpls.val.tag,
                       sizeof(ib_spec->mpls.val.tag));
                memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
                            outer_first_mpls),
                       &ib_spec->mpls.mask.tag,
                       sizeof(ib_spec->mpls.mask.tag));
            }
        }
        break;
    case IB_FLOW_SPEC_VXLAN_TUNNEL:
        if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
                     LAST_TUNNEL_FIELD))
            return -EOPNOTSUPP;

        MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
             ntohl(ib_spec->tunnel.mask.tunnel_id));
        MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
             ntohl(ib_spec->tunnel.val.tunnel_id));
        break;
    case IB_FLOW_SPEC_ACTION_TAG:
        if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
                     LAST_FLOW_TAG_FIELD))
            return -EOPNOTSUPP;
        /* Flow tags are 24 bits wide in HW. */
        if (ib_spec->flow_tag.tag_id >= BIT(24))
            return -EINVAL;

        flow_context->flow_tag = ib_spec->flow_tag.tag_id;
        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        break;
    case IB_FLOW_SPEC_ACTION_DROP:
        if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
                     LAST_DROP_FIELD))
            return -EOPNOTSUPP;
        action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
        break;
    case IB_FLOW_SPEC_ACTION_HANDLE:
        ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
            flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
        if (ret)
            return ret;
        break;
    case IB_FLOW_SPEC_ACTION_COUNT:
        if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
                     LAST_COUNTERS_FIELD))
            return -EOPNOTSUPP;

        /* for now support only one counters spec per flow */
        if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
            return -EINVAL;

        action->counters = ib_spec->flow_count.counters;
        action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
        break;
    default:
        return -EINVAL;
    }

    return 0;
}
0537 
0538 /* If a flow could catch both multicast and unicast packets,
0539  * it won't fall into the multicast flow steering table and this rule
0540  * could steal other multicast packets.
0541  */
0542 static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
0543 {
0544     union ib_flow_spec *flow_spec;
0545 
0546     if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
0547         ib_attr->num_of_specs < 1)
0548         return false;
0549 
0550     flow_spec = (union ib_flow_spec *)(ib_attr + 1);
0551     if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
0552         struct ib_flow_spec_ipv4 *ipv4_spec;
0553 
0554         ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
0555         if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
0556             return true;
0557 
0558         return false;
0559     }
0560 
0561     if (flow_spec->type == IB_FLOW_SPEC_ETH) {
0562         struct ib_flow_spec_eth *eth_spec;
0563 
0564         eth_spec = (struct ib_flow_spec_eth *)flow_spec;
0565         return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
0566                is_multicast_ether_addr(eth_spec->val.dst_mac);
0567     }
0568 
0569     return false;
0570 }
0571 
/*
 * Walk all specs of @flow_attr (inner or outer per @check_inner) and
 * verify that an ETH spec's masked ether_type agrees with any IPv4/IPv6
 * spec present: ETH_P_IP with an IPv4 spec, ETH_P_IPV6 with an IPv6
 * spec, or an MPLS ethertype when the device can match ip_version
 * independently.  Specs are chained via their self-describing size.
 */
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
                   const struct ib_flow_attr *flow_attr,
                   bool check_inner)
{
    union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
    int match_ipv = check_inner ?
            MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                    ft_field_support.inner_ip_version) :
            MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                    ft_field_support.outer_ip_version);
    int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
    bool ipv4_spec_valid, ipv6_spec_valid;
    unsigned int ip_spec_type = 0;
    bool has_ethertype = false;
    unsigned int spec_index;
    bool mask_valid = true;
    u16 eth_type = 0;
    bool type_valid;

    /* Validate that ethertype is correct */
    for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
        if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
            ib_spec->eth.mask.ether_type) {
            /* Only a full 0xffff mask pins the ethertype. */
            mask_valid = (ib_spec->eth.mask.ether_type ==
                      htons(0xffff));
            has_ethertype = true;
            eth_type = ntohs(ib_spec->eth.val.ether_type);
        } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
               (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
            ip_spec_type = ib_spec->type;
        }
        /* Advance to the next spec using its embedded size. */
        ib_spec = (void *)ib_spec + ib_spec->size;
    }

    /* No conflict possible without both an ethertype and an IP spec. */
    type_valid = (!has_ethertype) || (!ip_spec_type);
    if (!type_valid && mask_valid) {
        ipv4_spec_valid = (eth_type == ETH_P_IP) &&
            (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
        ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
            (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));

        type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
                 (((eth_type == ETH_P_MPLS_UC) ||
                   (eth_type == ETH_P_MPLS_MC)) && match_ipv);
    }

    return type_valid;
}
0620 
0621 static bool is_valid_attr(struct mlx5_core_dev *mdev,
0622               const struct ib_flow_attr *flow_attr)
0623 {
0624     return is_valid_ethertype(mdev, flow_attr, false) &&
0625            is_valid_ethertype(mdev, flow_attr, true);
0626 }
0627 
0628 static void put_flow_table(struct mlx5_ib_dev *dev,
0629                struct mlx5_ib_flow_prio *prio, bool ft_added)
0630 {
0631     prio->refcount -= !!ft_added;
0632     if (!prio->refcount) {
0633         mlx5_destroy_flow_table(prio->flow_table);
0634         prio->flow_table = NULL;
0635     }
0636 }
0637 
/*
 * Destroy a uverbs flow: under the flow_db lock, delete every chained
 * sub-handler (rule + flow-table reference + list node), then the main
 * handler's rule and table reference, and clear any counters
 * description attached to the flow.  Outside the lock, release the
 * matcher reference (if any) and free the handler.  Always returns 0.
 */
static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
    struct mlx5_ib_flow_handler *handler = container_of(flow_id,
                              struct mlx5_ib_flow_handler,
                              ibflow);
    struct mlx5_ib_flow_handler *iter, *tmp;
    struct mlx5_ib_dev *dev = handler->dev;

    mutex_lock(&dev->flow_db->lock);

    /* Tear down chained handlers first; safe iteration since each
     * node is removed and freed inside the loop.
     */
    list_for_each_entry_safe(iter, tmp, &handler->list, list) {
        mlx5_del_flow_rules(iter->rule);
        put_flow_table(dev, iter->prio, true);
        list_del(&iter->list);
        kfree(iter);
    }

    mlx5_del_flow_rules(handler->rule);
    put_flow_table(dev, handler->prio, true);
    mlx5_ib_counters_clear_description(handler->ibcounters);
    mutex_unlock(&dev->flow_db->lock);
    if (handler->flow_matcher)
        atomic_dec(&handler->flow_matcher->usecnt);
    kfree(handler);

    return 0;
}
0665 
/*
 * Map a uverbs flow priority to a core steering priority.  Every IB
 * priority owns two consecutive core levels: "don't trap" rules take the
 * even slot, regular rules the odd slot right after it.
 */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
    unsigned int core_prio = priority * 2;

    if (!dont_trap)
        core_prio += 1;

    return core_prio;
}
0673 
/* Direction of the flow table being looked up / created. */
enum flow_table_type {
    MLX5_IB_FT_RX,
    MLX5_IB_FT_TX
};

/* Autogroup sizing defaults used when creating bypass flow tables. */
#define MLX5_FS_MAX_TYPES    6
#define MLX5_FS_MAX_ENTRIES  BIT(16)
0681 
0682 static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
0683 {
0684     struct mlx5_ib_dev *dev = to_mdev(device);
0685 
0686     return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
0687 }
0688 
0689 static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
0690                        struct mlx5_flow_namespace *ns,
0691                        struct mlx5_ib_flow_prio *prio,
0692                        int priority,
0693                        int num_entries, int num_groups,
0694                        u32 flags)
0695 {
0696     struct mlx5_flow_table_attr ft_attr = {};
0697     struct mlx5_flow_table *ft;
0698 
0699     if (mlx5_ib_shared_ft_allowed(&dev->ib_dev))
0700         ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
0701     ft_attr.prio = priority;
0702     ft_attr.max_fte = num_entries;
0703     ft_attr.flags = flags;
0704     ft_attr.autogroup.max_num_groups = num_groups;
0705     ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
0706     if (IS_ERR(ft))
0707         return ERR_CAST(ft);
0708 
0709     prio->flow_table = ft;
0710     prio->refcount = 0;
0711     return prio;
0712 }
0713 
/*
 * Select (and lazily create) the flow-table priority slot that a uverbs
 * flow of the given type/direction should land in:
 *
 *  - NORMAL rules go to the bypass (RX) or egress (TX) namespace at a
 *    priority derived from the IB priority; multicast-only rules are
 *    forced to the dedicated multicast priority.  Tunnel decap/reformat
 *    table flags are enabled per device capability (not for reps or
 *    when eswitch encap owns those resources).
 *  - ALL_DEFAULT / MC_DEFAULT rules go to the leftovers namespace.
 *  - SNIFFER rules go to the sniffer RX/TX namespace (single entry).
 *
 * Returns the prio (creating its table on first use via _get_prio) or
 * an ERR_PTR on unsupported types / missing namespace.
 */
static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                        struct ib_flow_attr *flow_attr,
                        enum flow_table_type ft_type)
{
    bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
    struct mlx5_flow_namespace *ns = NULL;
    enum mlx5_flow_namespace_type fn_type;
    struct mlx5_ib_flow_prio *prio;
    struct mlx5_flow_table *ft;
    int max_table_size;
    int num_entries;
    int num_groups;
    bool esw_encap;
    u32 flags = 0;
    int priority;

    max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
                               log_max_ft_size));
    esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
        DEVLINK_ESWITCH_ENCAP_MODE_NONE;
    switch (flow_attr->type) {
    case IB_FLOW_ATTR_NORMAL:
        if (flow_is_multicast_only(flow_attr) && !dont_trap)
            priority = MLX5_IB_FLOW_MCAST_PRIO;
        else
            priority = ib_prio_to_core_prio(flow_attr->priority,
                            dont_trap);
        if (ft_type == MLX5_IB_FT_RX) {
            fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
            prio = &dev->flow_db->prios[priority];
            if (!dev->is_rep && !esw_encap &&
                MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
                flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
            if (!dev->is_rep && !esw_encap &&
                MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
                              reformat_l3_tunnel_to_l2))
                flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
        } else {
            /* TX path: size from the NIC TX capability. */
            max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
                dev->mdev, log_max_ft_size));
            fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
            prio = &dev->flow_db->egress_prios[priority];
            if (!dev->is_rep && !esw_encap &&
                MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
                flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
        }
        ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
        num_entries = MLX5_FS_MAX_ENTRIES;
        num_groups = MLX5_FS_MAX_TYPES;
        break;
    case IB_FLOW_ATTR_ALL_DEFAULT:
    case IB_FLOW_ATTR_MC_DEFAULT:
        ns = mlx5_get_flow_namespace(dev->mdev,
                         MLX5_FLOW_NAMESPACE_LEFTOVERS);
        build_leftovers_ft_param(&priority, &num_entries, &num_groups);
        prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
        break;
    case IB_FLOW_ATTR_SNIFFER:
        if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                    allow_sniffer_and_nic_rx_shared_tir))
            return ERR_PTR(-EOPNOTSUPP);

        ns = mlx5_get_flow_namespace(
            dev->mdev, ft_type == MLX5_IB_FT_RX ?
                       MLX5_FLOW_NAMESPACE_SNIFFER_RX :
                       MLX5_FLOW_NAMESPACE_SNIFFER_TX);

        prio = &dev->flow_db->sniffer[ft_type];
        priority = 0;
        num_entries = 1;
        num_groups = 1;
        break;
    default:
        /* ns stays NULL -> reported as -EOPNOTSUPP below. */
        break;
    }

    if (!ns)
        return ERR_PTR(-EOPNOTSUPP);

    max_table_size = min_t(int, num_entries, max_table_size);

    /* Create the table only on first use of this priority slot. */
    ft = prio->flow_table;
    if (!ft)
        return _get_prio(dev, ns, prio, priority, max_table_size,
                 num_groups, flags);

    return prio;
}
0802 
/*
 * Flow-table priorities used for the optional counters placed in the
 * RDMA RX counters namespace (see mlx5_ib_fs_add_op_fc() below).
 */
enum {
    RDMA_RX_ECN_OPCOUNTER_PRIO,
    RDMA_RX_CNP_OPCOUNTER_PRIO,
};
0807 
/* Flow-table priority for optional counters in the RDMA TX counters namespace. */
enum {
    RDMA_TX_CNP_OPCOUNTER_PRIO,
};
0811 
/*
 * set_vhca_port_spec() - restrict a flow spec to traffic arriving on a
 * specific vhca port (used on multi-port devices to count per port).
 *
 * Return: 0 on success, -EOPNOTSUPP if the source_vhca_port match field
 * is not supported on both the RDMA RX and RDMA TX flow tables.
 */
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
                  struct mlx5_flow_spec *spec)
{
    /* The same spec is used in both directions, so both sides must match. */
    if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
                    ft_field_support.source_vhca_port) ||
        !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
                    ft_field_support.source_vhca_port))
        return -EOPNOTSUPP;

    /* Mask the whole field and match the requested port exactly. */
    MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
             misc_parameters.source_vhca_port);
    MLX5_SET(fte_match_param, &spec->match_value,
         misc_parameters.source_vhca_port, port_num);

    return 0;
}
0828 
/*
 * set_ecn_ce_spec() - build a match for IP packets of version @ipv whose
 * ECN field carries the Congestion Experienced (CE) mark.
 *
 * On multi-port (MP) devices the spec is additionally narrowed to
 * @port_num via set_vhca_port_spec().
 *
 * Return: 0 on success, -EOPNOTSUPP if outer_ip_version (or, on MP
 * devices, source_vhca_port) matching is not supported.
 */
static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
               struct mlx5_flow_spec *spec, int ipv)
{
    if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
                    ft_field_support.outer_ip_version))
        return -EOPNOTSUPP;

    if (mlx5_core_mp_enabled(dev->mdev) &&
        set_vhca_port_spec(dev, port_num, spec))
        return -EOPNOTSUPP;

    /* Match ECN == CE and the requested IP version, fully masked. */
    MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
             outer_headers.ip_ecn);
    MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
         INET_ECN_CE);
    MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
             outer_headers.ip_version);
    MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
         ipv);

    /* Recompute which match_criteria groups are actually in use. */
    spec->match_criteria_enable =
        get_match_criteria_enable(spec->match_criteria);

    return 0;
}
0854 
/*
 * set_cnp_spec() - build a match for RoCE Congestion Notification
 * Packets, identified by their BTH opcode.
 *
 * On multi-port devices the spec is also narrowed to @port_num.
 *
 * Return: 0 on success, -EOPNOTSUPP if per-port matching is required
 * but unsupported.
 */
static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
            struct mlx5_flow_spec *spec)
{
    if (mlx5_core_mp_enabled(dev->mdev) &&
        set_vhca_port_spec(dev, port_num, spec))
        return -EOPNOTSUPP;

    /* CNPs are recognized purely by the BTH opcode value. */
    MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
             misc_parameters.bth_opcode);
    MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
         IB_BTH_OPCODE_CNP);

    spec->match_criteria_enable =
        get_match_criteria_enable(spec->match_criteria);

    return 0;
}
0872 
/*
 * mlx5_ib_fs_add_op_fc() - attach the steering rules that feed an
 * optional flow counter of the given @type for @port_num.
 *
 * Builds one or two match specs (ECN-CE counting needs separate IPv4 and
 * IPv6 rules), lazily creates the per-type flow table shared by all
 * ports, and installs a COUNT|ALLOW rule per spec into opfc->rule[].
 * On success the prio refcount is raised by the number of rules added.
 *
 * Return: 0 on success or a negative errno; -EOPNOTSUPP when the device
 * lacks the required match capabilities.
 */
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
             struct mlx5_ib_op_fc *opfc,
             enum mlx5_ib_optional_counter_type type)
{
    enum mlx5_flow_namespace_type fn_type;
    int priority, i, err, spec_num;
    struct mlx5_flow_act flow_act = {};
    struct mlx5_flow_destination dst;
    struct mlx5_flow_namespace *ns;
    struct mlx5_ib_flow_prio *prio;
    struct mlx5_flow_spec *spec;

    spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
    if (!spec)
        return -ENOMEM;

    switch (type) {
    case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
        /* Congestion-marked packets: one rule per IP version. */
        if (set_ecn_ce_spec(dev, port_num, &spec[0],
                    MLX5_FS_IPV4_VERSION) ||
            set_ecn_ce_spec(dev, port_num, &spec[1],
                    MLX5_FS_IPV6_VERSION)) {
            err = -EOPNOTSUPP;
            goto free;
        }
        spec_num = 2;
        fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
        priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
        break;

    case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
        /* Received CNPs: BTH opcode matching must exist on NIC RX RDMA. */
        if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                    ft_field_support_2_nic_receive_rdma.bth_opcode) ||
            set_cnp_spec(dev, port_num, &spec[0])) {
            err = -EOPNOTSUPP;
            goto free;
        }
        spec_num = 1;
        fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
        priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
        break;

    case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
        /* Sent CNPs: same match, but on the NIC TX RDMA namespace. */
        if (!MLX5_CAP_FLOWTABLE(dev->mdev,
                    ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
            set_cnp_spec(dev, port_num, &spec[0])) {
            err = -EOPNOTSUPP;
            goto free;
        }
        spec_num = 1;
        fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
        priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
        break;

    default:
        err = -EOPNOTSUPP;
        goto free;
    }

    ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
    if (!ns) {
        err = -EOPNOTSUPP;
        goto free;
    }

    /* Create the flow table for this counter type on first use only. */
    prio = &dev->flow_db->opfcs[type];
    if (!prio->flow_table) {
        prio = _get_prio(dev, ns, prio, priority,
                 dev->num_ports * MAX_OPFC_RULES, 1, 0);
        if (IS_ERR(prio)) {
            err = PTR_ERR(prio);
            goto free;
        }
    }

    dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
    dst.counter_id = mlx5_fc_id(opfc->fc);

    /* Count the packet, then let it continue through the pipeline. */
    flow_act.action =
        MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;

    for (i = 0; i < spec_num; i++) {
        opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
                            &flow_act, &dst, 1);
        if (IS_ERR(opfc->rule[i])) {
            err = PTR_ERR(opfc->rule[i]);
            goto del_rules;
        }
    }
    prio->refcount += spec_num;
    kfree(spec);

    return 0;

del_rules:
    /* Roll back any rules already installed; drop the table if unused. */
    for (i -= 1; i >= 0; i--)
        mlx5_del_flow_rules(opfc->rule[i]);
    put_flow_table(dev, prio, false);
free:
    kfree(spec);
    return err;
}
0975 
0976 void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
0977                  struct mlx5_ib_op_fc *opfc,
0978                  enum mlx5_ib_optional_counter_type type)
0979 {
0980     int i;
0981 
0982     for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
0983         mlx5_del_flow_rules(opfc->rule[i]);
0984         put_flow_table(dev, &dev->flow_db->opfcs[type], true);
0985     }
0986 }
0987 
0988 static void set_underlay_qp(struct mlx5_ib_dev *dev,
0989                 struct mlx5_flow_spec *spec,
0990                 u32 underlay_qpn)
0991 {
0992     void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
0993                        spec->match_criteria,
0994                        misc_parameters);
0995     void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
0996                        misc_parameters);
0997 
0998     if (underlay_qpn &&
0999         MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
1000                       ft_field_support.bth_dst_qp)) {
1001         MLX5_SET(fte_match_set_misc,
1002              misc_params_v, bth_dst_qp, underlay_qpn);
1003         MLX5_SET(fte_match_set_misc,
1004              misc_params_c, bth_dst_qp, 0xffffff);
1005     }
1006 }
1007 
/*
 * mlx5_ib_set_rule_source_port() - in switchdev (representor) mode,
 * narrow the spec to traffic originating from @rep's vport.
 *
 * When the eswitch tags packets with vport match metadata, match on
 * metadata_reg_c_0; otherwise fall back to matching the raw source_port
 * field.
 */
static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
                     struct mlx5_flow_spec *spec,
                     struct mlx5_eswitch_rep *rep)
{
    struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
    void *misc;

    if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
        /* Metadata-based matching: value and mask in misc_parameters_2. */
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                    misc_parameters_2);

        MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
             mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
                                   rep->vport));
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                    misc_parameters_2);

        MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
             mlx5_eswitch_get_vport_metadata_mask());
    } else {
        /* Legacy matching on the literal source vport number. */
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                    misc_parameters);

        MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);

        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                    misc_parameters);

        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
    }
}
1039 
/*
 * _create_flow_rule() - translate a verbs flow attribute into one mlx5
 * steering rule on @ft_prio and return a handler owning that rule.
 *
 * @underlay_qpn: non-zero for QPs created with IB_QP_CREATE_SOURCE_QPN;
 *	matched as the BTH destination QP when supported.
 * @ucmd: optional user command buffer carrying flow-counter data.
 *
 * The untrusted spec list trailing @flow_attr is parsed one union
 * ib_flow_spec at a time.  On success the prio refcount is raised by
 * one.  Returns the handler or an ERR_PTR; on error the counters
 * description (if any was attached) is cleared and the handler freed.
 */
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
                              struct mlx5_ib_flow_prio *ft_prio,
                              const struct ib_flow_attr *flow_attr,
                              struct mlx5_flow_destination *dst,
                              u32 underlay_qpn,
                              struct mlx5_ib_create_flow *ucmd)
{
    struct mlx5_flow_table  *ft = ft_prio->flow_table;
    struct mlx5_ib_flow_handler *handler;
    struct mlx5_flow_act flow_act = {};
    struct mlx5_flow_spec *spec;
    struct mlx5_flow_destination dest_arr[2] = {};
    struct mlx5_flow_destination *rule_dst = dest_arr;
    /* The variable-length spec array starts right after the attr header. */
    const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
    unsigned int spec_index;
    u32 prev_type = 0;
    int err = 0;
    int dest_num = 0;
    bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;

    if (!is_valid_attr(dev->mdev, flow_attr))
        return ERR_PTR(-EINVAL);

    /* Representors only steer RX traffic. */
    if (dev->is_rep && is_egress)
        return ERR_PTR(-EINVAL);

    spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
    handler = kzalloc(sizeof(*handler), GFP_KERNEL);
    if (!handler || !spec) {
        err = -ENOMEM;
        goto free;
    }

    INIT_LIST_HEAD(&handler->list);

    /* Fold every user-supplied spec into the single mlx5 flow spec. */
    for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
        err = parse_flow_attr(dev->mdev, spec,
                      ib_flow, flow_attr, &flow_act,
                      prev_type);
        if (err < 0)
            goto free;

        prev_type = ((union ib_flow_spec *)ib_flow)->type;
        ib_flow += ((union ib_flow_spec *)ib_flow)->size;
    }

    /* A drop rule needs no forwarding destination. */
    if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
        memcpy(&dest_arr[0], dst, sizeof(*dst));
        dest_num++;
    }

    if (!flow_is_multicast_only(flow_attr))
        set_underlay_qp(dev, spec, underlay_qpn);

    if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
        struct mlx5_eswitch_rep *rep;

        rep = dev->port[flow_attr->port - 1].rep;
        if (!rep) {
            err = -EINVAL;
            goto free;
        }

        mlx5_ib_set_rule_source_port(dev, spec, rep);
    }

    spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);

    if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
        struct mlx5_ib_mcounters *mcounters;

        err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
        if (err)
            goto free;

        /* Add the HW counter as an extra destination of the rule. */
        mcounters = to_mcounters(flow_act.counters);
        handler->ibcounters = flow_act.counters;
        dest_arr[dest_num].type =
            MLX5_FLOW_DESTINATION_TYPE_COUNTER;
        dest_arr[dest_num].counter_id =
            mlx5_fc_id(mcounters->hw_cntrs_hndl);
        dest_num++;
    }

    if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
        if (!dest_num)
            rule_dst = NULL;
    } else {
        if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
            flow_act.action |=
                MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
        if (is_egress)
            flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        else if (dest_num)
            flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
    }

    /* Flow tags are meaningless on leftovers rules; reject them. */
    if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
        (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
         flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
        mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
                 spec->flow_context.flow_tag, flow_attr->type);
        err = -EINVAL;
        goto free;
    }
    handler->rule = mlx5_add_flow_rules(ft, spec,
                        &flow_act,
                        rule_dst, dest_num);

    if (IS_ERR(handler->rule)) {
        err = PTR_ERR(handler->rule);
        goto free;
    }

    ft_prio->refcount++;
    handler->prio = ft_prio;
    handler->dev = dev;

    ft_prio->flow_table = ft;
free:
    if (err && handler) {
        mlx5_ib_counters_clear_description(handler->ibcounters);
        kfree(handler);
    }
    kvfree(spec);
    return err ? ERR_PTR(err) : handler;
}
1167 
/* Convenience wrapper: create a rule with no underlay QP and no ucmd. */
static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
                             struct mlx5_ib_flow_prio *ft_prio,
                             const struct ib_flow_attr *flow_attr,
                             struct mlx5_flow_destination *dst)
{
    return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}
1175 
/* Indices into the leftovers_specs[] table in create_leftovers_rule(). */
enum {
    LEFTOVERS_MC,
    LEFTOVERS_UC,
};
1180 
/*
 * create_leftovers_rule() - install rules catching traffic not claimed
 * by any higher-priority flow.
 *
 * A multicast leftovers rule (dst_mac multicast bit set) is always
 * created; for IB_FLOW_ATTR_ALL_DEFAULT a unicast rule is added as well
 * and chained onto the first handler's list.  If the unicast rule fails,
 * the multicast rule is rolled back and the error is returned.
 */
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
                              struct mlx5_ib_flow_prio *ft_prio,
                              struct ib_flow_attr *flow_attr,
                              struct mlx5_flow_destination *dst)
{
    struct mlx5_ib_flow_handler *handler_ucast = NULL;
    struct mlx5_ib_flow_handler *handler = NULL;

    /* Canned attr+spec pairs: match (or exclude) the MC bit of dst_mac. */
    static struct {
        struct ib_flow_attr flow_attr;
        struct ib_flow_spec_eth eth_flow;
    } leftovers_specs[] = {
        [LEFTOVERS_MC] = {
            .flow_attr = {
                .num_of_specs = 1,
                .size = sizeof(leftovers_specs[0])
            },
            .eth_flow = {
                .type = IB_FLOW_SPEC_ETH,
                .size = sizeof(struct ib_flow_spec_eth),
                .mask = {.dst_mac = {0x1} },
                .val =  {.dst_mac = {0x1} }
            }
        },
        [LEFTOVERS_UC] = {
            .flow_attr = {
                .num_of_specs = 1,
                .size = sizeof(leftovers_specs[0])
            },
            .eth_flow = {
                .type = IB_FLOW_SPEC_ETH,
                .size = sizeof(struct ib_flow_spec_eth),
                .mask = {.dst_mac = {0x1} },
                .val = {.dst_mac = {} }
            }
        }
    };

    handler = create_flow_rule(dev, ft_prio,
                   &leftovers_specs[LEFTOVERS_MC].flow_attr,
                   dst);
    if (!IS_ERR(handler) &&
        flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
        handler_ucast = create_flow_rule(dev, ft_prio,
                         &leftovers_specs[LEFTOVERS_UC].flow_attr,
                         dst);
        if (IS_ERR(handler_ucast)) {
            /* Undo the MC rule so the pair is all-or-nothing. */
            mlx5_del_flow_rules(handler->rule);
            ft_prio->refcount--;
            kfree(handler);
            handler = handler_ucast;
        } else {
            list_add(&handler_ucast->list, &handler->list);
        }
    }

    return handler;
}
1239 
/*
 * create_sniffer_rule() - mirror all RX and TX traffic to @dst.
 *
 * Installs one catch-all rule (empty spec) in each of the RX and TX
 * sniffer tables and chains the TX handler onto the RX handler's list.
 * If the TX rule fails, the RX rule is rolled back.
 */
static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
                            struct mlx5_ib_flow_prio *ft_rx,
                            struct mlx5_ib_flow_prio *ft_tx,
                            struct mlx5_flow_destination *dst)
{
    struct mlx5_ib_flow_handler *handler_rx;
    struct mlx5_ib_flow_handler *handler_tx;
    int err;
    /* Zero specs: this attr matches every packet. */
    static const struct ib_flow_attr flow_attr  = {
        .num_of_specs = 0,
        .type = IB_FLOW_ATTR_SNIFFER,
        .size = sizeof(flow_attr)
    };

    handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
    if (IS_ERR(handler_rx)) {
        err = PTR_ERR(handler_rx);
        goto err;
    }

    handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
    if (IS_ERR(handler_tx)) {
        err = PTR_ERR(handler_tx);
        goto err_tx;
    }

    list_add(&handler_tx->list, &handler_rx->list);

    return handler_rx;

err_tx:
    mlx5_del_flow_rules(handler_rx->rule);
    ft_rx->refcount--;
    kfree(handler_rx);
err:
    return ERR_PTR(err);
}
1277 
/*
 * mlx5_ib_create_flow() - ib_device verb entry point for flow creation.
 *
 * Validates the (untrusted) user command and flow attribute, picks the
 * destination (wire for egress, the QP's TIR for ingress), grabs the
 * matching flow table(s) under flow_db->lock, and dispatches per attr
 * type to the rule-creation helper.  Returns the embedded ib_flow of
 * the new handler or an ERR_PTR.
 */
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                       struct ib_flow_attr *flow_attr,
                       struct ib_udata *udata)
{
    struct mlx5_ib_dev *dev = to_mdev(qp->device);
    struct mlx5_ib_qp *mqp = to_mqp(qp);
    struct mlx5_ib_flow_handler *handler = NULL;
    struct mlx5_flow_destination *dst = NULL;
    struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
    struct mlx5_ib_flow_prio *ft_prio;
    bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
    struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
    size_t min_ucmd_sz, required_ucmd_sz;
    int err;
    int underlay_qpn;

    if (udata && udata->inlen) {
        /* Read the fixed header first to learn the full command size. */
        min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
        if (udata->inlen < min_ucmd_sz)
            return ERR_PTR(-EOPNOTSUPP);

        err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
        if (err)
            return ERR_PTR(err);

        /* currently supports only one counters data */
        if (ucmd_hdr.ncounters_data > 1)
            return ERR_PTR(-EINVAL);

        required_ucmd_sz = min_ucmd_sz +
            sizeof(struct mlx5_ib_flow_counters_data) *
            ucmd_hdr.ncounters_data;
        /* Any trailing bytes beyond what we understand must be zero. */
        if (udata->inlen > required_ucmd_sz &&
            !ib_is_udata_cleared(udata, required_ucmd_sz,
                     udata->inlen - required_ucmd_sz))
            return ERR_PTR(-EOPNOTSUPP);

        ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
        if (!ucmd)
            return ERR_PTR(-ENOMEM);

        err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
        if (err)
            goto free_ucmd;
    }

    /* NOTE(review): historical errno for this check is -ENOMEM, kept
     * as-is since it is visible to userspace.
     */
    if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
        err = -ENOMEM;
        goto free_ucmd;
    }

    if (flow_attr->flags &
        ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
        err = -EINVAL;
        goto free_ucmd;
    }

    /* Leftovers rules are RX-only by definition. */
    if (is_egress &&
        (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
         flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
        err = -EINVAL;
        goto free_ucmd;
    }

    dst = kzalloc(sizeof(*dst), GFP_KERNEL);
    if (!dst) {
        err = -ENOMEM;
        goto free_ucmd;
    }

    mutex_lock(&dev->flow_db->lock);

    ft_prio = get_flow_table(dev, flow_attr,
                 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
    if (IS_ERR(ft_prio)) {
        err = PTR_ERR(ft_prio);
        goto unlock;
    }
    if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
        /* Sniffer needs both directions; the RX table is ft_prio. */
        ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
        if (IS_ERR(ft_prio_tx)) {
            err = PTR_ERR(ft_prio_tx);
            ft_prio_tx = NULL;
            goto destroy_ft;
        }
    }

    if (is_egress) {
        dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
    } else {
        /* Ingress: forward to the QP's (or RSS QP's) TIR. */
        dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
        if (mqp->is_rss)
            dst->tir_num = mqp->rss_qp.tirn;
        else
            dst->tir_num = mqp->raw_packet_qp.rq.tirn;
    }

    switch (flow_attr->type) {
    case IB_FLOW_ATTR_NORMAL:
        underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
                       mqp->underlay_qpn :
                       0;
        handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
                        underlay_qpn, ucmd);
        break;
    case IB_FLOW_ATTR_ALL_DEFAULT:
    case IB_FLOW_ATTR_MC_DEFAULT:
        handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
        break;
    case IB_FLOW_ATTR_SNIFFER:
        handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
        break;
    default:
        err = -EINVAL;
        goto destroy_ft;
    }

    if (IS_ERR(handler)) {
        err = PTR_ERR(handler);
        handler = NULL;
        goto destroy_ft;
    }

    mutex_unlock(&dev->flow_db->lock);
    kfree(dst);
    kfree(ucmd);

    return &handler->ibflow;

destroy_ft:
    put_flow_table(dev, ft_prio, false);
    if (ft_prio_tx)
        put_flow_table(dev, ft_prio_tx, false);
unlock:
    mutex_unlock(&dev->flow_db->lock);
    kfree(dst);
free_ucmd:
    kfree(ucmd);
    return ERR_PTR(err);
}
1418 
/*
 * _get_flow_table() - resolve (and lazily create) the flow table for a
 * raw (DEVX/matcher) flow in the given namespace at @user_priority.
 *
 * Computes the per-namespace table-size cap and tunnel encap/decap
 * flags, then returns the cached prio if its table already exists or
 * builds a new one via _get_prio().  Returns the prio or an ERR_PTR.
 */
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
        enum mlx5_flow_namespace_type ns_type,
        bool mcast)
{
    struct mlx5_flow_namespace *ns = NULL;
    struct mlx5_ib_flow_prio *prio = NULL;
    int max_table_size = 0;
    bool esw_encap;
    u32 flags = 0;
    int priority;

    if (mcast)
        priority = MLX5_IB_FLOW_MCAST_PRIO;
    else
        priority = ib_prio_to_core_prio(user_priority, false);

    /* With eswitch-owned encap, tables must not request tunnel flags. */
    esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
        DEVLINK_ESWITCH_ENCAP_MODE_NONE;
    switch (ns_type) {
    case MLX5_FLOW_NAMESPACE_BYPASS:
        max_table_size = BIT(
            MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
        if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
            flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
        if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
                          reformat_l3_tunnel_to_l2) &&
            !esw_encap)
            flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
        break;
    case MLX5_FLOW_NAMESPACE_EGRESS:
        max_table_size = BIT(
            MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
        if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
            !esw_encap)
            flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
        break;
    case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
        /* FDB bypass: tunnel flags only make sense WITH esw encap. */
        max_table_size = BIT(
            MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
        if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
            flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
        if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
                           reformat_l3_tunnel_to_l2) &&
            esw_encap)
            flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
        priority = user_priority;
        break;
    case MLX5_FLOW_NAMESPACE_RDMA_RX:
        max_table_size = BIT(
            MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
        priority = user_priority;
        break;
    case MLX5_FLOW_NAMESPACE_RDMA_TX:
        max_table_size = BIT(
            MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
        priority = user_priority;
        break;
    default:
        break;
    }

    max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);

    ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
    if (!ns)
        return ERR_PTR(-EOPNOTSUPP);

    switch (ns_type) {
    case MLX5_FLOW_NAMESPACE_BYPASS:
        prio = &dev->flow_db->prios[priority];
        break;
    case MLX5_FLOW_NAMESPACE_EGRESS:
        prio = &dev->flow_db->egress_prios[priority];
        break;
    case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
        prio = &dev->flow_db->fdb[priority];
        break;
    case MLX5_FLOW_NAMESPACE_RDMA_RX:
        prio = &dev->flow_db->rdma_rx[priority];
        break;
    case MLX5_FLOW_NAMESPACE_RDMA_TX:
        prio = &dev->flow_db->rdma_tx[priority];
        break;
    default: return ERR_PTR(-EINVAL);
    }

    /* Defensive: every case above yields an array-element address, so
     * prio cannot actually be NULL here.
     */
    if (!prio)
        return ERR_PTR(-EINVAL);

    if (prio->flow_table)
        return prio;

    return _get_prio(dev, ns, prio, priority, max_table_size,
             MLX5_FS_MAX_TYPES, flags);
}
1515 
/*
 * _create_raw_flow_rule() - install a rule built from a raw (DEVX)
 * match value plus a pre-validated matcher mask.
 *
 * @cmd_in/@inlen: the user-provided match value, copied verbatim into
 *	the spec.
 * @dst/@dst_num: destination array (may be NULL/0 for non-forwarding
 *	actions).
 *
 * On success raises ft_prio's refcount.  Returns the handler or an
 * ERR_PTR.
 */
static struct mlx5_ib_flow_handler *
_create_raw_flow_rule(struct mlx5_ib_dev *dev,
              struct mlx5_ib_flow_prio *ft_prio,
              struct mlx5_flow_destination *dst,
              struct mlx5_ib_flow_matcher  *fs_matcher,
              struct mlx5_flow_context *flow_context,
              struct mlx5_flow_act *flow_act,
              void *cmd_in, int inlen,
              int dst_num)
{
    struct mlx5_ib_flow_handler *handler;
    struct mlx5_flow_spec *spec;
    struct mlx5_flow_table *ft = ft_prio->flow_table;
    int err = 0;

    spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
    handler = kzalloc(sizeof(*handler), GFP_KERNEL);
    if (!handler || !spec) {
        err = -ENOMEM;
        goto free;
    }

    INIT_LIST_HEAD(&handler->list);

    /* Value comes from the user; mask and enable bits from the matcher. */
    memcpy(spec->match_value, cmd_in, inlen);
    memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
           fs_matcher->mask_len);
    spec->match_criteria_enable = fs_matcher->match_criteria_enable;
    spec->flow_context = *flow_context;

    handler->rule = mlx5_add_flow_rules(ft, spec,
                        flow_act, dst, dst_num);

    if (IS_ERR(handler->rule)) {
        err = PTR_ERR(handler->rule);
        goto free;
    }

    ft_prio->refcount++;
    handler->prio = ft_prio;
    handler->dev = dev;
    ft_prio->flow_table = ft;

free:
    if (err)
        kfree(handler);
    kvfree(spec);
    return err ? ERR_PTR(err) : handler;
}
1565 
1566 static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
1567                 void *match_v)
1568 {
1569     void *match_c;
1570     void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
1571     void *dmac, *dmac_mask;
1572     void *ipv4, *ipv4_mask;
1573 
1574     if (!(fs_matcher->match_criteria_enable &
1575           (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
1576         return false;
1577 
1578     match_c = fs_matcher->matcher_mask.match_params;
1579     match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
1580                        outer_headers);
1581     match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
1582                        outer_headers);
1583 
1584     dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1585                 dmac_47_16);
1586     dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1587                  dmac_47_16);
1588 
1589     if (is_multicast_ether_addr(dmac) &&
1590         is_multicast_ether_addr(dmac_mask))
1591         return true;
1592 
1593     ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
1594                 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1595 
1596     ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
1597                  dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
1598 
1599     if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
1600         ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
1601         return true;
1602 
1603     return false;
1604 }
1605 
/*
 * raw_fs_rule_add() - top-level helper for adding a matcher-based (raw)
 * steering rule: resolves the flow table, builds the destination array
 * (TIR / flow table / wire, plus an optional counter) and installs the
 * rule.
 *
 * Takes a reference on @fs_matcher on success.  Returns the handler or
 * an ERR_PTR.
 */
static struct mlx5_ib_flow_handler *raw_fs_rule_add(
    struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
    struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
    u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
{
    struct mlx5_flow_destination *dst;
    struct mlx5_ib_flow_prio *ft_prio;
    struct mlx5_ib_flow_handler *handler;
    int dst_num = 0;
    bool mcast;
    int err;

    if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
        return ERR_PTR(-EOPNOTSUPP);

    if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
        return ERR_PTR(-ENOMEM);

    /* At most two destinations: the forward target and a counter. */
    dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
    if (!dst)
        return ERR_PTR(-ENOMEM);

    mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
    mutex_lock(&dev->flow_db->lock);

    ft_prio = _get_flow_table(dev, fs_matcher->priority,
                  fs_matcher->ns_type, mcast);
    if (IS_ERR(ft_prio)) {
        err = PTR_ERR(ft_prio);
        goto unlock;
    }

    switch (dest_type) {
    case MLX5_FLOW_DESTINATION_TYPE_TIR:
        dst[dst_num].type = dest_type;
        dst[dst_num++].tir_num = dest_id;
        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        break;
    case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
        /* Caller passes a table number, hence the _NUM variant. */
        dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
        dst[dst_num++].ft_num = dest_id;
        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        break;
    case MLX5_FLOW_DESTINATION_TYPE_PORT:
        dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        break;
    default:
        break;
    }

    if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
        dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
        dst[dst_num].counter_id = counter_id;
        dst_num++;
    }

    handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
                    fs_matcher, flow_context, flow_act,
                    cmd_in, inlen, dst_num);

    if (IS_ERR(handler)) {
        err = PTR_ERR(handler);
        goto destroy_ft;
    }

    mutex_unlock(&dev->flow_db->lock);
    atomic_inc(&fs_matcher->usecnt);
    handler->flow_matcher = fs_matcher;

    kfree(dst);

    return handler;

destroy_ft:
    put_flow_table(dev, ft_prio, false);
unlock:
    mutex_unlock(&dev->flow_db->lock);
    kfree(dst);

    return ERR_PTR(err);
}
1688 
1689 static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
1690 {
1691     switch (maction->flow_action_raw.sub_type) {
1692     case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
1693         mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
1694                        maction->flow_action_raw.modify_hdr);
1695         break;
1696     case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
1697         mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
1698                          maction->flow_action_raw.pkt_reformat);
1699         break;
1700     case MLX5_IB_FLOW_ACTION_DECAP:
1701         break;
1702     default:
1703         break;
1704     }
1705 }
1706 
1707 static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
1708 {
1709     struct mlx5_ib_flow_action *maction = to_mflow_act(action);
1710 
1711     switch (action->type) {
1712     case IB_FLOW_ACTION_UNSPECIFIED:
1713         destroy_flow_action_raw(maction);
1714         break;
1715     default:
1716         WARN_ON(true);
1717         break;
1718     }
1719 
1720     kfree(maction);
1721     return 0;
1722 }
1723 
1724 static int
1725 mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
1726                  enum mlx5_flow_namespace_type *namespace)
1727 {
1728     switch (table_type) {
1729     case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
1730         *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
1731         break;
1732     case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
1733         *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
1734         break;
1735     case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
1736         *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
1737         break;
1738     case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
1739         *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
1740         break;
1741     case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
1742         *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
1743         break;
1744     default:
1745         return -EINVAL;
1746     }
1747 
1748     return 0;
1749 }
1750 
/*
 * Attribute spec for the FLOW_TYPE enum attribute: a NORMAL flow carries a
 * u16 priority as its inline payload; the sniffer and default flow types
 * carry no data at all.
 */
static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
	[MLX5_IB_FLOW_TYPE_NORMAL] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		.u.ptr = {
			.len = sizeof(u16), /* data is priority */
			.min_len = sizeof(u16),
		}
	},
	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),
	},
};
1772 
1773 static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
1774 {
1775     struct devx_obj *devx_obj = obj;
1776     u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1777 
1778     switch (opcode) {
1779     case MLX5_CMD_OP_DESTROY_TIR:
1780         *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1781         *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
1782                     obj_id);
1783         return true;
1784 
1785     case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
1786         *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1787         *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
1788                     table_id);
1789         return true;
1790     default:
1791         return false;
1792     }
1793 }
1794 
/*
 * Parse and validate the destination-related attributes of a CREATE_FLOW
 * request.  At most one destination may be chosen: a DEVX object
 * (returned via *dest_id/*dest_type), a raw-packet QP (returned via *qp
 * and its TIR number), or - implicitly, for TX namespaces - the wire port.
 * *flags returns the DEFAULT_MISS/DROP modifier flags.
 *
 * Returns 0 on success, or a negative errno for invalid attribute
 * combinations per namespace.
 */
static int get_dests(struct uverbs_attr_bundle *attrs,
		     struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
		     int *dest_type, struct ib_qp **qp, u32 *flags)
{
	bool dest_devx, dest_qp;
	void *devx_obj;
	int err;

	dest_devx = uverbs_attr_is_valid(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
	dest_qp = uverbs_attr_is_valid(attrs,
				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);

	*flags = 0;
	err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
				 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
					 MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
	if (err)
		return err;

	/* Both flags are not allowed */
	if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
	    *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
		return -EINVAL;

	/* In BYPASS a destination excludes both the other destination kind
	 * and any modifier flag.
	 */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
		if (dest_devx && (dest_qp || *flags))
			return -EINVAL;
		else if (dest_qp && *flags)
			return -EINVAL;
	}

	/* Allow only DEVX object, drop as dest for FDB */
	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
	    !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
		return -EINVAL;

	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
		return -EINVAL;

	*qp = NULL;
	if (dest_devx) {
		devx_obj =
			uverbs_attr_get_obj(attrs,
					    MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);

		/* Verify that the given DEVX object is a flow
		 * steering destination.
		 */
		if (!is_flow_dest(devx_obj, dest_id, dest_type))
			return -EINVAL;
		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			return -EINVAL;
	} else if (dest_qp) {
		struct mlx5_ib_qp *mqp;

		*qp = uverbs_attr_get_obj(attrs,
					  MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
		if (IS_ERR(*qp))
			return PTR_ERR(*qp);

		if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
			return -EINVAL;

		/* Steer to the QP's underlying TIR (RSS TIR or plain RQ TIR) */
		mqp = to_mqp(*qp);
		if (mqp->is_rss)
			*dest_id = mqp->rss_qp.tirn;
		else
			*dest_id = mqp->raw_packet_qp.rq.tirn;
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
		/* No explicit destination on TX: default to the wire port */
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	}

	/* A TIR destination is meaningless in a TX namespace */
	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
		return -EINVAL;

	return 0;
}
1883 
1884 static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
1885 {
1886     struct devx_obj *devx_obj = obj;
1887     u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
1888 
1889     if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
1890 
1891         if (offset && offset >= devx_obj->flow_counter_bulk_size)
1892             return false;
1893 
1894         *counter_id = MLX5_GET(dealloc_flow_counter_in,
1895                        devx_obj->dinbox,
1896                        flow_counter_id);
1897         *counter_id += offset;
1898         return true;
1899     }
1900 
1901     return false;
1902 }
1903 
#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
/*
 * Handler for the driver-specific CREATE_FLOW method: validates the
 * destination/counter/flow-action attributes and installs a raw-format
 * steering rule via raw_fs_rule_add().  Requires CAP_NET_RAW.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
	struct uverbs_attr_bundle *attrs)
{
	struct mlx5_flow_context flow_context = {.flow_tag =
		MLX5_FS_DEFAULT_FLOW_TAG};
	u32 *offset_attr, offset = 0, counter_id = 0;
	int dest_id, dest_type = -1, inlen, len, ret, i;
	struct mlx5_ib_flow_handler *flow_handler;
	struct mlx5_ib_flow_matcher *fs_matcher;
	struct ib_uobject **arr_flow_actions;
	struct ib_uflow_resources *uflow_res;
	struct mlx5_flow_act flow_act = {};
	struct ib_qp *qp = NULL;
	void *devx_obj, *cmd_in;
	struct ib_uobject *uobj;
	struct mlx5_ib_dev *dev;
	u32 flags;

	if (!capable(CAP_NET_RAW))
		return -EPERM;

	fs_matcher = uverbs_attr_get_obj(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
	uobj =  uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
	dev = mlx5_udata_to_mdev(&attrs->driver_udata);

	if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
		return -EINVAL;

	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;

	if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;

	/* Optional flow counter: a single DEVX counter object, optionally
	 * with a single u32 offset into its bulk.
	 */
	len = uverbs_attr_get_uobjs_arr(attrs,
		MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
	if (len) {
		devx_obj = arr_flow_actions[0]->object;

		if (uverbs_attr_is_valid(attrs,
					 MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {

			int num_offsets = uverbs_attr_ptr_get_array_size(
				attrs,
				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
				sizeof(u32));

			if (num_offsets != 1)
				return -EINVAL;

			offset_attr = uverbs_attr_get_alloced_ptr(
				attrs,
				MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
			offset = *offset_attr;
		}

		if (!is_flow_counter(devx_obj, offset, &counter_id))
			return -EINVAL;

		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	}

	cmd_in = uverbs_attr_get_alloced_ptr(
		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
	inlen = uverbs_attr_get_len(attrs,
				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);

	uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
	if (!uflow_res)
		return -ENOMEM;

	/* Collect the referenced flow actions into flow_act and pin them in
	 * uflow_res so they stay alive for the lifetime of the flow.
	 */
	len = uverbs_attr_get_uobjs_arr(attrs,
		MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
	for (i = 0; i < len; i++) {
		struct mlx5_ib_flow_action *maction =
			to_mflow_act(arr_flow_actions[i]->object);

		ret = parse_flow_flow_action(maction, false, &flow_act);
		if (ret)
			goto err_out;
		flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
				   arr_flow_actions[i]->object);
	}

	/* Flow tag is optional; when present it must fit in 24 bits */
	ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
			       MLX5_IB_ATTR_CREATE_FLOW_TAG);
	if (!ret) {
		if (flow_context.flow_tag >= BIT(24)) {
			ret = -EINVAL;
			goto err_out;
		}
		flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
	}

	flow_handler =
		raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
				counter_id, cmd_in, inlen, dest_id, dest_type);
	if (IS_ERR(flow_handler)) {
		ret = PTR_ERR(flow_handler);
		goto err_out;
	}

	/* Ownership of uflow_res passes to the flow uobject here */
	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);

	return 0;
err_out:
	ib_uverbs_flow_resources_free(uflow_res);
	return ret;
}
2015 
2016 static int flow_matcher_cleanup(struct ib_uobject *uobject,
2017                 enum rdma_remove_reason why,
2018                 struct uverbs_attr_bundle *attrs)
2019 {
2020     struct mlx5_ib_flow_matcher *obj = uobject->object;
2021 
2022     if (atomic_read(&obj->usecnt))
2023         return -EBUSY;
2024 
2025     kfree(obj);
2026     return 0;
2027 }
2028 
/*
 * uobject cleanup callback for a steering anchor: refuse destruction while
 * the anchor is in use, otherwise drop the flow-table-priority reference
 * taken at anchor creation (which may destroy the table) and free the
 * anchor.
 *
 * NOTE(review): usecnt is read before flow_db->lock is taken -- confirm no
 * path can raise usecnt concurrently with cleanup.
 */
static int steering_anchor_cleanup(struct ib_uobject *uobject,
				   enum rdma_remove_reason why,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_steering_anchor *obj = uobject->object;

	if (atomic_read(&obj->usecnt))
		return -EBUSY;

	mutex_lock(&obj->dev->flow_db->lock);
	put_flow_table(obj->dev, obj->ft_prio, true);
	mutex_unlock(&obj->dev->flow_db->lock);

	kfree(obj);
	return 0;
}
2045 
/*
 * Resolve the flow namespace for a matcher from its creation attributes.
 * Precedence: the FT_TYPE attribute (new uAPI) wins; otherwise the legacy
 * FLOW_FLAGS attribute selects NIC TX for EGRESS; otherwise BYPASS.
 * Supplying both attributes is rejected with -EINVAL.
 */
static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
			      struct mlx5_ib_flow_matcher *obj)
{
	enum mlx5_ib_uapi_flow_table_type ft_type =
		MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
	u32 flags;
	int err;

	/* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
	 * users should switch to it. We leave this to not break userspace
	 */
	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
	    uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
		return -EINVAL;

	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
		err = uverbs_get_const(&ft_type, attrs,
				       MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
		if (err)
			return err;

		err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
		if (err)
			return err;

		return 0;
	}

	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
		err = uverbs_get_flags32(&flags, attrs,
					 MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
					 IB_FLOW_ATTR_FLAGS_EGRESS);
		if (err)
			return err;

		/* EGRESS is the only accepted flag; if set, use NIC TX.
		 * flags == 0 falls through to the BYPASS default below.
		 */
		if (flags)
			return mlx5_ib_ft_type_to_namespace(
				MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
				&obj->ns_type);
	}

	obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;

	return 0;
}
2091 
/*
 * Handler for FLOW_MATCHER_CREATE: allocates a matcher object, copies in
 * the match mask, flow type/priority and match-criteria-enable bits,
 * resolves its namespace and attaches it to the new uobject.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj = uverbs_attr_get_uobject(
		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
	struct mlx5_ib_flow_matcher *obj;
	int err;

	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	obj->mask_len = uverbs_attr_get_len(
		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
	err = uverbs_copy_from(&obj->matcher_mask,
			       attrs,
			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
	if (err)
		goto end;

	obj->flow_type = uverbs_attr_get_enum_id(
		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);

	/* Only NORMAL flows carry an inline u16 priority (see
	 * mlx5_ib_flow_type[]).
	 */
	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
		err = uverbs_copy_from(&obj->priority,
				       attrs,
				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
		if (err)
			goto end;
	}

	err = uverbs_copy_from(&obj->match_criteria_enable,
			       attrs,
			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
	if (err)
		goto end;

	err = mlx5_ib_matcher_ns(attrs, obj);
	if (err)
		goto end;

	/* FDB matchers are only valid while the eswitch is in offloads mode */
	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
		err = -EINVAL;
		goto end;
	}

	uobj->object = obj;
	obj->mdev = dev->mdev;
	atomic_set(&obj->usecnt, 0);
	return 0;

end:
	kfree(obj);
	return err;
}
2149 
/*
 * Handler for STEERING_ANCHOR_CREATE: resolves the requested namespace and
 * priority to a flow-table priority, takes a reference on it, and returns
 * the flow table id to userspace so DEVX rules can point at it.  Requires
 * CAP_NET_RAW.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj = uverbs_attr_get_uobject(
		attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
	enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5_ib_steering_anchor *obj;
	struct mlx5_ib_flow_prio *ft_prio;
	u16 priority;
	u32 ft_id;
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;

	err = uverbs_get_const(&ib_uapi_ft_type, attrs,
			       MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
	if (err)
		return err;

	err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
	if (err)
		return err;

	err = uverbs_copy_from(&priority, attrs,
			       MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
	if (err)
		return err;

	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (!obj)
		return -ENOMEM;

	/* Look up (or create) the table for this priority and pin it; the
	 * extra refcount is dropped in steering_anchor_cleanup().
	 */
	mutex_lock(&dev->flow_db->lock);
	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
	if (IS_ERR(ft_prio)) {
		mutex_unlock(&dev->flow_db->lock);
		err = PTR_ERR(ft_prio);
		goto free_obj;
	}

	ft_prio->refcount++;
	ft_id = mlx5_flow_table_id(ft_prio->flow_table);
	mutex_unlock(&dev->flow_db->lock);

	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
			     &ft_id, sizeof(ft_id));
	if (err)
		goto put_flow_table;

	uobj->object = obj;
	obj->dev = dev;
	obj->ft_prio = ft_prio;
	atomic_set(&obj->usecnt, 0);

	return 0;

put_flow_table:
	mutex_lock(&dev->flow_db->lock);
	put_flow_table(dev, ft_prio, true);
	mutex_unlock(&dev->flow_db->lock);
free_obj:
	kfree(obj);

	return err;
}
2218 
2219 static struct ib_flow_action *
2220 mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
2221                  enum mlx5_ib_uapi_flow_table_type ft_type,
2222                  u8 num_actions, void *in)
2223 {
2224     enum mlx5_flow_namespace_type namespace;
2225     struct mlx5_ib_flow_action *maction;
2226     int ret;
2227 
2228     ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2229     if (ret)
2230         return ERR_PTR(-EINVAL);
2231 
2232     maction = kzalloc(sizeof(*maction), GFP_KERNEL);
2233     if (!maction)
2234         return ERR_PTR(-ENOMEM);
2235 
2236     maction->flow_action_raw.modify_hdr =
2237         mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
2238 
2239     if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
2240         ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
2241         kfree(maction);
2242         return ERR_PTR(ret);
2243     }
2244     maction->flow_action_raw.sub_type =
2245         MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
2246     maction->flow_action_raw.dev = dev;
2247 
2248     return &maction->ib_action;
2249 }
2250 
2251 static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
2252 {
2253     return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
2254                      max_modify_header_actions) ||
2255            MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
2256                      max_modify_header_actions) ||
2257            MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
2258                      max_modify_header_actions);
2259 }
2260 
/*
 * Handler for FLOW_ACTION_CREATE_MODIFY_HEADER: validates device support,
 * sizes the PRM action array, and creates the modify-header flow action
 * bound to the new flow-action uobject.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj = uverbs_attr_get_uobject(
		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
	enum mlx5_ib_uapi_flow_table_type ft_type;
	struct ib_flow_action *action;
	int num_actions;
	void *in;
	int ret;

	if (!mlx5_ib_modify_header_supported(mdev))
		return -EOPNOTSUPP;

	in = uverbs_attr_get_alloced_ptr(attrs,
		MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);

	/* Number of fixed-size PRM action entries in the input buffer */
	num_actions = uverbs_attr_ptr_get_array_size(
		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
	if (num_actions < 0)
		return num_actions;

	ret = uverbs_get_const(&ft_type, attrs,
			       MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
	if (ret)
		return ret;
	action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
	if (IS_ERR(action))
		return PTR_ERR(action);

	uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
				       IB_FLOW_ACTION_UNSPECIFIED);

	return 0;
}
2298 
2299 static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
2300                               u8 packet_reformat_type,
2301                               u8 ft_type)
2302 {
2303     switch (packet_reformat_type) {
2304     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2305         if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2306             return MLX5_CAP_FLOWTABLE(ibdev->mdev,
2307                           encap_general_header);
2308         break;
2309     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2310         if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
2311             return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
2312                 reformat_l2_to_l3_tunnel);
2313         break;
2314     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2315         if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2316             return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
2317                 reformat_l3_tunnel_to_l2);
2318         break;
2319     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
2320         if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
2321             return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
2322         break;
2323     default:
2324         break;
2325     }
2326 
2327     return false;
2328 }
2329 
2330 static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
2331 {
2332     switch (dv_prt) {
2333     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
2334         *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
2335         break;
2336     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
2337         *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
2338         break;
2339     case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
2340         *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
2341         break;
2342     default:
2343         return -EINVAL;
2344     }
2345 
2346     return 0;
2347 }
2348 
2349 static int mlx5_ib_flow_action_create_packet_reformat_ctx(
2350     struct mlx5_ib_dev *dev,
2351     struct mlx5_ib_flow_action *maction,
2352     u8 ft_type, u8 dv_prt,
2353     void *in, size_t len)
2354 {
2355     struct mlx5_pkt_reformat_params reformat_params;
2356     enum mlx5_flow_namespace_type namespace;
2357     u8 prm_prt;
2358     int ret;
2359 
2360     ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
2361     if (ret)
2362         return ret;
2363 
2364     ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
2365     if (ret)
2366         return ret;
2367 
2368     memset(&reformat_params, 0, sizeof(reformat_params));
2369     reformat_params.type = prm_prt;
2370     reformat_params.size = len;
2371     reformat_params.data = in;
2372     maction->flow_action_raw.pkt_reformat =
2373         mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
2374                        namespace);
2375     if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
2376         ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
2377         return ret;
2378     }
2379 
2380     maction->flow_action_raw.sub_type =
2381         MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
2382     maction->flow_action_raw.dev = dev;
2383 
2384     return 0;
2385 }
2386 
/*
 * Handler for FLOW_ACTION_CREATE_PACKET_REFORMAT: validates the reformat
 * type against device capabilities and creates the flow action.  Plain
 * decap (L2_TUNNEL_TO_L2) needs no device context; all other types carry a
 * payload buffer that is turned into a packet-reformat context.
 */
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
	struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
		MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
	enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
	enum mlx5_ib_uapi_flow_table_type ft_type;
	struct mlx5_ib_flow_action *maction;
	int ret;

	ret = uverbs_get_const(&ft_type, attrs,
			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
	if (ret)
		return ret;

	ret = uverbs_get_const(&dv_prt, attrs,
			       MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
	if (ret)
		return ret;

	if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
		return -EOPNOTSUPP;

	maction = kzalloc(sizeof(*maction), GFP_KERNEL);
	if (!maction)
		return -ENOMEM;

	if (dv_prt ==
	    MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
		/* Decap requires no device context, only the sub-type tag */
		maction->flow_action_raw.sub_type =
			MLX5_IB_FLOW_ACTION_DECAP;
		maction->flow_action_raw.dev = mdev;
	} else {
		void *in;
		int len;

		in = uverbs_attr_get_alloced_ptr(attrs,
			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
		if (IS_ERR(in)) {
			ret = PTR_ERR(in);
			goto free_maction;
		}

		len = uverbs_attr_get_len(attrs,
			MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);

		ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
			maction, ft_type, dv_prt, in, len);
		if (ret)
			goto free_maction;
	}

	uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
				       IB_FLOW_ACTION_UNSPECIFIED);
	return 0;

free_maction:
	kfree(maction);
	return ret;
}
2448 
/* Attribute layout of the driver-specific CREATE_FLOW method: a mandatory
 * handle, match value and matcher, plus optional destinations (QP or DEVX
 * object), flow actions, flow tag, a single counter with optional offset,
 * and DEFAULT_MISS/DROP flags.
 */
DECLARE_UVERBS_NAMED_METHOD(
	MLX5_IB_METHOD_CREATE_FLOW,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
			UVERBS_OBJECT_FLOW,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_PTR_IN(
		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
		UA_MANDATORY,
		UA_ALLOC_AND_COPY),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
			MLX5_IB_OBJECT_FLOW_MATCHER,
			UVERBS_ACCESS_READ,
			UA_MANDATORY),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
			UVERBS_OBJECT_QP,
			UVERBS_ACCESS_READ),
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
			MLX5_IB_OBJECT_DEVX_OBJ,
			UVERBS_ACCESS_READ),
	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
			     UVERBS_OBJECT_FLOW_ACTION,
			     UVERBS_ACCESS_READ, 1,
			     MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
			     UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
			   UVERBS_ATTR_TYPE(u32),
			   UA_OPTIONAL),
	UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
			     MLX5_IB_OBJECT_DEVX_OBJ,
			     UVERBS_ACCESS_READ, 1, 1,
			     UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
			   UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
			   UA_OPTIONAL,
			   UA_ALLOC_AND_COPY),
	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
			     enum mlx5_ib_create_flow_flags,
			     UA_OPTIONAL));

/* DESTROY_FLOW takes only the flow handle to remove */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MLX5_IB_METHOD_DESTROY_FLOW,
	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
			UVERBS_OBJECT_FLOW,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));

/* Register both flow methods on the generic UVERBS_OBJECT_FLOW */
ADD_UVERBS_METHODS(mlx5_ib_fs,
		   UVERBS_OBJECT_FLOW,
		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
2501 
2502 DECLARE_UVERBS_NAMED_METHOD(
2503     MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
2504     UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
2505             UVERBS_OBJECT_FLOW_ACTION,
2506             UVERBS_ACCESS_NEW,
2507             UA_MANDATORY),
2508     UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
2509                UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
2510                    set_add_copy_action_in_auto)),
2511                UA_MANDATORY,
2512                UA_ALLOC_AND_COPY),
2513     UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
2514                  enum mlx5_ib_uapi_flow_table_type,
2515                  UA_MANDATORY));
2516 
/*
 * Create a FLOW_ACTION object of packet-reformat type.  Inputs:
 *  - DATA_BUF: optional reformat payload (e.g. header to insert), at
 *    least one byte when present, copied from user.
 *  - TYPE: which packet reformat operation to perform.
 *  - FT_TYPE: the flow table type the reformat applies to.
 * Output is the new flow action handle (UVERBS_ACCESS_NEW IDR attribute).
 */
DECLARE_UVERBS_NAMED_METHOD(
    MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
    UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
            UVERBS_OBJECT_FLOW_ACTION,
            UVERBS_ACCESS_NEW,
            UA_MANDATORY),
    UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
               UVERBS_ATTR_MIN_SIZE(1),
               UA_ALLOC_AND_COPY,
               UA_OPTIONAL),
    UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
                 enum mlx5_ib_uapi_flow_action_packet_reformat_type,
                 UA_MANDATORY),
    UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
                 enum mlx5_ib_uapi_flow_table_type,
                 UA_MANDATORY));
2533 
/*
 * Attach both driver flow-action creation methods (modify-header and
 * packet-reformat) to the core UVERBS_OBJECT_FLOW_ACTION object.
 */
ADD_UVERBS_METHODS(
    mlx5_ib_flow_actions,
    UVERBS_OBJECT_FLOW_ACTION,
    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
2539 
/*
 * Create a driver-specific flow matcher object.  Inputs:
 *  - MATCH_MASK: match mask blob, 1..sizeof(struct mlx5_ib_match_params).
 *  - FLOW_TYPE: enum-tagged flow type attribute.
 *  - MATCH_CRITERIA: u8 match criteria enable bits.
 *  - FLOW_FLAGS (optional): ib_flow_flags modifiers.
 *  - FT_TYPE (optional): target flow table type.
 * Output is the new matcher handle (UVERBS_ACCESS_NEW IDR attribute).
 */
DECLARE_UVERBS_NAMED_METHOD(
    MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
    UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
            MLX5_IB_OBJECT_FLOW_MATCHER,
            UVERBS_ACCESS_NEW,
            UA_MANDATORY),
    UVERBS_ATTR_PTR_IN(
        MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
        UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
        UA_MANDATORY),
    UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
                mlx5_ib_flow_type,
                UA_MANDATORY),
    UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
               UVERBS_ATTR_TYPE(u8),
               UA_MANDATORY),
    UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
                 enum ib_flow_flags,
                 UA_OPTIONAL),
    UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
                 enum mlx5_ib_uapi_flow_table_type,
                 UA_OPTIONAL));
2562 
/*
 * Destroy method for flow matcher objects: takes the matcher handle with
 * DESTROY access so the uverbs core tears the object down.
 */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
    MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
    UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
            MLX5_IB_OBJECT_FLOW_MATCHER,
            UVERBS_ACCESS_DESTROY,
            UA_MANDATORY));
2569 
/*
 * The flow matcher uverbs object: IDR-allocated, released via
 * flow_matcher_cleanup() (defined earlier in this file), with the
 * create/destroy methods above attached.
 */
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
                UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
                &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
                &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
2574 
2575 DECLARE_UVERBS_NAMED_METHOD(
2576     MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
2577     UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
2578             MLX5_IB_OBJECT_STEERING_ANCHOR,
2579             UVERBS_ACCESS_NEW,
2580             UA_MANDATORY),
2581     UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
2582                  enum mlx5_ib_uapi_flow_table_type,
2583                  UA_MANDATORY),
2584     UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
2585                UVERBS_ATTR_TYPE(u16),
2586                UA_MANDATORY),
2587     UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
2588                UVERBS_ATTR_TYPE(u32),
2589                UA_MANDATORY));
2590 
/*
 * Destroy method for steering anchor objects: takes the anchor handle
 * with DESTROY access so the uverbs core tears the object down.
 */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
    MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
    UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
            MLX5_IB_OBJECT_STEERING_ANCHOR,
            UVERBS_ACCESS_DESTROY,
            UA_MANDATORY));
2597 
/*
 * The steering anchor uverbs object: IDR-allocated, released via
 * steering_anchor_cleanup() (defined earlier in this file), with the
 * create/destroy methods above attached.
 */
DECLARE_UVERBS_NAMED_OBJECT(
    MLX5_IB_OBJECT_STEERING_ANCHOR,
    UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
    &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
    &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
2603 
/*
 * uapi definition chain exported by this file: the driver-specific flow
 * matcher and steering anchor objects, plus the method additions to the
 * core FLOW and FLOW_ACTION objects.  The steering anchor tree is only
 * exposed when mlx5_ib_shared_ft_allowed() reports support.  The empty
 * entry terminates the chain.
 */
const struct uapi_definition mlx5_ib_flow_defs[] = {
    UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
        MLX5_IB_OBJECT_FLOW_MATCHER),
    UAPI_DEF_CHAIN_OBJ_TREE(
        UVERBS_OBJECT_FLOW,
        &mlx5_ib_fs),
    UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
                &mlx5_ib_flow_actions),
    UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
        MLX5_IB_OBJECT_STEERING_ANCHOR,
        UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
    {},
};
2617 
2618 static const struct ib_device_ops flow_ops = {
2619     .create_flow = mlx5_ib_create_flow,
2620     .destroy_flow = mlx5_ib_destroy_flow,
2621     .destroy_flow_action = mlx5_ib_destroy_flow_action,
2622 };
2623 
2624 int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
2625 {
2626     dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
2627 
2628     if (!dev->flow_db)
2629         return -ENOMEM;
2630 
2631     mutex_init(&dev->flow_db->lock);
2632 
2633     ib_set_device_ops(&dev->ib_dev, &flow_ops);
2634     return 0;
2635 }