// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
    .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
    .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
    .name = OVS_VPORT_MCGROUP,
};
/* Check if we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                unsigned int group)
{
    return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
           genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
               struct sk_buff *skb, struct genl_info *info)
{
    genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath, port, set operations on
 * vports, etc.) and writes to other state (flow table modifications, setting
 * miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
    mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
    mutex_unlock(&ovs_mutex);
}
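
/* Illustrative sketch (not part of the original file): the usage pattern
 * implied by the locking rules documented above.  Writers take ovs_lock()
 * around state changes; readers rely on RCU.  Kept inside a comment so the
 * file still compiles unchanged.
 *
 *	// Write side: modify datapath state under ovs_mutex.
 *	ovs_lock();
 *	// ... add/remove vports, modify flow tables, etc. ...
 *	ovs_unlock();
 *
 *	// Read side: traverse RCU-protected state without ovs_mutex.
 *	rcu_read_lock();
 *	// ... lookups via rcu_dereference() / ovs_vport_rcu() ...
 *	rcu_read_unlock();
 */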

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
    if (debug_locks)
        return lockdep_is_held(&ovs_mutex);
    else
        return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                 const struct sw_flow_key *,
                 const struct dp_upcall_info *,
                 uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                  const struct sw_flow_key *,
                  const struct dp_upcall_info *,
                  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
    struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
    return ovs_vport_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
    struct vport *local;
    int ifindex;

    rcu_read_lock();

    local = ovs_vport_rcu(dp, OVSP_LOCAL);
    if (local)
        ifindex = local->dev->ifindex;
    else
        ifindex = 0;

    rcu_read_unlock();

    return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
    struct datapath *dp = container_of(rcu, struct datapath, rcu);

    ovs_flow_tbl_destroy(&dp->table);
    free_percpu(dp->stats_percpu);
    kfree(dp->ports);
    ovs_meters_exit(dp);
    kfree(rcu_dereference_raw(dp->upcall_portids));
    kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                        u16 port_no)
{
    return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
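
/* Worked example (illustrative, not part of the original source): assuming
 * DP_VPORT_HASH_BUCKETS is 1024, the mask is 0x3ff, so port_no 5 maps to
 * bucket 5 and port_no 1029 (1024 + 5) maps to the same bucket;
 * ovs_lookup_vport() below then walks the chain comparing port_no exactly.
 */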

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
    struct vport *vport;
    struct hlist_head *head;

    head = vport_hash_bucket(dp, port_no);
    hlist_for_each_entry_rcu(vport, head, dp_hash_node,
                 lockdep_ovsl_is_held()) {
        if (vport->port_no == port_no)
            return vport;
    }
    return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
    struct vport *vport;

    vport = ovs_vport_add(parms);
    if (!IS_ERR(vport)) {
        struct datapath *dp = parms->dp;
        struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

        hlist_add_head_rcu(&vport->dp_hash_node, head);
    }
    return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
    ASSERT_OVSL();

    /* First drop references to device. */
    hlist_del_rcu(&p->dp_hash_node);

    /* Then destroy it. */
    ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
    const struct vport *p = OVS_CB(skb)->input_vport;
    struct datapath *dp = p->dp;
    struct sw_flow *flow;
    struct sw_flow_actions *sf_acts;
    struct dp_stats_percpu *stats;
    u64 *stats_counter;
    u32 n_mask_hit;
    u32 n_cache_hit;
    int error;

    stats = this_cpu_ptr(dp->stats_percpu);

    /* Look up flow. */
    flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                     &n_mask_hit, &n_cache_hit);
    if (unlikely(!flow)) {
        struct dp_upcall_info upcall;

        memset(&upcall, 0, sizeof(upcall));
        upcall.cmd = OVS_PACKET_CMD_MISS;

        if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
            upcall.portid =
                ovs_dp_get_upcall_portid(dp, smp_processor_id());
        else
            upcall.portid = ovs_vport_find_upcall_portid(p, skb);

        upcall.mru = OVS_CB(skb)->mru;
        error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
        if (unlikely(error))
            kfree_skb(skb);
        else
            consume_skb(skb);
        stats_counter = &stats->n_missed;
        goto out;
    }

    ovs_flow_stats_update(flow, key->tp.flags, skb);
    sf_acts = rcu_dereference(flow->sf_acts);
    error = ovs_execute_actions(dp, skb, sf_acts, key);
    if (unlikely(error))
        net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
                    ovs_dp_name(dp), error);

    stats_counter = &stats->n_hit;

out:
    /* Update datapath statistics. */
    u64_stats_update_begin(&stats->syncp);
    (*stats_counter)++;
    stats->n_mask_hit += n_mask_hit;
    stats->n_cache_hit += n_cache_hit;
    u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
          const struct sw_flow_key *key,
          const struct dp_upcall_info *upcall_info,
          uint32_t cutlen)
{
    struct dp_stats_percpu *stats;
    int err;

    if (trace_ovs_dp_upcall_enabled())
        trace_ovs_dp_upcall(dp, skb, key, upcall_info);

    if (upcall_info->portid == 0) {
        err = -ENOTCONN;
        goto err;
    }

    if (!skb_is_gso(skb))
        err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
    else
        err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
    if (err)
        goto err;

    return 0;

err:
    stats = this_cpu_ptr(dp->stats_percpu);

    u64_stats_update_begin(&stats->syncp);
    stats->n_lost++;
    u64_stats_update_end(&stats->syncp);

    return err;
}

static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                 const struct sw_flow_key *key,
                 const struct dp_upcall_info *upcall_info,
                 uint32_t cutlen)
{
    unsigned int gso_type = skb_shinfo(skb)->gso_type;
    struct sw_flow_key later_key;
    struct sk_buff *segs, *nskb;
    int err;

    BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
    segs = __skb_gso_segment(skb, NETIF_F_SG, false);
    if (IS_ERR(segs))
        return PTR_ERR(segs);
    if (segs == NULL)
        return -EINVAL;

    if (gso_type & SKB_GSO_UDP) {
        /* The initial flow key extracted by ovs_flow_key_extract()
         * in this case is for a first fragment, so we need to
         * properly mark later fragments.
         */
        later_key = *key;
        later_key.ip.frag = OVS_FRAG_TYPE_LATER;
    }

    /* Queue all of the segments. */
    skb_list_walk_safe(segs, skb, nskb) {
        if (gso_type & SKB_GSO_UDP && skb != segs)
            key = &later_key;

        err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        if (err)
            break;
    }

    /* Free all of the segments. */
    skb_list_walk_safe(segs, skb, nskb) {
        if (err)
            kfree_skb(skb);
        else
            consume_skb(skb);
    }
    return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                  unsigned int hdrlen, int actions_attrlen)
{
    size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
        + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
        + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
        + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
        + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

    /* OVS_PACKET_ATTR_USERDATA */
    if (upcall_info->userdata)
        size += NLA_ALIGN(upcall_info->userdata->nla_len);

    /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
    if (upcall_info->egress_tun_info)
        size += nla_total_size(ovs_tun_key_attr_size());

    /* OVS_PACKET_ATTR_ACTIONS */
    if (upcall_info->actions_len)
        size += nla_total_size(actions_attrlen);

    /* OVS_PACKET_ATTR_MRU */
    if (upcall_info->mru)
        size += nla_total_size(sizeof(upcall_info->mru));

    return size;
}
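
/* Illustrative size computation (not part of the original file): for a
 * 64-byte packet upcall with no userdata, no egress tunnel info, no
 * actions and no MRU, only the fixed part above contributes, i.e. roughly
 *
 *	NLMSG_ALIGN(sizeof(struct ovs_header))
 *	    + nla_total_size(64)                  // OVS_PACKET_ATTR_PACKET
 *	    + nla_total_size(ovs_key_attr_size()) // OVS_PACKET_ATTR_KEY
 *	    + nla_total_size(4)                   // OVS_PACKET_ATTR_LEN
 *	    + nla_total_size(8);                  // OVS_PACKET_ATTR_HASH
 *
 * The optional attributes are added only when present in upcall_info.
 */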

static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
    if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
        size_t plen = NLA_ALIGN(skb->len) - skb->len;

        if (plen > 0)
            skb_put_zero(skb, plen);
    }
}
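
/* Worked example (illustrative): if skb->len is 30, NLA_ALIGN(30) is 32,
 * so pad_packet() appends two zero bytes; if skb->len is already a
 * multiple of NLA_ALIGNTO (4), plen is 0 and nothing is added.  With
 * OVS_DP_F_UNALIGNED set, userspace accepts unpadded attributes and the
 * function is a no-op.
 */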

static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
    struct ovs_header *upcall;
    struct sk_buff *nskb = NULL;
    struct sk_buff *user_skb = NULL; /* to be queued to userspace */
    struct nlattr *nla;
    size_t len;
    unsigned int hlen;
    int err, dp_ifindex;
    u64 hash;

    dp_ifindex = get_dpifindex(dp);
    if (!dp_ifindex)
        return -ENODEV;

    if (skb_vlan_tag_present(skb)) {
        nskb = skb_clone(skb, GFP_ATOMIC);
        if (!nskb)
            return -ENOMEM;

        nskb = __vlan_hwaccel_push_inside(nskb);
        if (!nskb)
            return -ENOMEM;

        skb = nskb;
    }

    if (nla_attr_size(skb->len) > USHRT_MAX) {
        err = -EFBIG;
        goto out;
    }

    /* Complete checksum if needed */
    if (skb->ip_summed == CHECKSUM_PARTIAL &&
        (err = skb_csum_hwoffload_help(skb, 0)))
        goto out;

    /* Older versions of OVS user space enforce alignment of the last
     * Netlink attribute to NLA_ALIGNTO which would require extensive
     * padding logic. Only perform zerocopy if padding is not required.
     */
    if (dp->user_features & OVS_DP_F_UNALIGNED)
        hlen = skb_zerocopy_headlen(skb);
    else
        hlen = skb->len;

    len = upcall_msg_size(upcall_info, hlen - cutlen,
                  OVS_CB(skb)->acts_origlen);
    user_skb = genlmsg_new(len, GFP_ATOMIC);
    if (!user_skb) {
        err = -ENOMEM;
        goto out;
    }

    upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                 0, upcall_info->cmd);
    if (!upcall) {
        err = -EINVAL;
        goto out;
    }
    upcall->dp_ifindex = dp_ifindex;

    err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
    if (err)
        goto out;

    if (upcall_info->userdata)
        __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
              nla_len(upcall_info->userdata),
              nla_data(upcall_info->userdata));

    if (upcall_info->egress_tun_info) {
        nla = nla_nest_start_noflag(user_skb,
                        OVS_PACKET_ATTR_EGRESS_TUN_KEY);
        if (!nla) {
            err = -EMSGSIZE;
            goto out;
        }
        err = ovs_nla_put_tunnel_info(user_skb,
                          upcall_info->egress_tun_info);
        if (err)
            goto out;

        nla_nest_end(user_skb, nla);
    }

    if (upcall_info->actions_len) {
        nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
        if (!nla) {
            err = -EMSGSIZE;
            goto out;
        }
        err = ovs_nla_put_actions(upcall_info->actions,
                      upcall_info->actions_len,
                      user_skb);
        if (!err)
            nla_nest_end(user_skb, nla);
        else
            nla_nest_cancel(user_skb, nla);
    }

    /* Add OVS_PACKET_ATTR_MRU */
    if (upcall_info->mru &&
        nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
        err = -ENOBUFS;
        goto out;
    }

    /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
    if (cutlen > 0 &&
        nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
        err = -ENOBUFS;
        goto out;
    }

    /* Add OVS_PACKET_ATTR_HASH */
    hash = skb_get_hash_raw(skb);
    if (skb->sw_hash)
        hash |= OVS_PACKET_HASH_SW_BIT;

    if (skb->l4_hash)
        hash |= OVS_PACKET_HASH_L4_BIT;

    if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
        err = -ENOBUFS;
        goto out;
    }

    /* Only reserve room for attribute header, packet data is added
     * in skb_zerocopy() */
    if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
        err = -ENOBUFS;
        goto out;
    }
    nla->nla_len = nla_attr_size(skb->len - cutlen);

    err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
    if (err)
        goto out;

    /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
    pad_packet(dp, user_skb);

    ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

    err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
    user_skb = NULL;
out:
    if (err)
        skb_tx_error(skb);
    kfree_skb(user_skb);
    kfree_skb(nskb);
    return err;
}
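
/* Illustrative note (not part of the original file): the u64 sent in
 * OVS_PACKET_ATTR_HASH above packs the 32-bit skb hash in the low word
 * with flag bits above it:
 *
 *	u64 hash = skb_get_hash_raw(skb);	// low 32 bits
 *	hash |= OVS_PACKET_HASH_SW_BIT;		// hash computed in software
 *	hash |= OVS_PACKET_HASH_L4_BIT;		// hash covers L4 headers
 *
 * ovs_packet_cmd_execute() below performs the inverse unpacking via
 * __skb_set_hash().
 */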

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
    struct ovs_header *ovs_header = info->userhdr;
    struct net *net = sock_net(skb->sk);
    struct nlattr **a = info->attrs;
    struct sw_flow_actions *acts;
    struct sk_buff *packet;
    struct sw_flow *flow;
    struct sw_flow_actions *sf_acts;
    struct datapath *dp;
    struct vport *input_vport;
    u16 mru = 0;
    u64 hash;
    int len;
    int err;
    bool log = !a[OVS_PACKET_ATTR_PROBE];

    err = -EINVAL;
    if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
        !a[OVS_PACKET_ATTR_ACTIONS])
        goto err;

    len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
    packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
    err = -ENOMEM;
    if (!packet)
        goto err;
    skb_reserve(packet, NET_IP_ALIGN);

    nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

    /* Set packet's mru */
    if (a[OVS_PACKET_ATTR_MRU]) {
        mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
        packet->ignore_df = 1;
    }
    OVS_CB(packet)->mru = mru;

    if (a[OVS_PACKET_ATTR_HASH]) {
        hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

        __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
                   !!(hash & OVS_PACKET_HASH_SW_BIT),
                   !!(hash & OVS_PACKET_HASH_L4_BIT));
    }

    /* Build an sw_flow for sending this packet. */
    flow = ovs_flow_alloc();
    err = PTR_ERR(flow);
    if (IS_ERR(flow))
        goto err_kfree_skb;

    err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                         packet, &flow->key, log);
    if (err)
        goto err_flow_free;

    err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                   &flow->key, &acts, log);
    if (err)
        goto err_flow_free;

    rcu_assign_pointer(flow->sf_acts, acts);
    packet->priority = flow->key.phy.priority;
    packet->mark = flow->key.phy.skb_mark;

    rcu_read_lock();
    dp = get_dp_rcu(net, ovs_header->dp_ifindex);
    err = -ENODEV;
    if (!dp)
        goto err_unlock;

    input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
    if (!input_vport)
        input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

    if (!input_vport)
        goto err_unlock;

    packet->dev = input_vport->dev;
    OVS_CB(packet)->input_vport = input_vport;
    sf_acts = rcu_dereference(flow->sf_acts);

    local_bh_disable();
    err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
    local_bh_enable();
    rcu_read_unlock();

    ovs_flow_free(flow, false);
    return err;

err_unlock:
    rcu_read_unlock();
err_flow_free:
    ovs_flow_free(flow, false);
err_kfree_skb:
    kfree_skb(packet);
err:
    return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
    [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
    [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
    [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
    [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
    [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
    [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static const struct genl_small_ops dp_packet_genl_ops[] = {
    { .cmd = OVS_PACKET_CMD_EXECUTE,
      .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
      .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
      .doit = ovs_packet_cmd_execute
    }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
    .hdrsize = sizeof(struct ovs_header),
    .name = OVS_PACKET_FAMILY,
    .version = OVS_PACKET_VERSION,
    .maxattr = OVS_PACKET_ATTR_MAX,
    .policy = packet_policy,
    .netnsok = true,
    .parallel_ops = true,
    .small_ops = dp_packet_genl_ops,
    .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
    .module = THIS_MODULE,
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
             struct ovs_dp_megaflow_stats *mega_stats)
{
    int i;

    memset(mega_stats, 0, sizeof(*mega_stats));

    stats->n_flows = ovs_flow_tbl_count(&dp->table);
    mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

    stats->n_hit = stats->n_missed = stats->n_lost = 0;

    for_each_possible_cpu(i) {
        const struct dp_stats_percpu *percpu_stats;
        struct dp_stats_percpu local_stats;
        unsigned int start;

        percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

        do {
            start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
            local_stats = *percpu_stats;
        } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

        stats->n_hit += local_stats.n_hit;
        stats->n_missed += local_stats.n_missed;
        stats->n_lost += local_stats.n_lost;
        mega_stats->n_mask_hit += local_stats.n_mask_hit;
        mega_stats->n_cache_hit += local_stats.n_cache_hit;
    }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
    return ovs_identifier_is_ufid(sfid) &&
           !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
    return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
    return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}
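
/* Illustrative usage (not part of the original file): a userspace dump
 * that only cares about flow identity and statistics would set
 *
 *	u32 ufid_flags = OVS_UFID_F_OMIT_KEY |
 *			 OVS_UFID_F_OMIT_MASK |
 *			 OVS_UFID_F_OMIT_ACTIONS;
 *
 * in OVS_FLOW_ATTR_UFID_FLAGS; the helpers above then suppress the key,
 * mask and actions attributes in each reply.
 */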

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                    const struct sw_flow_id *sfid,
                    uint32_t ufid_flags)
{
    size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

    /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
     * see ovs_nla_put_identifier()
     */
    if (sfid && ovs_identifier_is_ufid(sfid))
        len += nla_total_size(sfid->ufid_len);
    else
        len += nla_total_size(ovs_key_attr_size());

    /* OVS_FLOW_ATTR_KEY */
    if (!sfid || should_fill_key(sfid, ufid_flags))
        len += nla_total_size(ovs_key_attr_size());

    /* OVS_FLOW_ATTR_MASK */
    if (should_fill_mask(ufid_flags))
        len += nla_total_size(ovs_key_attr_size());

    /* OVS_FLOW_ATTR_ACTIONS */
    if (should_fill_actions(ufid_flags))
        len += nla_total_size(acts->orig_len);

    return len
        + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
        + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
        + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                   struct sk_buff *skb)
{
    struct ovs_flow_stats stats;
    __be16 tcp_flags;
    unsigned long used;

    ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

    if (used &&
        nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                  OVS_FLOW_ATTR_PAD))
        return -EMSGSIZE;

    if (stats.n_packets &&
        nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
              sizeof(struct ovs_flow_stats), &stats,
              OVS_FLOW_ATTR_PAD))
        return -EMSGSIZE;

    if ((u8)ntohs(tcp_flags) &&
         nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
        return -EMSGSIZE;

    return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                     struct sk_buff *skb, int skb_orig_len)
{
    struct nlattr *start;
    int err;

    /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
     * this is the first flow to be dumped into 'skb'.  This is unusual for
     * Netlink but individual action lists can be longer than
     * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
     * The userspace caller can always fetch the actions separately if it
     * really wants them.  (Most userspace callers in fact don't care.)
     *
     * This can only fail for dump operations because the skb is always
     * properly sized for single flows.
     */
    start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
    if (start) {
        const struct sw_flow_actions *sf_acts;

        sf_acts = rcu_dereference_ovsl(flow->sf_acts);
        err = ovs_nla_put_actions(sf_acts->actions,
                      sf_acts->actions_len, skb);

        if (!err)
            nla_nest_end(skb, start);
        else {
            if (skb_orig_len)
                return err;

            nla_nest_cancel(skb, start);
        }
    } else if (skb_orig_len) {
        return -EMSGSIZE;
    }

    return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                  struct sk_buff *skb, u32 portid,
                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
    const int skb_orig_len = skb->len;
    struct ovs_header *ovs_header;
    int err;

    ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                 flags, cmd);
    if (!ovs_header)
        return -EMSGSIZE;

    ovs_header->dp_ifindex = dp_ifindex;

    err = ovs_nla_put_identifier(flow, skb);
    if (err)
        goto error;

    if (should_fill_key(&flow->id, ufid_flags)) {
        err = ovs_nla_put_masked_key(flow, skb);
        if (err)
            goto error;
    }

    if (should_fill_mask(ufid_flags)) {
        err = ovs_nla_put_mask(flow, skb);
        if (err)
            goto error;
    }

    err = ovs_flow_cmd_fill_stats(flow, skb);
    if (err)
        goto error;

    if (should_fill_actions(ufid_flags)) {
        err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
        if (err)
            goto error;
    }

    genlmsg_end(skb, ovs_header);
    return 0;

error:
    genlmsg_cancel(skb, ovs_header);
    return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                           const struct sw_flow_id *sfid,
                           struct genl_info *info,
                           bool always,
                           uint32_t ufid_flags)
{
    struct sk_buff *skb;
    size_t len;

    if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
        return NULL;

    len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
    skb = genlmsg_new(len, GFP_KERNEL);
    if (!skb)
        return ERR_PTR(-ENOMEM);

    return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                           int dp_ifindex,
                           struct genl_info *info, u8 cmd,
                           bool always, u32 ufid_flags)
{
    struct sk_buff *skb;
    int retval;

    skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                      &flow->id, info, always, ufid_flags);
    if (IS_ERR_OR_NULL(skb))
        return skb;

    retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                    info->snd_portid, info->snd_seq, 0,
                    cmd, ufid_flags);
    if (WARN_ON_ONCE(retval < 0)) {
        kfree_skb(skb);
        skb = ERR_PTR(retval);
    }
    return skb;
}

static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
    struct net *net = sock_net(skb->sk);
    struct nlattr **a = info->attrs;
    struct ovs_header *ovs_header = info->userhdr;
    struct sw_flow *flow = NULL, *new_flow;
    struct sw_flow_mask mask;
    struct sk_buff *reply;
    struct datapath *dp;
    struct sw_flow_actions *acts;
    struct sw_flow_match match;
    u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
    int error;
    bool log = !a[OVS_FLOW_ATTR_PROBE];

    /* Must have key and actions. */
    error = -EINVAL;
    if (!a[OVS_FLOW_ATTR_KEY]) {
        OVS_NLERR(log, "Flow key attr not present in new flow.");
        goto error;
    }
    if (!a[OVS_FLOW_ATTR_ACTIONS]) {
        OVS_NLERR(log, "Flow actions attr not present in new flow.");
        goto error;
    }

    /* Most of the time we need to allocate a new flow, do it before
     * locking.
     */
    new_flow = ovs_flow_alloc();
    if (IS_ERR(new_flow)) {
        error = PTR_ERR(new_flow);
        goto error;
    }

    /* Extract key. */
    ovs_match_init(&match, &new_flow->key, false, &mask);
    error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                  a[OVS_FLOW_ATTR_MASK], log);
    if (error)
        goto err_kfree_flow;

    /* Extract flow identifier. */
    error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                       &new_flow->key, log);
    if (error)
        goto err_kfree_flow;

    /* unmasked key is needed to match when ufid is not used. */
    if (ovs_identifier_is_key(&new_flow->id))
        match.key = new_flow->id.unmasked_key;

    ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

    /* Validate actions. */
    error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                     &new_flow->key, &acts, log);
    if (error) {
        OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
        goto err_kfree_flow;
    }

    reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                    ufid_flags);
    if (IS_ERR(reply)) {
        error = PTR_ERR(reply);
        goto err_kfree_acts;
    }

    ovs_lock();
    dp = get_dp(net, ovs_header->dp_ifindex);
    if (unlikely(!dp)) {
        error = -ENODEV;
        goto err_unlock_ovs;
    }

    /* Check if this is a duplicate flow */
    if (ovs_identifier_is_ufid(&new_flow->id))
        flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
    if (!flow)
        flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
    if (likely(!flow)) {
        rcu_assign_pointer(new_flow->sf_acts, acts);

        /* Put flow in bucket. */
        error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
        if (unlikely(error)) {
            acts = NULL;
            goto err_unlock_ovs;
        }

        if (unlikely(reply)) {
            error = ovs_flow_cmd_fill_info(new_flow,
                               ovs_header->dp_ifindex,
                               reply, info->snd_portid,
                               info->snd_seq, 0,
                               OVS_FLOW_CMD_NEW,
                               ufid_flags);
            BUG_ON(error < 0);
        }
        ovs_unlock();
    } else {
        struct sw_flow_actions *old_acts;

        /* Bail out if we're not allowed to modify an existing flow.
         * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
         * because Generic Netlink treats the latter as a dump
         * request.  We also accept NLM_F_EXCL in case that bug ever
         * gets fixed.
         */
        if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                             | NLM_F_EXCL))) {
            error = -EEXIST;
            goto err_unlock_ovs;
        }
        /* The flow identifier has to be the same for flow updates.
         * Look for any overlapping flow.
         */
        if (unlikely(!ovs_flow_cmp(flow, &match))) {
            if (ovs_identifier_is_key(&flow->id))
                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                 &match);
            else /* UFID matches but key is different */
                flow = NULL;
            if (!flow) {
                error = -ENOENT;
                goto err_unlock_ovs;
            }
        }
        /* Update actions. */
        old_acts = ovsl_dereference(flow->sf_acts);
        rcu_assign_pointer(flow->sf_acts, acts);

        if (unlikely(reply)) {
            error = ovs_flow_cmd_fill_info(flow,
                               ovs_header->dp_ifindex,
                               reply, info->snd_portid,
                               info->snd_seq, 0,
                               OVS_FLOW_CMD_NEW,
                               ufid_flags);
            BUG_ON(error < 0);
        }
        ovs_unlock();

        ovs_nla_free_flow_actions_rcu(old_acts);
        ovs_flow_free(new_flow, false);
    }

    if (reply)
        ovs_notify(&dp_flow_genl_family, reply, info);
    return 0;

err_unlock_ovs:
    ovs_unlock();
    kfree_skb(reply);
err_kfree_acts:
    ovs_nla_free_flow_actions(acts);
err_kfree_flow:
    ovs_flow_free(new_flow, false);
error:
    return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
                     const struct nlattr *a,
                     const struct sw_flow_key *key,
                     const struct sw_flow_mask *mask,
                     bool log)
{
    struct sw_flow_actions *acts;
    struct sw_flow_key masked_key;
    int error;

    ovs_flow_mask_key(&masked_key, key, true, mask);
    error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
    if (error) {
        OVS_NLERR(log,
              "Actions may not be safe on all matching packets");
        return ERR_PTR(error);
    }

    return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning. Because the mask is only used to
 * validate the actions, we use a separate function to save some stack
 * space.
 *
 * If neither the key nor the actions attribute is present, we return 0
 * directly; in that case the caller will not use the match either. If
 * the actions attribute is present, we try to parse the actions and
 * save them to *acts. Before returning, we reset the match->mask
 * pointer, because we must not return a match object with a dangling
 * reference to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
                  struct sw_flow_match *match,
                  struct sw_flow_key *key,
                  struct nlattr **a,
                  struct sw_flow_actions **acts,
                  bool log)
{
    struct sw_flow_mask mask;
    int error = 0;

    if (a[OVS_FLOW_ATTR_KEY]) {
        ovs_match_init(match, key, true, &mask);
        error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
                      a[OVS_FLOW_ATTR_MASK], log);
        if (error)
            goto error;
    }

    if (a[OVS_FLOW_ATTR_ACTIONS]) {
        if (!a[OVS_FLOW_ATTR_KEY]) {
            OVS_NLERR(log,
                  "Flow key attribute not present in set flow.");
            error = -EINVAL;
            goto error;
        }

        *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
                     &mask, log);
        if (IS_ERR(*acts)) {
            error = PTR_ERR(*acts);
            goto error;
        }
    }

    /* On success, error is 0. */
error:
    match->mask = NULL;
    return error;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
    struct net *net = sock_net(skb->sk);
    struct nlattr **a = info->attrs;
    struct ovs_header *ovs_header = info->userhdr;
    struct sw_flow_key key;
    struct sw_flow *flow;
    struct sk_buff *reply = NULL;
    struct datapath *dp;
    struct sw_flow_actions *old_acts = NULL, *acts = NULL;
    struct sw_flow_match match;
    struct sw_flow_id sfid;
    u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
    int error = 0;
    bool log = !a[OVS_FLOW_ATTR_PROBE];
    bool ufid_present;

    ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
    if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
        OVS_NLERR(log,
              "Flow set message rejected, Key attribute missing.");
        return -EINVAL;
    }

    error = ovs_nla_init_match_and_action(net, &match, &key, a,
                          &acts, log);
    if (error)
        goto error;

    if (acts) {
        /* Can allocate before locking if have acts. */
        reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                        ufid_flags);
        if (IS_ERR(reply)) {
            error = PTR_ERR(reply);
            goto err_kfree_acts;
        }
    }

    ovs_lock();
    dp = get_dp(net, ovs_header->dp_ifindex);
    if (unlikely(!dp)) {
        error = -ENODEV;
        goto err_unlock_ovs;
    }
    /* Check that the flow exists. */
    if (ufid_present)
        flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
    else
        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
    if (unlikely(!flow)) {
        error = -ENOENT;
        goto err_unlock_ovs;
    }

    /* Update actions, if present. */
    if (likely(acts)) {
        old_acts = ovsl_dereference(flow->sf_acts);
        rcu_assign_pointer(flow->sf_acts, acts);

        if (unlikely(reply)) {
            error = ovs_flow_cmd_fill_info(flow,
                               ovs_header->dp_ifindex,
                               reply, info->snd_portid,
                               info->snd_seq, 0,
                               OVS_FLOW_CMD_SET,
                               ufid_flags);
            BUG_ON(error < 0);
        }
    } else {
        /* Could not alloc without acts before locking. */
        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                        info, OVS_FLOW_CMD_SET, false,
                        ufid_flags);

        if (IS_ERR(reply)) {
            error = PTR_ERR(reply);
            goto err_unlock_ovs;
        }
    }

    /* Clear stats. */
    if (a[OVS_FLOW_ATTR_CLEAR])
        ovs_flow_stats_clear(flow);
    ovs_unlock();

    if (reply)
        ovs_notify(&dp_flow_genl_family, reply, info);
    if (old_acts)
        ovs_nla_free_flow_actions_rcu(old_acts);

    return 0;

err_unlock_ovs:
    ovs_unlock();
    kfree_skb(reply);
err_kfree_acts:
    ovs_nla_free_flow_actions(acts);
error:
    return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
    struct nlattr **a = info->attrs;
    struct ovs_header *ovs_header = info->userhdr;
    struct net *net = sock_net(skb->sk);
    struct sw_flow_key key;
    struct sk_buff *reply;
    struct sw_flow *flow;
    struct datapath *dp;
    struct sw_flow_match match;
    struct sw_flow_id ufid;
    u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
    int err = 0;
    bool log = !a[OVS_FLOW_ATTR_PROBE];
    bool ufid_present;

    ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
    if (a[OVS_FLOW_ATTR_KEY]) {
        ovs_match_init(&match, &key, true, NULL);
        err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
                    log);
    } else if (!ufid_present) {
        OVS_NLERR(log,
              "Flow get message rejected, Key attribute missing.");
        err = -EINVAL;
    }
    if (err)
        return err;

    ovs_lock();
    dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
    if (!dp) {
        err = -ENODEV;
        goto unlock;
    }

    if (ufid_present)
        flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
    else
        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
    if (!flow) {
        err = -ENOENT;
        goto unlock;
    }

    reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                    OVS_FLOW_CMD_GET, true, ufid_flags);
    if (IS_ERR(reply)) {
        err = PTR_ERR(reply);
        goto unlock;
    }

    ovs_unlock();
    return genlmsg_reply(reply, info);
unlock:
    ovs_unlock();
    return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
    struct nlattr **a = info->attrs;
    struct ovs_header *ovs_header = info->userhdr;
    struct net *net = sock_net(skb->sk);
    struct sw_flow_key key;
    struct sk_buff *reply;
    struct sw_flow *flow = NULL;
    struct datapath *dp;
    struct sw_flow_match match;
    struct sw_flow_id ufid;
    u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
    int err;
    bool log = !a[OVS_FLOW_ATTR_PROBE];
    bool ufid_present;

    ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
    if (a[OVS_FLOW_ATTR_KEY]) {
        ovs_match_init(&match, &key, true, NULL);
        err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                    NULL, log);
        if (unlikely(err))
            return err;
    }

    ovs_lock();
    dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
    if (unlikely(!dp)) {
        err = -ENODEV;
        goto unlock;
    }

    if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
        err = ovs_flow_tbl_flush(&dp->table);
        goto unlock;
    }

    if (ufid_present)
        flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
    else
        flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
    if (unlikely(!flow)) {
        err = -ENOENT;
        goto unlock;
    }

    ovs_flow_tbl_remove(&dp->table, flow);
    ovs_unlock();

    reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                    &flow->id, info, false, ufid_flags);
    if (likely(reply)) {
        if (!IS_ERR(reply)) {
            rcu_read_lock();    /* To keep RCU checker happy. */
            err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                             reply, info->snd_portid,
                             info->snd_seq, 0,
                             OVS_FLOW_CMD_DEL,
                             ufid_flags);
            rcu_read_unlock();
            if (WARN_ON_ONCE(err < 0)) {
                kfree_skb(reply);
                goto out_free;
            }

            ovs_notify(&dp_flow_genl_family, reply, info);
        } else {
            netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
                    PTR_ERR(reply));
        }
    }

out_free:
    ovs_flow_free(flow, true);
    return 0;
unlock:
    ovs_unlock();
    return err;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
    struct nlattr *a[__OVS_FLOW_ATTR_MAX];
    struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
    struct table_instance *ti;
    struct datapath *dp;
    u32 ufid_flags;
    int err;

    err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
                       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
    if (err)
        return err;
    ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

    rcu_read_lock();
    dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
    if (!dp) {
        rcu_read_unlock();
        return -ENODEV;
    }

    ti = rcu_dereference(dp->table.ti);
    for (;;) {
        struct sw_flow *flow;
        u32 bucket, obj;

        bucket = cb->args[0];
        obj = cb->args[1];
        flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
        if (!flow)
            break;

        if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                       NETLINK_CB(cb->skb).portid,
                       cb->nlh->nlmsg_seq, NLM_F_MULTI,
                       OVS_FLOW_CMD_GET, ufid_flags) < 0)
            break;

        cb->args[0] = bucket;
        cb->args[1] = obj;
    }
    rcu_read_unlock();
    return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
    [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
    [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
    [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
    [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
    [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
    [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
    [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_small_ops dp_flow_genl_ops[] = {
    { .cmd = OVS_FLOW_CMD_NEW,
      .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
      .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
      .doit = ovs_flow_cmd_new
    },
    { .cmd = OVS_FLOW_CMD_DEL,
      .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
      .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
      .doit = ovs_flow_cmd_del
    },
    { .cmd = OVS_FLOW_CMD_GET,
      .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
      .flags = 0,           /* OK for unprivileged users. */
      .doit = ovs_flow_cmd_get,
      .dumpit = ovs_flow_cmd_dump
    },
    { .cmd = OVS_FLOW_CMD_SET,
      .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
      .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
      .doit = ovs_flow_cmd_set,
    },
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
    .hdrsize = sizeof(struct ovs_header),
    .name = OVS_FLOW_FAMILY,
    .version = OVS_FLOW_VERSION,
    .maxattr = OVS_FLOW_ATTR_MAX,
    .policy = flow_policy,
    .netnsok = true,
    .parallel_ops = true,
    .small_ops = dp_flow_genl_ops,
    .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
    .mcgrps = &ovs_dp_flow_multicast_group,
    .n_mcgrps = 1,
    .module = THIS_MODULE,
};

static size_t ovs_dp_cmd_msg_size(void)
{
    size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

    msgsize += nla_total_size(IFNAMSIZ);
    msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
    msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
    msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
    msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */

    return msgsize;
}

/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                u32 portid, u32 seq, u32 flags, u8 cmd)
{
    struct ovs_header *ovs_header;
    struct ovs_dp_stats dp_stats;
    struct ovs_dp_megaflow_stats dp_megaflow_stats;
    int err;

    ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                 flags, cmd);
    if (!ovs_header)
        goto error;

    ovs_header->dp_ifindex = get_dpifindex(dp);

    err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
    if (err)
        goto nla_put_failure;

    get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
    if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
              &dp_stats, OVS_DP_ATTR_PAD))
        goto nla_put_failure;

    if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
              sizeof(struct ovs_dp_megaflow_stats),
              &dp_megaflow_stats, OVS_DP_ATTR_PAD))
        goto nla_put_failure;

    if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
        goto nla_put_failure;

    if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
            ovs_flow_tbl_masks_cache_size(&dp->table)))
        goto nla_put_failure;

    genlmsg_end(skb, ovs_header);
    return 0;

nla_put_failure:
    genlmsg_cancel(skb, ovs_header);
error:
    return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(void)
{
    return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
                    const struct ovs_header *ovs_header,
                    struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
    struct datapath *dp;

    if (!a[OVS_DP_ATTR_NAME])
        dp = get_dp(net, ovs_header->dp_ifindex);
    else {
        struct vport *vport;

        vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
        dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
    }
    return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb,
                       struct genl_info *info)
{
    struct datapath *dp;

    dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
                 info->attrs);
    if (IS_ERR(dp))
        return;

    WARN(dp->user_features, "Dropping previously announced user features\n");
    dp->user_features = 0;
}

static int ovs_dp_set_upcall_portids(struct datapath *dp,
                  const struct nlattr *ids)
{
    struct dp_nlsk_pids *old, *dp_nlsk_pids;

    if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
        return -EINVAL;

    old = ovsl_dereference(dp->upcall_portids);

    dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
                   GFP_KERNEL);
    if (!dp_nlsk_pids)
        return -ENOMEM;

    dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
    nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));

    rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);

    kfree_rcu(old, rcu);

    return 0;
}

u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
{
    struct dp_nlsk_pids *dp_nlsk_pids;

    dp_nlsk_pids = rcu_dereference(dp->upcall_portids);

    if (dp_nlsk_pids) {
        if (cpu_id < dp_nlsk_pids->n_pids) {
            return dp_nlsk_pids->pids[cpu_id];
        } else if (dp_nlsk_pids->n_pids > 0 &&
               cpu_id >= dp_nlsk_pids->n_pids) {
            /* If the number of netlink PIDs is mismatched with
             * the number of CPUs as seen by the kernel, log this
             * and pick a socket deterministically (cpu_id modulo
             * n_pids) so that packets are not dropped.
             */
            pr_info_ratelimited("cpu_id mismatch with handler threads");
            return dp_nlsk_pids->pids[cpu_id %
                          dp_nlsk_pids->n_pids];
        } else {
            return 0;
        }
    } else {
        return 0;
    }
}
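
/* Worked example (illustrative, not part of the original source): with
 * 8 online CPUs but only 4 PIDs in dp->upcall_portids, an upcall on
 * CPU 6 is steered to pids[6 % 4] == pids[2]; with an empty PID array
 * the function returns portid 0, and ovs_dp_upcall() then fails the
 * upcall with -ENOTCONN.
 */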
1656 
1657 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1658 {
1659     u32 user_features = 0, old_features = dp->user_features;
1660     int err;
1661 
1662     if (a[OVS_DP_ATTR_USER_FEATURES]) {
1663         user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1664 
1665         if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1666                       OVS_DP_F_UNALIGNED |
1667                       OVS_DP_F_TC_RECIRC_SHARING |
1668                       OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
1669             return -EOPNOTSUPP;
1670 
1671 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1672         if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1673             return -EOPNOTSUPP;
1674 #endif
1675     }
1676 
1677     if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1678         int err;
1679         u32 cache_size;
1680 
1681         cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1682         err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1683         if (err)
1684             return err;
1685     }
1686 
1687     dp->user_features = user_features;
1688 
1689     if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1690         a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1691         /* Upcall Netlink Port IDs have been updated */
1692         err = ovs_dp_set_upcall_portids(dp,
1693                         a[OVS_DP_ATTR_PER_CPU_PIDS]);
1694         if (err)
1695             return err;
1696     }
1697 
1698     if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1699         !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1700         tc_skb_ext_tc_enable();
1701     else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1702          (old_features & OVS_DP_F_TC_RECIRC_SHARING))
1703         tc_skb_ext_tc_disable();
1704 
1705     return 0;
1706 }
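
/* Example (hypothetical attribute values): an OVS_DP_CMD_SET request that
 * enables per-CPU upcall dispatch would carry attributes like
 *
 *     OVS_DP_ATTR_USER_FEATURES = OVS_DP_F_VPORT_PIDS |
 *                                 OVS_DP_F_DISPATCH_UPCALL_PER_CPU
 *     OVS_DP_ATTR_PER_CPU_PIDS  = array of u32, one netlink PID per handler
 *
 * Any unknown feature bit makes the whole request fail with -EOPNOTSUPP,
 * leaving dp->user_features unchanged.
 */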
1707 
1708 static int ovs_dp_stats_init(struct datapath *dp)
1709 {
1710     dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1711     if (!dp->stats_percpu)
1712         return -ENOMEM;
1713 
1714     return 0;
1715 }
1716 
1717 static int ovs_dp_vport_init(struct datapath *dp)
1718 {
1719     int i;
1720 
1721     dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1722                   sizeof(struct hlist_head),
1723                   GFP_KERNEL);
1724     if (!dp->ports)
1725         return -ENOMEM;
1726 
1727     for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1728         INIT_HLIST_HEAD(&dp->ports[i]);
1729 
1730     return 0;
1731 }
1732 
1733 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1734 {
1735     struct nlattr **a = info->attrs;
1736     struct vport_parms parms;
1737     struct sk_buff *reply;
1738     struct datapath *dp;
1739     struct vport *vport;
1740     struct ovs_net *ovs_net;
1741     int err;
1742 
1743     err = -EINVAL;
1744     if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1745         goto err;
1746 
1747     reply = ovs_dp_cmd_alloc_info();
1748     if (!reply)
1749         return -ENOMEM;
1750 
1751     err = -ENOMEM;
1752     dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1753     if (dp == NULL)
1754         goto err_destroy_reply;
1755 
1756     ovs_dp_set_net(dp, sock_net(skb->sk));
1757 
1758     /* Allocate table. */
1759     err = ovs_flow_tbl_init(&dp->table);
1760     if (err)
1761         goto err_destroy_dp;
1762 
1763     err = ovs_dp_stats_init(dp);
1764     if (err)
1765         goto err_destroy_table;
1766 
1767     err = ovs_dp_vport_init(dp);
1768     if (err)
1769         goto err_destroy_stats;
1770 
1771     err = ovs_meters_init(dp);
1772     if (err)
1773         goto err_destroy_ports;
1774 
1775     /* Set up our datapath device. */
1776     parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1777     parms.type = OVS_VPORT_TYPE_INTERNAL;
1778     parms.options = NULL;
1779     parms.dp = dp;
1780     parms.port_no = OVSP_LOCAL;
1781     parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1782 
1783     /* So far only local changes have been made, now need the lock. */
1784     ovs_lock();
1785 
1786     err = ovs_dp_change(dp, a);
1787     if (err)
1788         goto err_unlock_and_destroy_meters;
1789 
1790     vport = new_vport(&parms);
1791     if (IS_ERR(vport)) {
1792         err = PTR_ERR(vport);
1793         if (err == -EBUSY)
1794             err = -EEXIST;
1795 
1796         if (err == -EEXIST) {
1797             /* An outdated user space instance that does not understand
1798              * the concept of user_features has attempted to create a new
1799              * datapath and is likely to reuse it. Drop all user features.
1800              */
1801             if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1802                 ovs_dp_reset_user_features(skb, info);
1803         }
1804 
1805         goto err_destroy_portids;
1806     }
1807 
1808     err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1809                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1810     BUG_ON(err < 0);
1811 
1812     ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1813     list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1814 
1815     ovs_unlock();
1816 
1817     ovs_notify(&dp_datapath_genl_family, reply, info);
1818     return 0;
1819 
1820 err_destroy_portids:
1821     kfree(rcu_dereference_raw(dp->upcall_portids));
1822 err_unlock_and_destroy_meters:
1823     ovs_unlock();
1824     ovs_meters_exit(dp);
1825 err_destroy_ports:
1826     kfree(dp->ports);
1827 err_destroy_stats:
1828     free_percpu(dp->stats_percpu);
1829 err_destroy_table:
1830     ovs_flow_tbl_destroy(&dp->table);
1831 err_destroy_dp:
1832     kfree(dp);
1833 err_destroy_reply:
1834     kfree_skb(reply);
1835 err:
1836     return err;
1837 }
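
/* For reference, a minimal userspace sketch of the request this handler
 * services (assuming libnl-3's genl API; error handling omitted and the
 * datapath name "dp0" is illustrative). OVS_DP_ATTR_NAME and
 * OVS_DP_ATTR_UPCALL_PID are mandatory, as checked at the top of
 * ovs_dp_cmd_new():
 *
 *     struct nl_sock *sk = nl_socket_alloc();
 *     struct nl_msg *msg = nlmsg_alloc();
 *     struct ovs_header *oh;
 *
 *     genl_connect(sk);
 *     oh = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ,
 *                      genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY),
 *                      sizeof(*oh), NLM_F_REQUEST | NLM_F_ECHO,
 *                      OVS_DP_CMD_NEW, OVS_DATAPATH_VERSION);
 *     oh->dp_ifindex = 0;    // unused by CMD_NEW, left zero
 *     nla_put_string(msg, OVS_DP_ATTR_NAME, "dp0");
 *     nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID,
 *                 nl_socket_get_local_port(sk));
 *     nl_send_auto(sk, msg);
 */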
1838 
1839 /* Called with ovs_mutex. */
1840 static void __dp_destroy(struct datapath *dp)
1841 {
1842     struct flow_table *table = &dp->table;
1843     int i;
1844 
1845     if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1846         tc_skb_ext_tc_disable();
1847 
1848     for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1849         struct vport *vport;
1850         struct hlist_node *n;
1851 
1852         hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1853             if (vport->port_no != OVSP_LOCAL)
1854                 ovs_dp_detach_port(vport);
1855     }
1856 
1857     list_del_rcu(&dp->list_node);
1858 
1859     /* OVSP_LOCAL is the datapath's internal port. Make sure that all
1860      * other ports in the datapath are destroyed before the datapath is freed.
1861      */
1862     ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1863 
1864     /* Flush the sw_flows from the tables here, so that the RCU callback
1865      * only has to release resources such as the dp, ports and tables.
1866      * This avoids issues such as RCU usage warnings.
1867      */
1868     table_instance_flow_flush(table, ovsl_dereference(table->ti),
1869                   ovsl_dereference(table->ufid_ti));
1870 
1871     /* RCU destroy the ports, meters and flow tables. */
1872     call_rcu(&dp->rcu, destroy_dp_rcu);
1873 }
1874 
1875 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1876 {
1877     struct sk_buff *reply;
1878     struct datapath *dp;
1879     int err;
1880 
1881     reply = ovs_dp_cmd_alloc_info();
1882     if (!reply)
1883         return -ENOMEM;
1884 
1885     ovs_lock();
1886     dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1887     err = PTR_ERR(dp);
1888     if (IS_ERR(dp))
1889         goto err_unlock_free;
1890 
1891     err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1892                    info->snd_seq, 0, OVS_DP_CMD_DEL);
1893     BUG_ON(err < 0);
1894 
1895     __dp_destroy(dp);
1896     ovs_unlock();
1897 
1898     ovs_notify(&dp_datapath_genl_family, reply, info);
1899 
1900     return 0;
1901 
1902 err_unlock_free:
1903     ovs_unlock();
1904     kfree_skb(reply);
1905     return err;
1906 }
1907 
1908 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1909 {
1910     struct sk_buff *reply;
1911     struct datapath *dp;
1912     int err;
1913 
1914     reply = ovs_dp_cmd_alloc_info();
1915     if (!reply)
1916         return -ENOMEM;
1917 
1918     ovs_lock();
1919     dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1920     err = PTR_ERR(dp);
1921     if (IS_ERR(dp))
1922         goto err_unlock_free;
1923 
1924     err = ovs_dp_change(dp, info->attrs);
1925     if (err)
1926         goto err_unlock_free;
1927 
1928     err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1929                    info->snd_seq, 0, OVS_DP_CMD_SET);
1930     BUG_ON(err < 0);
1931 
1932     ovs_unlock();
1933     ovs_notify(&dp_datapath_genl_family, reply, info);
1934 
1935     return 0;
1936 
1937 err_unlock_free:
1938     ovs_unlock();
1939     kfree_skb(reply);
1940     return err;
1941 }
1942 
1943 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1944 {
1945     struct sk_buff *reply;
1946     struct datapath *dp;
1947     int err;
1948 
1949     reply = ovs_dp_cmd_alloc_info();
1950     if (!reply)
1951         return -ENOMEM;
1952 
1953     ovs_lock();
1954     dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1955     if (IS_ERR(dp)) {
1956         err = PTR_ERR(dp);
1957         goto err_unlock_free;
1958     }
1959     err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1960                    info->snd_seq, 0, OVS_DP_CMD_GET);
1961     BUG_ON(err < 0);
1962     ovs_unlock();
1963 
1964     return genlmsg_reply(reply, info);
1965 
1966 err_unlock_free:
1967     ovs_unlock();
1968     kfree_skb(reply);
1969     return err;
1970 }
1971 
1972 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1973 {
1974     struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1975     struct datapath *dp;
1976     int skip = cb->args[0];
1977     int i = 0;
1978 
1979     ovs_lock();
1980     list_for_each_entry(dp, &ovs_net->dps, list_node) {
1981         if (i >= skip &&
1982             ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1983                      cb->nlh->nlmsg_seq, NLM_F_MULTI,
1984                      OVS_DP_CMD_GET) < 0)
1985             break;
1986         i++;
1987     }
1988     ovs_unlock();
1989 
1990     cb->args[0] = i;
1991 
1992     return skb->len;
1993 }
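
/* Dump resume sketch: netlink calls this repeatedly until no new data is
 * emitted. cb->args[0] records how many datapaths have been dumped, so a
 * later pass skips them:
 *
 *     pass 1: skip = 0, fills skb, stops at i = k, saves cb->args[0] = k
 *     pass 2: skip = k, resumes after the k-th datapath
 */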
1994 
1995 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1996     [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1997     [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1998     [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1999     [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
2000         PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
2001 };
2002 
2003 static const struct genl_small_ops dp_datapath_genl_ops[] = {
2004     { .cmd = OVS_DP_CMD_NEW,
2005       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2006       .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2007       .doit = ovs_dp_cmd_new
2008     },
2009     { .cmd = OVS_DP_CMD_DEL,
2010       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2011       .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2012       .doit = ovs_dp_cmd_del
2013     },
2014     { .cmd = OVS_DP_CMD_GET,
2015       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2016       .flags = 0,           /* OK for unprivileged users. */
2017       .doit = ovs_dp_cmd_get,
2018       .dumpit = ovs_dp_cmd_dump
2019     },
2020     { .cmd = OVS_DP_CMD_SET,
2021       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2022       .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2023       .doit = ovs_dp_cmd_set,
2024     },
2025 };
2026 
2027 static struct genl_family dp_datapath_genl_family __ro_after_init = {
2028     .hdrsize = sizeof(struct ovs_header),
2029     .name = OVS_DATAPATH_FAMILY,
2030     .version = OVS_DATAPATH_VERSION,
2031     .maxattr = OVS_DP_ATTR_MAX,
2032     .policy = datapath_policy,
2033     .netnsok = true,
2034     .parallel_ops = true,
2035     .small_ops = dp_datapath_genl_ops,
2036     .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
2037     .mcgrps = &ovs_dp_datapath_multicast_group,
2038     .n_mcgrps = 1,
2039     .module = THIS_MODULE,
2040 };
2041 
2042 /* Called with ovs_mutex or RCU read lock. */
2043 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
2044                    struct net *net, u32 portid, u32 seq,
2045                    u32 flags, u8 cmd, gfp_t gfp)
2046 {
2047     struct ovs_header *ovs_header;
2048     struct ovs_vport_stats vport_stats;
2049     int err;
2050 
2051     ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
2052                  flags, cmd);
2053     if (!ovs_header)
2054         return -EMSGSIZE;
2055 
2056     ovs_header->dp_ifindex = get_dpifindex(vport->dp);
2057 
2058     if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
2059         nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
2060         nla_put_string(skb, OVS_VPORT_ATTR_NAME,
2061                ovs_vport_name(vport)) ||
2062         nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
2063         goto nla_put_failure;
2064 
2065     if (!net_eq(net, dev_net(vport->dev))) {
2066         int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
2067 
2068         if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
2069             goto nla_put_failure;
2070     }
2071 
2072     ovs_vport_get_stats(vport, &vport_stats);
2073     if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
2074               sizeof(struct ovs_vport_stats), &vport_stats,
2075               OVS_VPORT_ATTR_PAD))
2076         goto nla_put_failure;
2077 
2078     if (ovs_vport_get_upcall_portids(vport, skb))
2079         goto nla_put_failure;
2080 
2081     err = ovs_vport_get_options(vport, skb);
2082     if (err == -EMSGSIZE)
2083         goto error;
2084 
2085     genlmsg_end(skb, ovs_header);
2086     return 0;
2087 
2088 nla_put_failure:
2089     err = -EMSGSIZE;
2090 error:
2091     genlmsg_cancel(skb, ovs_header);
2092     return err;
2093 }
2094 
2095 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2096 {
2097     return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2098 }
2099 
2100 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2101 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2102                      u32 portid, u32 seq, u8 cmd)
2103 {
2104     struct sk_buff *skb;
2105     int retval;
2106 
2107     skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2108     if (!skb)
2109         return ERR_PTR(-ENOMEM);
2110 
2111     retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2112                      GFP_KERNEL);
2113     BUG_ON(retval < 0);
2114 
2115     return skb;
2116 }
2117 
2118 /* Called with ovs_mutex or RCU read lock. */
2119 static struct vport *lookup_vport(struct net *net,
2120                   const struct ovs_header *ovs_header,
2121                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2122 {
2123     struct datapath *dp;
2124     struct vport *vport;
2125 
2126     if (a[OVS_VPORT_ATTR_IFINDEX])
2127         return ERR_PTR(-EOPNOTSUPP);
2128     if (a[OVS_VPORT_ATTR_NAME]) {
2129         vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2130         if (!vport)
2131             return ERR_PTR(-ENODEV);
2132         if (ovs_header->dp_ifindex &&
2133             ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2134             return ERR_PTR(-ENODEV);
2135         return vport;
2136     } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2137         u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2138 
2139         if (port_no >= DP_MAX_PORTS)
2140             return ERR_PTR(-EFBIG);
2141 
2142         dp = get_dp(net, ovs_header->dp_ifindex);
2143         if (!dp)
2144             return ERR_PTR(-ENODEV);
2145 
2146         vport = ovs_vport_ovsl_rcu(dp, port_no);
2147         if (!vport)
2148             return ERR_PTR(-ENODEV);
2149         return vport;
2150     } else
2151         return ERR_PTR(-EINVAL);
2152 
2153 }
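
/* A vport can thus be addressed in two ways (a sketch of the attribute
 * combinations accepted above; names and numbers are illustrative):
 *
 *     OVS_VPORT_ATTR_NAME    = "vport1"  // global name lookup; dp_ifindex,
 *                                        // if nonzero, is cross-checked
 *     OVS_VPORT_ATTR_PORT_NO = 5         // needs ovs_header->dp_ifindex
 *
 * Supplying OVS_VPORT_ATTR_IFINDEX is rejected with -EOPNOTSUPP.
 */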
2154 
2155 static unsigned int ovs_get_max_headroom(struct datapath *dp)
2156 {
2157     unsigned int dev_headroom, max_headroom = 0;
2158     struct net_device *dev;
2159     struct vport *vport;
2160     int i;
2161 
2162     for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2163         hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2164                      lockdep_ovsl_is_held()) {
2165             dev = vport->dev;
2166             dev_headroom = netdev_get_fwd_headroom(dev);
2167             if (dev_headroom > max_headroom)
2168                 max_headroom = dev_headroom;
2169         }
2170     }
2171 
2172     return max_headroom;
2173 }
2174 
2175 /* Called with ovs_mutex */
2176 static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2177 {
2178     struct vport *vport;
2179     int i;
2180 
2181     dp->max_headroom = new_headroom;
2182     for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2183         hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2184                      lockdep_ovsl_is_held())
2185             netdev_set_rx_headroom(vport->dev, new_headroom);
2186     }
2187 }
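
/* Headroom bookkeeping example: if the attached ports report forward
 * headrooms of 64, 128 and 32 bytes, dp->max_headroom becomes 128 and
 * every vport's RX headroom is set to 128, so packets forwarded to the
 * deepest-encapsulating device need no reallocation.
 */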
2188 
2189 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2190 {
2191     struct nlattr **a = info->attrs;
2192     struct ovs_header *ovs_header = info->userhdr;
2193     struct vport_parms parms;
2194     struct sk_buff *reply;
2195     struct vport *vport;
2196     struct datapath *dp;
2197     unsigned int new_headroom;
2198     u32 port_no;
2199     int err;
2200 
2201     if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2202         !a[OVS_VPORT_ATTR_UPCALL_PID])
2203         return -EINVAL;
2204     if (a[OVS_VPORT_ATTR_IFINDEX])
2205         return -EOPNOTSUPP;
2206 
2207     port_no = a[OVS_VPORT_ATTR_PORT_NO]
2208         ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2209     if (port_no >= DP_MAX_PORTS)
2210         return -EFBIG;
2211 
2212     reply = ovs_vport_cmd_alloc_info();
2213     if (!reply)
2214         return -ENOMEM;
2215 
2216     ovs_lock();
2217 restart:
2218     dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2219     err = -ENODEV;
2220     if (!dp)
2221         goto exit_unlock_free;
2222 
2223     if (port_no) {
2224         vport = ovs_vport_ovsl(dp, port_no);
2225         err = -EBUSY;
2226         if (vport)
2227             goto exit_unlock_free;
2228     } else {
2229         for (port_no = 1; ; port_no++) {
2230             if (port_no >= DP_MAX_PORTS) {
2231                 err = -EFBIG;
2232                 goto exit_unlock_free;
2233             }
2234             vport = ovs_vport_ovsl(dp, port_no);
2235             if (!vport)
2236                 break;
2237         }
2238     }
2239 
2240     parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2241     parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2242     parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2243     parms.dp = dp;
2244     parms.port_no = port_no;
2245     parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2246 
2247     vport = new_vport(&parms);
2248     err = PTR_ERR(vport);
2249     if (IS_ERR(vport)) {
2250         if (err == -EAGAIN)
2251             goto restart;
2252         goto exit_unlock_free;
2253     }
2254 
2255     err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2256                       info->snd_portid, info->snd_seq, 0,
2257                       OVS_VPORT_CMD_NEW, GFP_KERNEL);
2258 
2259     new_headroom = netdev_get_fwd_headroom(vport->dev);
2260 
2261     if (new_headroom > dp->max_headroom)
2262         ovs_update_headroom(dp, new_headroom);
2263     else
2264         netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2265 
2266     BUG_ON(err < 0);
2267     ovs_unlock();
2268 
2269     ovs_notify(&dp_vport_genl_family, reply, info);
2270     return 0;
2271 
2272 exit_unlock_free:
2273     ovs_unlock();
2274     kfree_skb(reply);
2275     return err;
2276 }
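
/* Port number selection above: if userspace passes OVS_VPORT_ATTR_PORT_NO,
 * that exact slot is claimed (-EBUSY if already taken); otherwise the first
 * free port number starting from 1 is used, e.g.:
 *
 *     request without PORT_NO, ports 1 and 2 in use  ->  new port_no = 3
 */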
2277 
2278 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2279 {
2280     struct nlattr **a = info->attrs;
2281     struct sk_buff *reply;
2282     struct vport *vport;
2283     int err;
2284 
2285     reply = ovs_vport_cmd_alloc_info();
2286     if (!reply)
2287         return -ENOMEM;
2288 
2289     ovs_lock();
2290     vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2291     err = PTR_ERR(vport);
2292     if (IS_ERR(vport))
2293         goto exit_unlock_free;
2294 
2295     if (a[OVS_VPORT_ATTR_TYPE] &&
2296         nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2297         err = -EINVAL;
2298         goto exit_unlock_free;
2299     }
2300 
2301     if (a[OVS_VPORT_ATTR_OPTIONS]) {
2302         err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2303         if (err)
2304             goto exit_unlock_free;
2305     }
2306 
2308     if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2309         struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2310 
2311         err = ovs_vport_set_upcall_portids(vport, ids);
2312         if (err)
2313             goto exit_unlock_free;
2314     }
2315 
2316     err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2317                       info->snd_portid, info->snd_seq, 0,
2318                       OVS_VPORT_CMD_SET, GFP_KERNEL);
2319     BUG_ON(err < 0);
2320 
2321     ovs_unlock();
2322     ovs_notify(&dp_vport_genl_family, reply, info);
2323     return 0;
2324 
2325 exit_unlock_free:
2326     ovs_unlock();
2327     kfree_skb(reply);
2328     return err;
2329 }
2330 
2331 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2332 {
2333     bool update_headroom = false;
2334     struct nlattr **a = info->attrs;
2335     struct sk_buff *reply;
2336     struct datapath *dp;
2337     struct vport *vport;
2338     unsigned int new_headroom;
2339     int err;
2340 
2341     reply = ovs_vport_cmd_alloc_info();
2342     if (!reply)
2343         return -ENOMEM;
2344 
2345     ovs_lock();
2346     vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2347     err = PTR_ERR(vport);
2348     if (IS_ERR(vport))
2349         goto exit_unlock_free;
2350 
2351     if (vport->port_no == OVSP_LOCAL) {
2352         err = -EINVAL;
2353         goto exit_unlock_free;
2354     }
2355 
2356     err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2357                       info->snd_portid, info->snd_seq, 0,
2358                       OVS_VPORT_CMD_DEL, GFP_KERNEL);
2359     BUG_ON(err < 0);
2360 
2361     /* The vport deletion may trigger a dp headroom update. */
2362     dp = vport->dp;
2363     if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2364         update_headroom = true;
2365 
2366     netdev_reset_rx_headroom(vport->dev);
2367     ovs_dp_detach_port(vport);
2368 
2369     if (update_headroom) {
2370         new_headroom = ovs_get_max_headroom(dp);
2371 
2372         if (new_headroom < dp->max_headroom)
2373             ovs_update_headroom(dp, new_headroom);
2374     }
2375     ovs_unlock();
2376 
2377     ovs_notify(&dp_vport_genl_family, reply, info);
2378     return 0;
2379 
2380 exit_unlock_free:
2381     ovs_unlock();
2382     kfree_skb(reply);
2383     return err;
2384 }
2385 
2386 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2387 {
2388     struct nlattr **a = info->attrs;
2389     struct ovs_header *ovs_header = info->userhdr;
2390     struct sk_buff *reply;
2391     struct vport *vport;
2392     int err;
2393 
2394     reply = ovs_vport_cmd_alloc_info();
2395     if (!reply)
2396         return -ENOMEM;
2397 
2398     rcu_read_lock();
2399     vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2400     err = PTR_ERR(vport);
2401     if (IS_ERR(vport))
2402         goto exit_unlock_free;
2403     err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2404                       info->snd_portid, info->snd_seq, 0,
2405                       OVS_VPORT_CMD_GET, GFP_ATOMIC);
2406     BUG_ON(err < 0);
2407     rcu_read_unlock();
2408 
2409     return genlmsg_reply(reply, info);
2410 
2411 exit_unlock_free:
2412     rcu_read_unlock();
2413     kfree_skb(reply);
2414     return err;
2415 }
2416 
2417 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2418 {
2419     struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2420     struct datapath *dp;
2421     int bucket = cb->args[0], skip = cb->args[1];
2422     int i, j = 0;
2423 
2424     rcu_read_lock();
2425     dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2426     if (!dp) {
2427         rcu_read_unlock();
2428         return -ENODEV;
2429     }
2430     for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2431         struct vport *vport;
2432 
2433         j = 0;
2434         hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2435             if (j >= skip &&
2436                 ovs_vport_cmd_fill_info(vport, skb,
2437                             sock_net(skb->sk),
2438                             NETLINK_CB(cb->skb).portid,
2439                             cb->nlh->nlmsg_seq,
2440                             NLM_F_MULTI,
2441                             OVS_VPORT_CMD_GET,
2442                             GFP_ATOMIC) < 0)
2443                 goto out;
2444 
2445             j++;
2446         }
2447         skip = 0;
2448     }
2449 out:
2450     rcu_read_unlock();
2451 
2452     cb->args[0] = i;
2453     cb->args[1] = j;
2454 
2455     return skb->len;
2456 }
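
/* Unlike the datapath dump, this resume cursor has two parts:
 * cb->args[0] is the hash bucket and cb->args[1] the offset within it,
 * so a restarted dump re-enters the exact bucket it stopped in:
 *
 *     cb->args[0] = i;   // bucket to resume from
 *     cb->args[1] = j;   // entries of that bucket already dumped
 */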
2457 
2458 static void ovs_dp_masks_rebalance(struct work_struct *work)
2459 {
2460     struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2461                            masks_rebalance.work);
2462     struct datapath *dp;
2463 
2464     ovs_lock();
2465 
2466     list_for_each_entry(dp, &ovs_net->dps, list_node)
2467         ovs_flow_masks_rebalance(&dp->table);
2468 
2469     ovs_unlock();
2470 
2471     schedule_delayed_work(&ovs_net->masks_rebalance,
2472                   msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2473 }
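
/* Self-rescheduling delayed work: each run re-arms itself, yielding a
 * periodic tick every DP_MASKS_REBALANCE_INTERVAL ms, and the matching
 * cancel_delayed_work_sync() in ovs_exit_net() stops the cycle. A minimal
 * sketch of the pattern (work_fn, dw, do_work and INTERVAL are
 * illustrative names):
 *
 *     static void work_fn(struct work_struct *work)
 *     {
 *             do_work();
 *             schedule_delayed_work(&dw, msecs_to_jiffies(INTERVAL));
 *     }
 */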
2474 
2475 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2476     [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2477     [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2478     [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2479     [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2480     [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2481     [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2482     [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2483     [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2484 };
2485 
2486 static const struct genl_small_ops dp_vport_genl_ops[] = {
2487     { .cmd = OVS_VPORT_CMD_NEW,
2488       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2489       .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2490       .doit = ovs_vport_cmd_new
2491     },
2492     { .cmd = OVS_VPORT_CMD_DEL,
2493       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2494       .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2495       .doit = ovs_vport_cmd_del
2496     },
2497     { .cmd = OVS_VPORT_CMD_GET,
2498       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2499       .flags = 0,           /* OK for unprivileged users. */
2500       .doit = ovs_vport_cmd_get,
2501       .dumpit = ovs_vport_cmd_dump
2502     },
2503     { .cmd = OVS_VPORT_CMD_SET,
2504       .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2505       .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2506       .doit = ovs_vport_cmd_set,
2507     },
2508 };
2509 
2510 struct genl_family dp_vport_genl_family __ro_after_init = {
2511     .hdrsize = sizeof(struct ovs_header),
2512     .name = OVS_VPORT_FAMILY,
2513     .version = OVS_VPORT_VERSION,
2514     .maxattr = OVS_VPORT_ATTR_MAX,
2515     .policy = vport_policy,
2516     .netnsok = true,
2517     .parallel_ops = true,
2518     .small_ops = dp_vport_genl_ops,
2519     .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
2520     .mcgrps = &ovs_dp_vport_multicast_group,
2521     .n_mcgrps = 1,
2522     .module = THIS_MODULE,
2523 };
2524 
2525 static struct genl_family * const dp_genl_families[] = {
2526     &dp_datapath_genl_family,
2527     &dp_vport_genl_family,
2528     &dp_flow_genl_family,
2529     &dp_packet_genl_family,
2530     &dp_meter_genl_family,
2531 #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2532     &dp_ct_limit_genl_family,
2533 #endif
2534 };
2535 
2536 static void dp_unregister_genl(int n_families)
2537 {
2538     int i;
2539 
2540     for (i = 0; i < n_families; i++)
2541         genl_unregister_family(dp_genl_families[i]);
2542 }
2543 
2544 static int __init dp_register_genl(void)
2545 {
2546     int err;
2547     int i;
2548 
2549     for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2550 
2551         err = genl_register_family(dp_genl_families[i]);
2552         if (err)
2553             goto error;
2554     }
2555 
2556     return 0;
2557 
2558 error:
2559     dp_unregister_genl(i);
2560     return err;
2561 }
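
/* Partial-unwind example: if registration fails at index i, only the
 * families [0, i) were registered, and dp_unregister_genl(i) removes
 * exactly those; e.g. a failure on the third family unregisters the
 * first two and returns the error.
 */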
2562 
2563 static int __net_init ovs_init_net(struct net *net)
2564 {
2565     struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2566     int err;
2567 
2568     INIT_LIST_HEAD(&ovs_net->dps);
2569     INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2570     INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
2571 
2572     err = ovs_ct_init(net);
2573     if (err)
2574         return err;
2575 
2576     schedule_delayed_work(&ovs_net->masks_rebalance,
2577                   msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2578     return 0;
2579 }
2580 
2581 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2582                         struct list_head *head)
2583 {
2584     struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2585     struct datapath *dp;
2586 
2587     list_for_each_entry(dp, &ovs_net->dps, list_node) {
2588         int i;
2589 
2590         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2591             struct vport *vport;
2592 
2593             hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2594                 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2595                     continue;
2596 
2597                 if (dev_net(vport->dev) == dnet)
2598                     list_add(&vport->detach_list, head);
2599             }
2600         }
2601     }
2602 }
2603 
2604 static void __net_exit ovs_exit_net(struct net *dnet)
2605 {
2606     struct datapath *dp, *dp_next;
2607     struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2608     struct vport *vport, *vport_next;
2609     struct net *net;
2610     LIST_HEAD(head);
2611 
2612     ovs_lock();
2613 
2614     ovs_ct_exit(dnet);
2615 
2616     list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2617         __dp_destroy(dp);
2618 
2619     down_read(&net_rwsem);
2620     for_each_net(net)
2621         list_vports_from_net(net, dnet, &head);
2622     up_read(&net_rwsem);
2623 
2624     /* Detach all vports from given namespace. */
2625     list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2626         list_del(&vport->detach_list);
2627         ovs_dp_detach_port(vport);
2628     }
2629 
2630     ovs_unlock();
2631 
2632     cancel_delayed_work_sync(&ovs_net->masks_rebalance);
2633     cancel_work_sync(&ovs_net->dp_notify_work);
2634 }
2635 
2636 static struct pernet_operations ovs_net_ops = {
2637     .init = ovs_init_net,
2638     .exit = ovs_exit_net,
2639     .id   = &ovs_net_id,
2640     .size = sizeof(struct ovs_net),
2641 };
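
/* Because .id and .size are set, the pernet core allocates a zeroed
 * struct ovs_net for every network namespace, retrievable (as done
 * throughout this file) with:
 *
 *     struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
 */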
2642 
2643 static int __init dp_init(void)
2644 {
2645     int err;
2646 
2647     BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
2648              sizeof_field(struct sk_buff, cb));
2649 
2650     pr_info("Open vSwitch switching datapath\n");
2651 
2652     err = action_fifos_init();
2653     if (err)
2654         goto error;
2655 
2656     err = ovs_internal_dev_rtnl_link_register();
2657     if (err)
2658         goto error_action_fifos_exit;
2659 
2660     err = ovs_flow_init();
2661     if (err)
2662         goto error_unreg_rtnl_link;
2663 
2664     err = ovs_vport_init();
2665     if (err)
2666         goto error_flow_exit;
2667 
2668     err = register_pernet_device(&ovs_net_ops);
2669     if (err)
2670         goto error_vport_exit;
2671 
2672     err = register_netdevice_notifier(&ovs_dp_device_notifier);
2673     if (err)
2674         goto error_netns_exit;
2675 
2676     err = ovs_netdev_init();
2677     if (err)
2678         goto error_unreg_notifier;
2679 
2680     err = dp_register_genl();
2681     if (err < 0)
2682         goto error_unreg_netdev;
2683 
2684     return 0;
2685 
2686 error_unreg_netdev:
2687     ovs_netdev_exit();
2688 error_unreg_notifier:
2689     unregister_netdevice_notifier(&ovs_dp_device_notifier);
2690 error_netns_exit:
2691     unregister_pernet_device(&ovs_net_ops);
2692 error_vport_exit:
2693     ovs_vport_exit();
2694 error_flow_exit:
2695     ovs_flow_exit();
2696 error_unreg_rtnl_link:
2697     ovs_internal_dev_rtnl_link_unregister();
2698 error_action_fifos_exit:
2699     action_fifos_exit();
2700 error:
2701     return err;
2702 }
2703 
2704 static void dp_cleanup(void)
2705 {
2706     dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2707     ovs_netdev_exit();
2708     unregister_netdevice_notifier(&ovs_dp_device_notifier);
2709     unregister_pernet_device(&ovs_net_ops);
2710     rcu_barrier();
2711     ovs_vport_exit();
2712     ovs_flow_exit();
2713     ovs_internal_dev_rtnl_link_unregister();
2714     action_fifos_exit();
2715 }
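
/* Note the teardown ordering above: rcu_barrier() waits for all pending
 * RCU callbacks (e.g. destroy_dp_rcu()) to finish before ovs_vport_exit()
 * and ovs_flow_exit() tear down the caches those callbacks still touch.
 */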
2716 
2717 module_init(dp_init);
2718 module_exit(dp_cleanup);
2719 
2720 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2721 MODULE_LICENSE("GPL");
2722 MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2723 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2724 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2725 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2726 MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2727 MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);