Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (c) 2007-2014 Nicira, Inc.
0004  */
0005 
0006 #include <linux/etherdevice.h>
0007 #include <linux/if.h>
0008 #include <linux/if_vlan.h>
0009 #include <linux/jhash.h>
0010 #include <linux/kernel.h>
0011 #include <linux/list.h>
0012 #include <linux/mutex.h>
0013 #include <linux/percpu.h>
0014 #include <linux/rcupdate.h>
0015 #include <linux/rtnetlink.h>
0016 #include <linux/compat.h>
0017 #include <net/net_namespace.h>
0018 #include <linux/module.h>
0019 
0020 #include "datapath.h"
0021 #include "vport.h"
0022 #include "vport-internal_dev.h"
0023 
/* Registered vport implementations.  Entries are added and removed with
 * ovs_lock() held.
 */
static LIST_HEAD(vport_ops_list);

/* Number of buckets in dev_table.  hash_bucket() selects a bucket by masking
 * the hash with (VPORT_HASH_BUCKETS - 1), so this must stay a power of two.
 */
#define VPORT_HASH_BUCKETS 1024

/* Bucket array of the vport name hash table, allocated in ovs_vport_init().
 * Protected by RCU read lock for reading, ovs_mutex for writing.
 */
static struct hlist_head *dev_table;
0029 
0030 /**
0031  *  ovs_vport_init - initialize vport subsystem
0032  *
0033  * Called at module load time to initialize the vport subsystem.
0034  */
0035 int ovs_vport_init(void)
0036 {
0037     dev_table = kcalloc(VPORT_HASH_BUCKETS, sizeof(struct hlist_head),
0038                 GFP_KERNEL);
0039     if (!dev_table)
0040         return -ENOMEM;
0041 
0042     return 0;
0043 }
0044 
0045 /**
0046  *  ovs_vport_exit - shutdown vport subsystem
0047  *
0048  * Called at module exit time to shutdown the vport subsystem.
0049  */
0050 void ovs_vport_exit(void)
0051 {
0052     kfree(dev_table);
0053 }
0054 
0055 static struct hlist_head *hash_bucket(const struct net *net, const char *name)
0056 {
0057     unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
0058     return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
0059 }
0060 
0061 int __ovs_vport_ops_register(struct vport_ops *ops)
0062 {
0063     int err = -EEXIST;
0064     struct vport_ops *o;
0065 
0066     ovs_lock();
0067     list_for_each_entry(o, &vport_ops_list, list)
0068         if (ops->type == o->type)
0069             goto errout;
0070 
0071     list_add_tail(&ops->list, &vport_ops_list);
0072     err = 0;
0073 errout:
0074     ovs_unlock();
0075     return err;
0076 }
0077 EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
0078 
0079 void ovs_vport_ops_unregister(struct vport_ops *ops)
0080 {
0081     ovs_lock();
0082     list_del(&ops->list);
0083     ovs_unlock();
0084 }
0085 EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
0086 
0087 /**
0088  *  ovs_vport_locate - find a port that has already been created
0089  *
0090  * @net: network namespace
0091  * @name: name of port to find
0092  *
0093  * Must be called with ovs or RCU read lock.
0094  */
0095 struct vport *ovs_vport_locate(const struct net *net, const char *name)
0096 {
0097     struct hlist_head *bucket = hash_bucket(net, name);
0098     struct vport *vport;
0099 
0100     hlist_for_each_entry_rcu(vport, bucket, hash_node,
0101                  lockdep_ovsl_is_held())
0102         if (!strcmp(name, ovs_vport_name(vport)) &&
0103             net_eq(ovs_dp_get_net(vport->dp), net))
0104             return vport;
0105 
0106     return NULL;
0107 }
0108 
0109 /**
0110  *  ovs_vport_alloc - allocate and initialize new vport
0111  *
0112  * @priv_size: Size of private data area to allocate.
0113  * @ops: vport device ops
0114  * @parms: information about new vport.
0115  *
0116  * Allocate and initialize a new vport defined by @ops.  The vport will contain
0117  * a private data area of size @priv_size that can be accessed using
0118  * vport_priv().  Some parameters of the vport will be initialized from @parms.
0119  * @vports that are no longer needed should be released with
0120  * vport_free().
0121  */
0122 struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
0123                   const struct vport_parms *parms)
0124 {
0125     struct vport *vport;
0126     size_t alloc_size;
0127 
0128     alloc_size = sizeof(struct vport);
0129     if (priv_size) {
0130         alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
0131         alloc_size += priv_size;
0132     }
0133 
0134     vport = kzalloc(alloc_size, GFP_KERNEL);
0135     if (!vport)
0136         return ERR_PTR(-ENOMEM);
0137 
0138     vport->dp = parms->dp;
0139     vport->port_no = parms->port_no;
0140     vport->ops = ops;
0141     INIT_HLIST_NODE(&vport->dp_hash_node);
0142 
0143     if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
0144         kfree(vport);
0145         return ERR_PTR(-EINVAL);
0146     }
0147 
0148     return vport;
0149 }
0150 EXPORT_SYMBOL_GPL(ovs_vport_alloc);
0151 
0152 /**
0153  *  ovs_vport_free - uninitialize and free vport
0154  *
0155  * @vport: vport to free
0156  *
0157  * Frees a vport allocated with vport_alloc() when it is no longer needed.
0158  *
0159  * The caller must ensure that an RCU grace period has passed since the last
0160  * time @vport was in a datapath.
0161  */
0162 void ovs_vport_free(struct vport *vport)
0163 {
0164     /* vport is freed from RCU callback or error path, Therefore
0165      * it is safe to use raw dereference.
0166      */
0167     kfree(rcu_dereference_raw(vport->upcall_portids));
0168     kfree(vport);
0169 }
0170 EXPORT_SYMBOL_GPL(ovs_vport_free);
0171 
0172 static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
0173 {
0174     struct vport_ops *ops;
0175 
0176     list_for_each_entry(ops, &vport_ops_list, list)
0177         if (ops->type == parms->type)
0178             return ops;
0179 
0180     return NULL;
0181 }
0182 
0183 /**
0184  *  ovs_vport_add - add vport device (for kernel callers)
0185  *
0186  * @parms: Information about new vport.
0187  *
0188  * Creates a new vport with the specified configuration (which is dependent on
0189  * device type).  ovs_mutex must be held.
0190  */
0191 struct vport *ovs_vport_add(const struct vport_parms *parms)
0192 {
0193     struct vport_ops *ops;
0194     struct vport *vport;
0195 
0196     ops = ovs_vport_lookup(parms);
0197     if (ops) {
0198         struct hlist_head *bucket;
0199 
0200         if (!try_module_get(ops->owner))
0201             return ERR_PTR(-EAFNOSUPPORT);
0202 
0203         vport = ops->create(parms);
0204         if (IS_ERR(vport)) {
0205             module_put(ops->owner);
0206             return vport;
0207         }
0208 
0209         bucket = hash_bucket(ovs_dp_get_net(vport->dp),
0210                      ovs_vport_name(vport));
0211         hlist_add_head_rcu(&vport->hash_node, bucket);
0212         return vport;
0213     }
0214 
0215     /* Unlock to attempt module load and return -EAGAIN if load
0216      * was successful as we need to restart the port addition
0217      * workflow.
0218      */
0219     ovs_unlock();
0220     request_module("vport-type-%d", parms->type);
0221     ovs_lock();
0222 
0223     if (!ovs_vport_lookup(parms))
0224         return ERR_PTR(-EAFNOSUPPORT);
0225     else
0226         return ERR_PTR(-EAGAIN);
0227 }
0228 
0229 /**
0230  *  ovs_vport_set_options - modify existing vport device (for kernel callers)
0231  *
0232  * @vport: vport to modify.
0233  * @options: New configuration.
0234  *
0235  * Modifies an existing device with the specified configuration (which is
0236  * dependent on device type).  ovs_mutex must be held.
0237  */
0238 int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
0239 {
0240     if (!vport->ops->set_options)
0241         return -EOPNOTSUPP;
0242     return vport->ops->set_options(vport, options);
0243 }
0244 
0245 /**
0246  *  ovs_vport_del - delete existing vport device
0247  *
0248  * @vport: vport to delete.
0249  *
0250  * Detaches @vport from its datapath and destroys it.  ovs_mutex must
0251  * be held.
0252  */
0253 void ovs_vport_del(struct vport *vport)
0254 {
0255     hlist_del_rcu(&vport->hash_node);
0256     module_put(vport->ops->owner);
0257     vport->ops->destroy(vport);
0258 }
0259 
0260 /**
0261  *  ovs_vport_get_stats - retrieve device stats
0262  *
0263  * @vport: vport from which to retrieve the stats
0264  * @stats: location to store stats
0265  *
0266  * Retrieves transmit, receive, and error stats for the given device.
0267  *
0268  * Must be called with ovs_mutex or rcu_read_lock.
0269  */
0270 void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
0271 {
0272     const struct rtnl_link_stats64 *dev_stats;
0273     struct rtnl_link_stats64 temp;
0274 
0275     dev_stats = dev_get_stats(vport->dev, &temp);
0276     stats->rx_errors  = dev_stats->rx_errors;
0277     stats->tx_errors  = dev_stats->tx_errors;
0278     stats->tx_dropped = dev_stats->tx_dropped;
0279     stats->rx_dropped = dev_stats->rx_dropped;
0280 
0281     stats->rx_bytes   = dev_stats->rx_bytes;
0282     stats->rx_packets = dev_stats->rx_packets;
0283     stats->tx_bytes   = dev_stats->tx_bytes;
0284     stats->tx_packets = dev_stats->tx_packets;
0285 }
0286 
0287 /**
0288  *  ovs_vport_get_options - retrieve device options
0289  *
0290  * @vport: vport from which to retrieve the options.
0291  * @skb: sk_buff where options should be appended.
0292  *
0293  * Retrieves the configuration of the given device, appending an
0294  * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
0295  * vport-specific attributes to @skb.
0296  *
0297  * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
0298  * negative error code if a real error occurred.  If an error occurs, @skb is
0299  * left unmodified.
0300  *
0301  * Must be called with ovs_mutex or rcu_read_lock.
0302  */
0303 int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
0304 {
0305     struct nlattr *nla;
0306     int err;
0307 
0308     if (!vport->ops->get_options)
0309         return 0;
0310 
0311     nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_OPTIONS);
0312     if (!nla)
0313         return -EMSGSIZE;
0314 
0315     err = vport->ops->get_options(vport, skb);
0316     if (err) {
0317         nla_nest_cancel(skb, nla);
0318         return err;
0319     }
0320 
0321     nla_nest_end(skb, nla);
0322     return 0;
0323 }
0324 
0325 /**
0326  *  ovs_vport_set_upcall_portids - set upcall portids of @vport.
0327  *
0328  * @vport: vport to modify.
0329  * @ids: new configuration, an array of port ids.
0330  *
0331  * Sets the vport's upcall_portids to @ids.
0332  *
0333  * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
0334  * as an array of U32.
0335  *
0336  * Must be called with ovs_mutex.
0337  */
0338 int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
0339 {
0340     struct vport_portids *old, *vport_portids;
0341 
0342     if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
0343         return -EINVAL;
0344 
0345     old = ovsl_dereference(vport->upcall_portids);
0346 
0347     vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
0348                 GFP_KERNEL);
0349     if (!vport_portids)
0350         return -ENOMEM;
0351 
0352     vport_portids->n_ids = nla_len(ids) / sizeof(u32);
0353     vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
0354     nla_memcpy(vport_portids->ids, ids, nla_len(ids));
0355 
0356     rcu_assign_pointer(vport->upcall_portids, vport_portids);
0357 
0358     if (old)
0359         kfree_rcu(old, rcu);
0360     return 0;
0361 }
0362 
0363 /**
0364  *  ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
0365  *
0366  * @vport: vport from which to retrieve the portids.
0367  * @skb: sk_buff where portids should be appended.
0368  *
0369  * Retrieves the configuration of the given vport, appending the
0370  * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
0371  * portids to @skb.
0372  *
0373  * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
0374  * If an error occurs, @skb is left unmodified.  Must be called with
0375  * ovs_mutex or rcu_read_lock.
0376  */
0377 int ovs_vport_get_upcall_portids(const struct vport *vport,
0378                  struct sk_buff *skb)
0379 {
0380     struct vport_portids *ids;
0381 
0382     ids = rcu_dereference_ovsl(vport->upcall_portids);
0383 
0384     if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
0385         return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
0386                    ids->n_ids * sizeof(u32), (void *)ids->ids);
0387     else
0388         return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
0389 }
0390 
0391 /**
0392  *  ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
0393  *
0394  * @vport: vport from which the missed packet is received.
0395  * @skb: skb that the missed packet was received.
0396  *
0397  * Uses the skb_get_hash() to select the upcall portid to send the
0398  * upcall.
0399  *
0400  * Returns the portid of the target socket.  Must be called with rcu_read_lock.
0401  */
0402 u32 ovs_vport_find_upcall_portid(const struct vport *vport,
0403                  struct sk_buff *skb)
0404 {
0405     struct vport_portids *ids;
0406     u32 ids_index;
0407     u32 hash;
0408 
0409     ids = rcu_dereference(vport->upcall_portids);
0410 
0411     /* If there is only one portid, select it in the fast-path. */
0412     if (ids->n_ids == 1)
0413         return ids->ids[0];
0414 
0415     hash = skb_get_hash(skb);
0416     ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
0417     return ids->ids[ids_index];
0418 }
0419 
0420 /**
0421  *  ovs_vport_receive - pass up received packet to the datapath for processing
0422  *
0423  * @vport: vport that received the packet
0424  * @skb: skb that was received
0425  * @tun_info: tunnel (if any) that carried packet
0426  *
0427  * Must be called with rcu_read_lock.  The packet cannot be shared and
0428  * skb->data should point to the Ethernet header.
0429  */
0430 int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
0431               const struct ip_tunnel_info *tun_info)
0432 {
0433     struct sw_flow_key key;
0434     int error;
0435 
0436     OVS_CB(skb)->input_vport = vport;
0437     OVS_CB(skb)->mru = 0;
0438     OVS_CB(skb)->cutlen = 0;
0439     if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
0440         u32 mark;
0441 
0442         mark = skb->mark;
0443         skb_scrub_packet(skb, true);
0444         skb->mark = mark;
0445         tun_info = NULL;
0446     }
0447 
0448     /* Extract flow from 'skb' into 'key'. */
0449     error = ovs_flow_key_extract(tun_info, skb, &key);
0450     if (unlikely(error)) {
0451         kfree_skb(skb);
0452         return error;
0453     }
0454     ovs_dp_process_packet(skb, &key);
0455     return 0;
0456 }
0457 
0458 static int packet_length(const struct sk_buff *skb,
0459              struct net_device *dev)
0460 {
0461     int length = skb->len - dev->hard_header_len;
0462 
0463     if (!skb_vlan_tag_present(skb) &&
0464         eth_type_vlan(skb->protocol))
0465         length -= VLAN_HLEN;
0466 
0467     /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
0468      * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
0469      * account for 802.1ad. e.g. is_skb_forwardable().
0470      */
0471 
0472     return length > 0 ? length : 0;
0473 }
0474 
0475 void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
0476 {
0477     int mtu = vport->dev->mtu;
0478 
0479     switch (vport->dev->type) {
0480     case ARPHRD_NONE:
0481         if (mac_proto == MAC_PROTO_ETHERNET) {
0482             skb_reset_network_header(skb);
0483             skb_reset_mac_len(skb);
0484             skb->protocol = htons(ETH_P_TEB);
0485         } else if (mac_proto != MAC_PROTO_NONE) {
0486             WARN_ON_ONCE(1);
0487             goto drop;
0488         }
0489         break;
0490     case ARPHRD_ETHER:
0491         if (mac_proto != MAC_PROTO_ETHERNET)
0492             goto drop;
0493         break;
0494     default:
0495         goto drop;
0496     }
0497 
0498     if (unlikely(packet_length(skb, vport->dev) > mtu &&
0499              !skb_is_gso(skb))) {
0500         vport->dev->stats.tx_errors++;
0501         if (vport->dev->flags & IFF_UP)
0502             net_warn_ratelimited("%s: dropped over-mtu packet: "
0503                          "%d > %d\n", vport->dev->name,
0504                          packet_length(skb, vport->dev),
0505                          mtu);
0506         goto drop;
0507     }
0508 
0509     skb->dev = vport->dev;
0510     skb_clear_tstamp(skb);
0511     vport->ops->send(skb);
0512     return;
0513 
0514 drop:
0515     kfree_skb(skb);
0516 }