Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
0003  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
0004  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
0005  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
0006  *
0007  * This software is available to you under a choice of one of two
0008  * licenses.  You may choose to be licensed under the terms of the GNU
0009  * General Public License (GPL) Version 2, available from the file
0010  * COPYING in the main directory of this source tree, or the
0011  * OpenIB.org BSD license below:
0012  *
0013  *     Redistribution and use in source and binary forms, with or
0014  *     without modification, are permitted provided that the following
0015  *     conditions are met:
0016  *
0017  *      - Redistributions of source code must retain the above
0018  *        copyright notice, this list of conditions and the following
0019  *        disclaimer.
0020  *
0021  *      - Redistributions in binary form must reproduce the above
0022  *        copyright notice, this list of conditions and the following
0023  *        disclaimer in the documentation and/or other materials
0024  *        provided with the distribution.
0025  *
0026  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0027  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0028  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0029  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0030  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0031  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0032  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0033  * SOFTWARE.
0034  */
0035 
0036 #include <linux/mutex.h>
0037 #include <linux/inetdevice.h>
0038 #include <linux/slab.h>
0039 #include <linux/workqueue.h>
0040 #include <net/arp.h>
0041 #include <net/neighbour.h>
0042 #include <net/route.h>
0043 #include <net/netevent.h>
0044 #include <net/ipv6_stubs.h>
0045 #include <net/ip6_route.h>
0046 #include <rdma/ib_addr.h>
0047 #include <rdma/ib_cache.h>
0048 #include <rdma/ib_sa.h>
0049 #include <rdma/ib.h>
0050 #include <rdma/rdma_netlink.h>
0051 #include <net/netlink.h>
0052 
0053 #include "core_priv.h"
0054 
/*
 * One asynchronous address-resolution request. Requests live on the
 * global req_list (protected by 'lock') and are retried/completed by
 * process_one_req() running on the addr_wq workqueue.
 */
struct addr_req {
    struct list_head list;          /* entry on req_list, guarded by 'lock' */
    struct sockaddr_storage src_addr;   /* source; may be filled in by route lookup */
    struct sockaddr_storage dst_addr;   /* destination being resolved */
    struct rdma_dev_addr *addr;     /* caller-owned result structure */
    void *context;              /* opaque cookie passed back to callback */
    void (*callback)(int status, struct sockaddr *src_addr,
             struct rdma_dev_addr *addr, void *context);
    unsigned long timeout;          /* absolute jiffies deadline for retries */
    struct delayed_work work;       /* retry/completion work item on addr_wq */
    bool resolve_by_gid_attr;   /* Consider gid attr in resolve phase */
    int status;             /* current status; -ENODATA means "retry" */
    u32 seq;                /* netlink seq matching userspace replies */
};
0069 
/* Monotonic sequence number used to match netlink replies to requests. */
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);

/* Protects req_list and each request's membership on it. */
static DEFINE_SPINLOCK(lock);
/* All outstanding resolution requests. */
static LIST_HEAD(req_list);
/* Ordered workqueue (see addr_init()) running process_one_req(). */
static struct workqueue_struct *addr_wq;
0075 
/*
 * Attribute policy for RDMA_NL_LS_OP_IP_RESOLVE responses: the DGID
 * attribute must be binary data at least as large as
 * struct rdma_nla_ls_gid.
 */
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
    [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
        .len = sizeof(struct rdma_nla_ls_gid),
        .validation_type = NLA_VALIDATE_MIN,
        .min = sizeof(struct rdma_nla_ls_gid)},
};
0082 
0083 static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
0084 {
0085     struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
0086     int ret;
0087 
0088     if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
0089         return false;
0090 
0091     ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
0092                    nlmsg_len(nlh), ib_nl_addr_policy, NULL);
0093     if (ret)
0094         return false;
0095 
0096     return true;
0097 }
0098 
0099 static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
0100 {
0101     const struct nlattr *head, *curr;
0102     union ib_gid gid;
0103     struct addr_req *req;
0104     int len, rem;
0105     int found = 0;
0106 
0107     head = (const struct nlattr *)nlmsg_data(nlh);
0108     len = nlmsg_len(nlh);
0109 
0110     nla_for_each_attr(curr, head, len, rem) {
0111         if (curr->nla_type == LS_NLA_TYPE_DGID)
0112             memcpy(&gid, nla_data(curr), nla_len(curr));
0113     }
0114 
0115     spin_lock_bh(&lock);
0116     list_for_each_entry(req, &req_list, list) {
0117         if (nlh->nlmsg_seq != req->seq)
0118             continue;
0119         /* We set the DGID part, the rest was set earlier */
0120         rdma_addr_set_dgid(req->addr, &gid);
0121         req->status = 0;
0122         found = 1;
0123         break;
0124     }
0125     spin_unlock_bh(&lock);
0126 
0127     if (!found)
0128         pr_info("Couldn't find request waiting for DGID: %pI6\n",
0129             &gid);
0130 }
0131 
0132 int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
0133                  struct nlmsghdr *nlh,
0134                  struct netlink_ext_ack *extack)
0135 {
0136     if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
0137         !(NETLINK_CB(skb).sk))
0138         return -EPERM;
0139 
0140     if (ib_nl_is_good_ip_resp(nlh))
0141         ib_nl_process_good_ip_rsep(nlh);
0142 
0143     return 0;
0144 }
0145 
0146 static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
0147                  const void *daddr,
0148                  u32 seq, u16 family)
0149 {
0150     struct sk_buff *skb = NULL;
0151     struct nlmsghdr *nlh;
0152     struct rdma_ls_ip_resolve_header *header;
0153     void *data;
0154     size_t size;
0155     int attrtype;
0156     int len;
0157 
0158     if (family == AF_INET) {
0159         size = sizeof(struct in_addr);
0160         attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
0161     } else {
0162         size = sizeof(struct in6_addr);
0163         attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
0164     }
0165 
0166     len = nla_total_size(sizeof(size));
0167     len += NLMSG_ALIGN(sizeof(*header));
0168 
0169     skb = nlmsg_new(len, GFP_KERNEL);
0170     if (!skb)
0171         return -ENOMEM;
0172 
0173     data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
0174                 RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
0175     if (!data) {
0176         nlmsg_free(skb);
0177         return -ENODATA;
0178     }
0179 
0180     /* Construct the family header first */
0181     header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
0182     header->ifindex = dev_addr->bound_dev_if;
0183     nla_put(skb, attrtype, size, daddr);
0184 
0185     /* Repair the nlmsg header length */
0186     nlmsg_end(skb, nlh);
0187     rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
0188 
0189     /* Make the request retry, so when we get the response from userspace
0190      * we will have something.
0191      */
0192     return -ENODATA;
0193 }
0194 
0195 int rdma_addr_size(const struct sockaddr *addr)
0196 {
0197     switch (addr->sa_family) {
0198     case AF_INET:
0199         return sizeof(struct sockaddr_in);
0200     case AF_INET6:
0201         return sizeof(struct sockaddr_in6);
0202     case AF_IB:
0203         return sizeof(struct sockaddr_ib);
0204     default:
0205         return 0;
0206     }
0207 }
0208 EXPORT_SYMBOL(rdma_addr_size);
0209 
0210 int rdma_addr_size_in6(struct sockaddr_in6 *addr)
0211 {
0212     int ret = rdma_addr_size((struct sockaddr *) addr);
0213 
0214     return ret <= sizeof(*addr) ? ret : 0;
0215 }
0216 EXPORT_SYMBOL(rdma_addr_size_in6);
0217 
0218 int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
0219 {
0220     int ret = rdma_addr_size((struct sockaddr *) addr);
0221 
0222     return ret <= sizeof(*addr) ? ret : 0;
0223 }
0224 EXPORT_SYMBOL(rdma_addr_size_kss);
0225 
0226 /**
0227  * rdma_copy_src_l2_addr - Copy netdevice source addresses
0228  * @dev_addr:   Destination address pointer where to copy the addresses
0229  * @dev:    Netdevice whose source addresses to copy
0230  *
0231  * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
0232  * This includes unicast address, broadcast address, device type and
0233  * interface index.
0234  */
0235 void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
0236                const struct net_device *dev)
0237 {
0238     dev_addr->dev_type = dev->type;
0239     memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
0240     memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
0241     dev_addr->bound_dev_if = dev->ifindex;
0242 }
0243 EXPORT_SYMBOL(rdma_copy_src_l2_addr);
0244 
/*
 * Find the netdevice that owns the given source IP address.
 *
 * Must be called under rcu_read_lock(); the returned device is not
 * reference-counted and is only valid within the RCU read-side
 * critical section. Returns ERR_PTR(-EADDRNOTAVAIL) when no device
 * owns the address or the family is unsupported.
 */
static struct net_device *
rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
{
    struct net_device *dev = NULL;
    int ret = -EADDRNOTAVAIL;

    switch (src_in->sa_family) {
    case AF_INET:
        /* devref=false: no reference is taken on the result. */
        dev = __ip_dev_find(net,
                    ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
                    false);
        if (dev)
            ret = 0;
        break;
#if IS_ENABLED(CONFIG_IPV6)
    case AF_INET6:
        /* Walk every device looking for one bound to this address. */
        for_each_netdev_rcu(net, dev) {
            if (ipv6_chk_addr(net,
                      &((const struct sockaddr_in6 *)src_in)->sin6_addr,
                      dev, 1)) {
                ret = 0;
                break;
            }
        }
        break;
#endif
    }
    /* 'dev' is only meaningful when ret == 0 (loop hit 'break'). */
    return ret ? ERR_PTR(ret) : dev;
}
0274 
0275 int rdma_translate_ip(const struct sockaddr *addr,
0276               struct rdma_dev_addr *dev_addr)
0277 {
0278     struct net_device *dev;
0279 
0280     if (dev_addr->bound_dev_if) {
0281         dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
0282         if (!dev)
0283             return -ENODEV;
0284         rdma_copy_src_l2_addr(dev_addr, dev);
0285         dev_put(dev);
0286         return 0;
0287     }
0288 
0289     rcu_read_lock();
0290     dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
0291     if (!IS_ERR(dev))
0292         rdma_copy_src_l2_addr(dev_addr, dev);
0293     rcu_read_unlock();
0294     return PTR_ERR_OR_ZERO(dev);
0295 }
0296 EXPORT_SYMBOL(rdma_translate_ip);
0297 
0298 static void set_timeout(struct addr_req *req, unsigned long time)
0299 {
0300     unsigned long delay;
0301 
0302     delay = time - jiffies;
0303     if ((long)delay < 0)
0304         delay = 0;
0305 
0306     mod_delayed_work(addr_wq, &req->work, delay);
0307 }
0308 
0309 static void queue_req(struct addr_req *req)
0310 {
0311     spin_lock_bh(&lock);
0312     list_add_tail(&req->list, &req_list);
0313     set_timeout(req, req->timeout);
0314     spin_unlock_bh(&lock);
0315 }
0316 
0317 static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
0318               const void *daddr, u32 seq, u16 family)
0319 {
0320     if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
0321         return -EADDRNOTAVAIL;
0322 
0323     return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
0324 }
0325 
0326 static int dst_fetch_ha(const struct dst_entry *dst,
0327             struct rdma_dev_addr *dev_addr,
0328             const void *daddr)
0329 {
0330     struct neighbour *n;
0331     int ret = 0;
0332 
0333     n = dst_neigh_lookup(dst, daddr);
0334     if (!n)
0335         return -ENODATA;
0336 
0337     if (!(n->nud_state & NUD_VALID)) {
0338         neigh_event_send(n, NULL);
0339         ret = -ENODATA;
0340     } else {
0341         neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev);
0342     }
0343 
0344     neigh_release(n);
0345 
0346     return ret;
0347 }
0348 
0349 static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
0350 {
0351     struct rtable *rt;
0352     struct rt6_info *rt6;
0353 
0354     if (family == AF_INET) {
0355         rt = container_of(dst, struct rtable, dst);
0356         return rt->rt_uses_gateway;
0357     }
0358 
0359     rt6 = container_of(dst, struct rt6_info, dst);
0360     return rt6->rt6i_flags & RTF_GATEWAY;
0361 }
0362 
0363 static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
0364             const struct sockaddr *dst_in, u32 seq)
0365 {
0366     const struct sockaddr_in *dst_in4 =
0367         (const struct sockaddr_in *)dst_in;
0368     const struct sockaddr_in6 *dst_in6 =
0369         (const struct sockaddr_in6 *)dst_in;
0370     const void *daddr = (dst_in->sa_family == AF_INET) ?
0371         (const void *)&dst_in4->sin_addr.s_addr :
0372         (const void *)&dst_in6->sin6_addr;
0373     sa_family_t family = dst_in->sa_family;
0374 
0375     might_sleep();
0376 
0377     /* If we have a gateway in IB mode then it must be an IB network */
0378     if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
0379         return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
0380     else
0381         return dst_fetch_ha(dst, dev_addr, daddr);
0382 }
0383 
0384 static int addr4_resolve(struct sockaddr *src_sock,
0385              const struct sockaddr *dst_sock,
0386              struct rdma_dev_addr *addr,
0387              struct rtable **prt)
0388 {
0389     struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
0390     const struct sockaddr_in *dst_in =
0391             (const struct sockaddr_in *)dst_sock;
0392 
0393     __be32 src_ip = src_in->sin_addr.s_addr;
0394     __be32 dst_ip = dst_in->sin_addr.s_addr;
0395     struct rtable *rt;
0396     struct flowi4 fl4;
0397     int ret;
0398 
0399     memset(&fl4, 0, sizeof(fl4));
0400     fl4.daddr = dst_ip;
0401     fl4.saddr = src_ip;
0402     fl4.flowi4_oif = addr->bound_dev_if;
0403     rt = ip_route_output_key(addr->net, &fl4);
0404     ret = PTR_ERR_OR_ZERO(rt);
0405     if (ret)
0406         return ret;
0407 
0408     src_in->sin_addr.s_addr = fl4.saddr;
0409 
0410     addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
0411 
0412     *prt = rt;
0413     return 0;
0414 }
0415 
#if IS_ENABLED(CONFIG_IPV6)
/*
 * IPv6 route lookup: on success hands back a held dst_entry in *pdst,
 * fills in the chosen source address (if unspecified) and hop limit.
 */
static int addr6_resolve(struct sockaddr *src_sock,
             const struct sockaddr *dst_sock,
             struct rdma_dev_addr *addr,
             struct dst_entry **pdst)
{
    struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
    const struct sockaddr_in6 *dst_in =
                (const struct sockaddr_in6 *)dst_sock;
    struct flowi6 fl6;
    struct dst_entry *dst;

    memset(&fl6, 0, sizeof fl6);
    fl6.daddr = dst_in->sin6_addr;
    fl6.saddr = src_in->sin6_addr;
    fl6.flowi6_oif = addr->bound_dev_if;

    dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
    if (IS_ERR(dst))
        return PTR_ERR(dst);

    /* Adopt the source address chosen by the route lookup. */
    if (ipv6_addr_any(&src_in->sin6_addr))
        src_in->sin6_addr = fl6.saddr;

    addr->hoplimit = ip6_dst_hoplimit(dst);

    *pdst = dst;
    return 0;
}
#else
/* IPv6 disabled: IPv6 destinations cannot be resolved. */
static int addr6_resolve(struct sockaddr *src_sock,
             const struct sockaddr *dst_sock,
             struct rdma_dev_addr *addr,
             struct dst_entry **pdst)
{
    return -EADDRNOTAVAIL;
}
#endif
0454 
0455 static int addr_resolve_neigh(const struct dst_entry *dst,
0456                   const struct sockaddr *dst_in,
0457                   struct rdma_dev_addr *addr,
0458                   unsigned int ndev_flags,
0459                   u32 seq)
0460 {
0461     int ret = 0;
0462 
0463     if (ndev_flags & IFF_LOOPBACK) {
0464         memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
0465     } else {
0466         if (!(ndev_flags & IFF_NOARP)) {
0467             /* If the device doesn't do ARP internally */
0468             ret = fetch_ha(dst, addr, dst_in, seq);
0469         }
0470     }
0471     return ret;
0472 }
0473 
0474 static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
0475                 const struct sockaddr *dst_in,
0476                 const struct dst_entry *dst,
0477                 const struct net_device *ndev)
0478 {
0479     int ret = 0;
0480 
0481     if (dst->dev->flags & IFF_LOOPBACK)
0482         ret = rdma_translate_ip(dst_in, dev_addr);
0483     else
0484         rdma_copy_src_l2_addr(dev_addr, dst->dev);
0485 
0486     /*
0487      * If there's a gateway and type of device not ARPHRD_INFINIBAND,
0488      * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
0489      * network type accordingly.
0490      */
0491     if (has_gateway(dst, dst_in->sa_family) &&
0492         ndev->type != ARPHRD_INFINIBAND)
0493         dev_addr->network = dst_in->sa_family == AF_INET ?
0494                         RDMA_NETWORK_IPV4 :
0495                         RDMA_NETWORK_IPV6;
0496     else
0497         dev_addr->network = RDMA_NETWORK_IB;
0498 
0499     return ret;
0500 }
0501 
/*
 * Derive the source L2 addresses for the resolved route.
 *
 * Called under rcu_read_lock(). Reports the route device's flags via
 * *ndev_flags; when the route resolved to a loopback interface, the
 * real ndev owning the destination IP is substituted before copying.
 */
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
                 unsigned int *ndev_flags,
                 const struct sockaddr *dst_in,
                 const struct dst_entry *dst)
{
    /* READ_ONCE: dst->dev may be updated concurrently by the stack. */
    struct net_device *ndev = READ_ONCE(dst->dev);

    *ndev_flags = ndev->flags;
    /* A physical device must be the RDMA device to use */
    if (ndev->flags & IFF_LOOPBACK) {
        /*
         * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
         * loopback IP address. So if route is resolved to loopback
         * interface, translate that to a real ndev based on non
         * loopback IP address.
         */
        ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
        if (IS_ERR(ndev))
            return -ENODEV;
    }

    return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
}
0525 
0526 static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
0527 {
0528     struct net_device *ndev;
0529 
0530     ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
0531     if (IS_ERR(ndev))
0532         return PTR_ERR(ndev);
0533 
0534     /*
0535      * Since we are holding the rcu, reading net and ifindex
0536      * are safe without any additional reference; because
0537      * change_net_namespace() in net/core/dev.c does rcu sync
0538      * after it changes the state to IFF_DOWN and before
0539      * updating netdev fields {net, ifindex}.
0540      */
0541     addr->net = dev_net(ndev);
0542     addr->bound_dev_if = ndev->ifindex;
0543     return 0;
0544 }
0545 
0546 static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
0547 {
0548     addr->net = &init_net;
0549     addr->bound_dev_if = 0;
0550 }
0551 
/*
 * Core resolve step: route lookup, source L2 address derivation and
 * (optionally) neighbour resolution.
 *
 * @src_in: source address; may be rewritten with the address chosen by
 *      the route lookup.
 * @dst_in: destination address to resolve.
 * @addr:   result structure; addr->net must be set by the caller.
 * @resolve_neigh: also resolve the destination hardware address.
 * @resolve_by_gid_attr: derive netns/ifindex from addr->sgid_attr.
 * @seq:    netlink sequence number for SA-assisted resolution.
 *
 * Returns 0 on success, -ENODATA when the caller should retry later,
 * or another negative errno on failure.
 */
static int addr_resolve(struct sockaddr *src_in,
            const struct sockaddr *dst_in,
            struct rdma_dev_addr *addr,
            bool resolve_neigh,
            bool resolve_by_gid_attr,
            u32 seq)
{
    struct dst_entry *dst = NULL;
    unsigned int ndev_flags = 0;
    struct rtable *rt = NULL;
    int ret;

    if (!addr->net) {
        pr_warn_ratelimited("%s: missing namespace\n", __func__);
        return -EINVAL;
    }

    rcu_read_lock();
    if (resolve_by_gid_attr) {
        if (!addr->sgid_attr) {
            rcu_read_unlock();
            pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
            return -EINVAL;
        }
        /*
         * If the request is for a specific gid attribute of the
         * rdma_dev_addr, derive net from the netdevice of the
         * GID attribute.
         */
        ret = set_addr_netns_by_gid_rcu(addr);
        if (ret) {
            rcu_read_unlock();
            return ret;
        }
    }
    if (src_in->sa_family == AF_INET) {
        ret = addr4_resolve(src_in, dst_in, addr, &rt);
        /* Only meaningful when ret == 0; error path skips 'dst'. */
        dst = &rt->dst;
    } else {
        ret = addr6_resolve(src_in, dst_in, addr, &dst);
    }
    if (ret) {
        rcu_read_unlock();
        goto done;
    }
    ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
    rcu_read_unlock();

    /*
     * Resolve neighbor destination address if requested and
     * only if src addr translation didn't fail.
     */
    if (!ret && resolve_neigh)
        ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);

    /* Drop the reference taken by the route lookup. */
    if (src_in->sa_family == AF_INET)
        ip_rt_put(rt);
    else
        dst_release(dst);
done:
    /*
     * Clear the addr net to go back to its original state, only if it was
     * derived from GID attribute in this context.
     */
    if (resolve_by_gid_attr)
        rdma_addr_set_net_defaults(addr);
    return ret;
}
0620 
/*
 * Workqueue handler: (re)attempt resolution for one queued request,
 * then either requeue it (still -ENODATA, not yet timed out), or run
 * its callback and release it.
 */
static void process_one_req(struct work_struct *_work)
{
    struct addr_req *req;
    struct sockaddr *src_in, *dst_in;

    req = container_of(_work, struct addr_req, work.work);

    /* Only retry requests still waiting for data (-ENODATA). */
    if (req->status == -ENODATA) {
        src_in = (struct sockaddr *)&req->src_addr;
        dst_in = (struct sockaddr *)&req->dst_addr;
        req->status = addr_resolve(src_in, dst_in, req->addr,
                       true, req->resolve_by_gid_attr,
                       req->seq);
        if (req->status && time_after_eq(jiffies, req->timeout)) {
            req->status = -ETIMEDOUT;
        } else if (req->status == -ENODATA) {
            /* requeue the work for retrying again */
            spin_lock_bh(&lock);
            /* Unless rdma_addr_cancel() already claimed the req. */
            if (!list_empty(&req->list))
                set_timeout(req, req->timeout);
            spin_unlock_bh(&lock);
            return;
        }
    }

    req->callback(req->status, (struct sockaddr *)&req->src_addr,
        req->addr, req->context);
    /* Mark the callback as consumed. */
    req->callback = NULL;

    spin_lock_bh(&lock);
    /*
     * Although the work will normally have been canceled by the workqueue,
     * it can still be requeued as long as it is on the req_list.
     */
    cancel_delayed_work(&req->work);
    if (!list_empty(&req->list)) {
        /* Still on the list: we own the request and may free it. */
        list_del_init(&req->list);
        kfree(req);
    }
    spin_unlock_bh(&lock);
}
0662 
/*
 * Resolve @dst_addr (and optionally @src_addr) into @addr, invoking
 * @callback when done or timed out after @timeout_ms. The request is
 * processed asynchronously on addr_wq; on success here, ownership of
 * the internal request passes to the workqueue. Returns 0 when the
 * request was queued, or a negative errno on immediate failure (the
 * callback is then never invoked).
 */
int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
            struct rdma_dev_addr *addr, unsigned long timeout_ms,
            void (*callback)(int status, struct sockaddr *src_addr,
                     struct rdma_dev_addr *addr, void *context),
            bool resolve_by_gid_attr, void *context)
{
    struct sockaddr *src_in, *dst_in;
    struct addr_req *req;
    int ret = 0;

    req = kzalloc(sizeof *req, GFP_KERNEL);
    if (!req)
        return -ENOMEM;

    src_in = (struct sockaddr *) &req->src_addr;
    dst_in = (struct sockaddr *) &req->dst_addr;

    if (src_addr) {
        /* Source and destination families must agree. */
        if (src_addr->sa_family != dst_addr->sa_family) {
            ret = -EINVAL;
            goto err;
        }

        memcpy(src_in, src_addr, rdma_addr_size(src_addr));
    } else {
        src_in->sa_family = dst_addr->sa_family;
    }

    memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
    req->addr = addr;
    req->callback = callback;
    req->context = context;
    req->resolve_by_gid_attr = resolve_by_gid_attr;
    INIT_DELAYED_WORK(&req->work, process_one_req);
    req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);

    /* First synchronous attempt; -ENODATA means retry from the wq. */
    req->status = addr_resolve(src_in, dst_in, addr, true,
                   req->resolve_by_gid_attr, req->seq);
    switch (req->status) {
    case 0:
        /* Already resolved: queue for immediate callback delivery. */
        req->timeout = jiffies;
        queue_req(req);
        break;
    case -ENODATA:
        req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
        queue_req(req);
        break;
    default:
        ret = req->status;
        goto err;
    }
    return ret;
err:
    kfree(req);
    return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
0720 
0721 int roce_resolve_route_from_path(struct sa_path_rec *rec,
0722                  const struct ib_gid_attr *attr)
0723 {
0724     union {
0725         struct sockaddr     _sockaddr;
0726         struct sockaddr_in  _sockaddr_in;
0727         struct sockaddr_in6 _sockaddr_in6;
0728     } sgid, dgid;
0729     struct rdma_dev_addr dev_addr = {};
0730     int ret;
0731 
0732     might_sleep();
0733 
0734     if (rec->roce.route_resolved)
0735         return 0;
0736 
0737     rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid);
0738     rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid);
0739 
0740     if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
0741         return -EINVAL;
0742 
0743     if (!attr || !attr->ndev)
0744         return -EINVAL;
0745 
0746     dev_addr.net = &init_net;
0747     dev_addr.sgid_attr = attr;
0748 
0749     ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid,
0750                &dev_addr, false, true, 0);
0751     if (ret)
0752         return ret;
0753 
0754     if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
0755          dev_addr.network == RDMA_NETWORK_IPV6) &&
0756         rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
0757         return -EINVAL;
0758 
0759     rec->roce.route_resolved = true;
0760     return 0;
0761 }
0762 
/**
 * rdma_addr_cancel - Cancel resolve ip request
 * @addr:   Pointer to address structure given previously
 *      during rdma_resolve_ip().
 * rdma_addr_cancel() is synchronous function which cancels any pending
 * request if there is any. On return no callback for the canceled
 * request will run and its resources are freed.
 */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
    struct addr_req *req, *temp_req;
    struct addr_req *found = NULL;

    spin_lock_bh(&lock);
    list_for_each_entry_safe(req, temp_req, &req_list, list) {
        if (req->addr == addr) {
            /*
             * Removing from the list means we take ownership of
             * the req
             */
            list_del_init(&req->list);
            found = req;
            break;
        }
    }
    spin_unlock_bh(&lock);

    /* Nothing pending for this addr. */
    if (!found)
        return;

    /*
     * sync canceling the work after removing it from the req_list
     * guarantees no work is running and none will be started.
     */
    cancel_delayed_work_sync(&found->work);
    kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
0800 
/* Completion context used by rdma_addr_find_l2_eth_by_grh(). */
struct resolve_cb_context {
    struct completion comp;     /* signalled by resolve_cb() */
    int status;         /* resolution status reported by the callback */
};
0805 
0806 static void resolve_cb(int status, struct sockaddr *src_addr,
0807          struct rdma_dev_addr *addr, void *context)
0808 {
0809     ((struct resolve_cb_context *)context)->status = status;
0810     complete(&((struct resolve_cb_context *)context)->comp);
0811 }
0812 
0813 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
0814                  const union ib_gid *dgid,
0815                  u8 *dmac, const struct ib_gid_attr *sgid_attr,
0816                  int *hoplimit)
0817 {
0818     struct rdma_dev_addr dev_addr;
0819     struct resolve_cb_context ctx;
0820     union {
0821         struct sockaddr_in  _sockaddr_in;
0822         struct sockaddr_in6 _sockaddr_in6;
0823     } sgid_addr, dgid_addr;
0824     int ret;
0825 
0826     rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
0827     rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid);
0828 
0829     memset(&dev_addr, 0, sizeof(dev_addr));
0830     dev_addr.net = &init_net;
0831     dev_addr.sgid_attr = sgid_attr;
0832 
0833     init_completion(&ctx.comp);
0834     ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr,
0835                   (struct sockaddr *)&dgid_addr, &dev_addr, 1000,
0836                   resolve_cb, true, &ctx);
0837     if (ret)
0838         return ret;
0839 
0840     wait_for_completion(&ctx.comp);
0841 
0842     ret = ctx.status;
0843     if (ret)
0844         return ret;
0845 
0846     memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
0847     *hoplimit = dev_addr.hoplimit;
0848     return 0;
0849 }
0850 
0851 static int netevent_callback(struct notifier_block *self, unsigned long event,
0852     void *ctx)
0853 {
0854     struct addr_req *req;
0855 
0856     if (event == NETEVENT_NEIGH_UPDATE) {
0857         struct neighbour *neigh = ctx;
0858 
0859         if (neigh->nud_state & NUD_VALID) {
0860             spin_lock_bh(&lock);
0861             list_for_each_entry(req, &req_list, list)
0862                 set_timeout(req, jiffies);
0863             spin_unlock_bh(&lock);
0864         }
0865     }
0866     return 0;
0867 }
0868 
/* Registered in addr_init(); reschedules requests on neighbour updates. */
static struct notifier_block nb = {
    .notifier_call = netevent_callback
};
0872 
0873 int addr_init(void)
0874 {
0875     addr_wq = alloc_ordered_workqueue("ib_addr", 0);
0876     if (!addr_wq)
0877         return -ENOMEM;
0878 
0879     register_netevent_notifier(&nb);
0880 
0881     return 0;
0882 }
0883 
0884 void addr_cleanup(void)
0885 {
0886     unregister_netevent_notifier(&nb);
0887     destroy_workqueue(addr_wq);
0888     WARN_ON(!list_empty(&req_list));
0889 }