0001 /*
0002  * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
0003  * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
0004  * Copyright (c) 2004 Intel Corporation.  All rights reserved.
0005  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
0006  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
0007  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
0008  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
0009  *
0010  * This software is available to you under a choice of one of two
0011  * licenses.  You may choose to be licensed under the terms of the GNU
0012  * General Public License (GPL) Version 2, available from the file
0013  * COPYING in the main directory of this source tree, or the
0014  * OpenIB.org BSD license below:
0015  *
0016  *     Redistribution and use in source and binary forms, with or
0017  *     without modification, are permitted provided that the following
0018  *     conditions are met:
0019  *
0020  *      - Redistributions of source code must retain the above
0021  *        copyright notice, this list of conditions and the following
0022  *        disclaimer.
0023  *
0024  *      - Redistributions in binary form must reproduce the above
0025  *        copyright notice, this list of conditions and the following
0026  *        disclaimer in the documentation and/or other materials
0027  *        provided with the distribution.
0028  *
0029  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0030  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0031  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0032  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0033  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0034  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0035  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0036  * SOFTWARE.
0037  */
0038 
0039 #include <linux/errno.h>
0040 #include <linux/err.h>
0041 #include <linux/export.h>
0042 #include <linux/string.h>
0043 #include <linux/slab.h>
0044 #include <linux/in.h>
0045 #include <linux/in6.h>
0046 #include <net/addrconf.h>
0047 #include <linux/security.h>
0048 
0049 #include <rdma/ib_verbs.h>
0050 #include <rdma/ib_cache.h>
0051 #include <rdma/ib_addr.h>
0052 #include <rdma/rw.h>
0053 #include <rdma/lag.h>
0054 
0055 #include "core_priv.h"
0056 #include <trace/events/rdma_core.h>
0057 
0058 static int ib_resolve_eth_dmac(struct ib_device *device,
0059                    struct rdma_ah_attr *ah_attr);
0060 
0061 static const char * const ib_events[] = {
0062     [IB_EVENT_CQ_ERR]       = "CQ error",
0063     [IB_EVENT_QP_FATAL]     = "QP fatal error",
0064     [IB_EVENT_QP_REQ_ERR]       = "QP request error",
0065     [IB_EVENT_QP_ACCESS_ERR]    = "QP access error",
0066     [IB_EVENT_COMM_EST]     = "communication established",
0067     [IB_EVENT_SQ_DRAINED]       = "send queue drained",
0068     [IB_EVENT_PATH_MIG]     = "path migration successful",
0069     [IB_EVENT_PATH_MIG_ERR]     = "path migration error",
0070     [IB_EVENT_DEVICE_FATAL]     = "device fatal error",
0071     [IB_EVENT_PORT_ACTIVE]      = "port active",
0072     [IB_EVENT_PORT_ERR]     = "port error",
0073     [IB_EVENT_LID_CHANGE]       = "LID change",
0074     [IB_EVENT_PKEY_CHANGE]      = "P_key change",
0075     [IB_EVENT_SM_CHANGE]        = "SM change",
0076     [IB_EVENT_SRQ_ERR]      = "SRQ error",
0077     [IB_EVENT_SRQ_LIMIT_REACHED]    = "SRQ limit reached",
0078     [IB_EVENT_QP_LAST_WQE_REACHED]  = "last WQE reached",
0079     [IB_EVENT_CLIENT_REREGISTER]    = "client reregister",
0080     [IB_EVENT_GID_CHANGE]       = "GID changed",
0081 };
0082 
0083 const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
0084 {
0085     size_t index = event;
0086 
0087     return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
0088             ib_events[index] : "unrecognized event";
0089 }
0090 EXPORT_SYMBOL(ib_event_msg);
0091 
0092 static const char * const wc_statuses[] = {
0093     [IB_WC_SUCCESS]         = "success",
0094     [IB_WC_LOC_LEN_ERR]     = "local length error",
0095     [IB_WC_LOC_QP_OP_ERR]       = "local QP operation error",
0096     [IB_WC_LOC_EEC_OP_ERR]      = "local EE context operation error",
0097     [IB_WC_LOC_PROT_ERR]        = "local protection error",
0098     [IB_WC_WR_FLUSH_ERR]        = "WR flushed",
0099     [IB_WC_MW_BIND_ERR]     = "memory bind operation error",
0100     [IB_WC_BAD_RESP_ERR]        = "bad response error",
0101     [IB_WC_LOC_ACCESS_ERR]      = "local access error",
0102     [IB_WC_REM_INV_REQ_ERR]     = "remote invalid request error",
0103     [IB_WC_REM_ACCESS_ERR]      = "remote access error",
0104     [IB_WC_REM_OP_ERR]      = "remote operation error",
0105     [IB_WC_RETRY_EXC_ERR]       = "transport retry counter exceeded",
0106     [IB_WC_RNR_RETRY_EXC_ERR]   = "RNR retry counter exceeded",
0107     [IB_WC_LOC_RDD_VIOL_ERR]    = "local RDD violation error",
0108     [IB_WC_REM_INV_RD_REQ_ERR]  = "remote invalid RD request",
0109     [IB_WC_REM_ABORT_ERR]       = "operation aborted",
0110     [IB_WC_INV_EECN_ERR]        = "invalid EE context number",
0111     [IB_WC_INV_EEC_STATE_ERR]   = "invalid EE context state",
0112     [IB_WC_FATAL_ERR]       = "fatal error",
0113     [IB_WC_RESP_TIMEOUT_ERR]    = "response timeout error",
0114     [IB_WC_GENERAL_ERR]     = "general error",
0115 };
0116 
0117 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
0118 {
0119     size_t index = status;
0120 
0121     return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
0122             wc_statuses[index] : "unrecognized status";
0123 }
0124 EXPORT_SYMBOL(ib_wc_status_msg);
0125 
0126 __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
0127 {
0128     switch (rate) {
0129     case IB_RATE_2_5_GBPS: return   1;
0130     case IB_RATE_5_GBPS:   return   2;
0131     case IB_RATE_10_GBPS:  return   4;
0132     case IB_RATE_20_GBPS:  return   8;
0133     case IB_RATE_30_GBPS:  return  12;
0134     case IB_RATE_40_GBPS:  return  16;
0135     case IB_RATE_60_GBPS:  return  24;
0136     case IB_RATE_80_GBPS:  return  32;
0137     case IB_RATE_120_GBPS: return  48;
0138     case IB_RATE_14_GBPS:  return   6;
0139     case IB_RATE_56_GBPS:  return  22;
0140     case IB_RATE_112_GBPS: return  45;
0141     case IB_RATE_168_GBPS: return  67;
0142     case IB_RATE_25_GBPS:  return  10;
0143     case IB_RATE_100_GBPS: return  40;
0144     case IB_RATE_200_GBPS: return  80;
0145     case IB_RATE_300_GBPS: return 120;
0146     case IB_RATE_28_GBPS:  return  11;
0147     case IB_RATE_50_GBPS:  return  20;
0148     case IB_RATE_400_GBPS: return 160;
0149     case IB_RATE_600_GBPS: return 240;
0150     default:           return  -1;
0151     }
0152 }
0153 EXPORT_SYMBOL(ib_rate_to_mult);
0154 
0155 __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
0156 {
0157     switch (mult) {
0158     case 1:   return IB_RATE_2_5_GBPS;
0159     case 2:   return IB_RATE_5_GBPS;
0160     case 4:   return IB_RATE_10_GBPS;
0161     case 8:   return IB_RATE_20_GBPS;
0162     case 12:  return IB_RATE_30_GBPS;
0163     case 16:  return IB_RATE_40_GBPS;
0164     case 24:  return IB_RATE_60_GBPS;
0165     case 32:  return IB_RATE_80_GBPS;
0166     case 48:  return IB_RATE_120_GBPS;
0167     case 6:   return IB_RATE_14_GBPS;
0168     case 22:  return IB_RATE_56_GBPS;
0169     case 45:  return IB_RATE_112_GBPS;
0170     case 67:  return IB_RATE_168_GBPS;
0171     case 10:  return IB_RATE_25_GBPS;
0172     case 40:  return IB_RATE_100_GBPS;
0173     case 80:  return IB_RATE_200_GBPS;
0174     case 120: return IB_RATE_300_GBPS;
0175     case 11:  return IB_RATE_28_GBPS;
0176     case 20:  return IB_RATE_50_GBPS;
0177     case 160: return IB_RATE_400_GBPS;
0178     case 240: return IB_RATE_600_GBPS;
0179     default:  return IB_RATE_PORT_CURRENT;
0180     }
0181 }
0182 EXPORT_SYMBOL(mult_to_ib_rate);
0183 
0184 __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
0185 {
0186     switch (rate) {
0187     case IB_RATE_2_5_GBPS: return 2500;
0188     case IB_RATE_5_GBPS:   return 5000;
0189     case IB_RATE_10_GBPS:  return 10000;
0190     case IB_RATE_20_GBPS:  return 20000;
0191     case IB_RATE_30_GBPS:  return 30000;
0192     case IB_RATE_40_GBPS:  return 40000;
0193     case IB_RATE_60_GBPS:  return 60000;
0194     case IB_RATE_80_GBPS:  return 80000;
0195     case IB_RATE_120_GBPS: return 120000;
0196     case IB_RATE_14_GBPS:  return 14062;
0197     case IB_RATE_56_GBPS:  return 56250;
0198     case IB_RATE_112_GBPS: return 112500;
0199     case IB_RATE_168_GBPS: return 168750;
0200     case IB_RATE_25_GBPS:  return 25781;
0201     case IB_RATE_100_GBPS: return 103125;
0202     case IB_RATE_200_GBPS: return 206250;
0203     case IB_RATE_300_GBPS: return 309375;
0204     case IB_RATE_28_GBPS:  return 28125;
0205     case IB_RATE_50_GBPS:  return 53125;
0206     case IB_RATE_400_GBPS: return 425000;
0207     case IB_RATE_600_GBPS: return 637500;
0208     default:           return -1;
0209     }
0210 }
0211 EXPORT_SYMBOL(ib_rate_to_mbps);
0212 
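/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how the three rate helpers above relate. ib_rate_to_mult() returns the
 * multiple of the 2.5 Gb/s base rate, mult_to_ib_rate() maps such a multiple
 * back to the enum, and ib_rate_to_mbps() gives the actual signalling rate.
 * Kept out of the build with #if 0.
 */
#if 0
static void example_rate_helpers(void)
{
	enum ib_rate rate = IB_RATE_100_GBPS;
	int mult = ib_rate_to_mult(rate);	/* 40, i.e. 40 x 2.5 Gb/s */
	int mbps = ib_rate_to_mbps(rate);	/* 103125 */

	WARN_ON(mult_to_ib_rate(mult) != rate);
	pr_debug("mult=%d mbps=%d\n", mult, mbps);
}
#endif
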
0213 __attribute_const__ enum rdma_transport_type
0214 rdma_node_get_transport(unsigned int node_type)
0215 {
0216 
0217     if (node_type == RDMA_NODE_USNIC)
0218         return RDMA_TRANSPORT_USNIC;
0219     if (node_type == RDMA_NODE_USNIC_UDP)
0220         return RDMA_TRANSPORT_USNIC_UDP;
0221     if (node_type == RDMA_NODE_RNIC)
0222         return RDMA_TRANSPORT_IWARP;
0223     if (node_type == RDMA_NODE_UNSPECIFIED)
0224         return RDMA_TRANSPORT_UNSPECIFIED;
0225 
0226     return RDMA_TRANSPORT_IB;
0227 }
0228 EXPORT_SYMBOL(rdma_node_get_transport);
0229 
0230 enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
0231                           u32 port_num)
0232 {
0233     enum rdma_transport_type lt;
0234     if (device->ops.get_link_layer)
0235         return device->ops.get_link_layer(device, port_num);
0236 
0237     lt = rdma_node_get_transport(device->node_type);
0238     if (lt == RDMA_TRANSPORT_IB)
0239         return IB_LINK_LAYER_INFINIBAND;
0240 
0241     return IB_LINK_LAYER_ETHERNET;
0242 }
0243 EXPORT_SYMBOL(rdma_port_get_link_layer);
0244 
0245 /* Protection domains */
0246 
0247 /**
0248  * __ib_alloc_pd - Allocates an unused protection domain.
0249  * @device: The device on which to allocate the protection domain.
0250  * @flags: protection domain flags
0251  * @caller: caller's build-time module name
0252  *
0253  * A protection domain object provides an association between QPs, shared
0254  * receive queues, address handles, memory regions, and memory windows.
0255  *
0256  * Every PD has a local_dma_lkey which can be used as the lkey value for local
0257  * memory operations.
0258  */
0259 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
0260         const char *caller)
0261 {
0262     struct ib_pd *pd;
0263     int mr_access_flags = 0;
0264     int ret;
0265 
0266     pd = rdma_zalloc_drv_obj(device, ib_pd);
0267     if (!pd)
0268         return ERR_PTR(-ENOMEM);
0269 
0270     pd->device = device;
0271     pd->flags = flags;
0272 
0273     rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
0274     rdma_restrack_set_name(&pd->res, caller);
0275 
0276     ret = device->ops.alloc_pd(pd, NULL);
0277     if (ret) {
0278         rdma_restrack_put(&pd->res);
0279         kfree(pd);
0280         return ERR_PTR(ret);
0281     }
0282     rdma_restrack_add(&pd->res);
0283 
0284     if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
0285         pd->local_dma_lkey = device->local_dma_lkey;
0286     else
0287         mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
0288 
0289     if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
0290         pr_warn("%s: enabling unsafe global rkey\n", caller);
0291         mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
0292     }
0293 
0294     if (mr_access_flags) {
0295         struct ib_mr *mr;
0296 
0297         mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
0298         if (IS_ERR(mr)) {
0299             ib_dealloc_pd(pd);
0300             return ERR_CAST(mr);
0301         }
0302 
0303         mr->device  = pd->device;
0304         mr->pd      = pd;
0305         mr->type        = IB_MR_TYPE_DMA;
0306         mr->uobject = NULL;
0307         mr->need_inval  = false;
0308 
0309         pd->__internal_mr = mr;
0310 
0311         if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
0312             pd->local_dma_lkey = pd->__internal_mr->lkey;
0313 
0314         if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
0315             pd->unsafe_global_rkey = pd->__internal_mr->rkey;
0316     }
0317 
0318     return pd;
0319 }
0320 EXPORT_SYMBOL(__ib_alloc_pd);
0321 
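/*
 * Illustrative sketch (editor's addition): a minimal kernel caller normally
 * goes through the ib_alloc_pd() wrapper, which passes KBUILD_MODNAME as
 * @caller, and releases the PD with ib_dealloc_pd(). The flow below is an
 * assumed typical ULP pattern, kept out of the build with #if 0.
 */
#if 0
static int example_pd_usage(struct ib_device *device)
{
	struct ib_pd *pd;

	pd = ib_alloc_pd(device, 0);
	if (IS_ERR(pd))
		return PTR_ERR(pd);

	/* pd->local_dma_lkey can now be used as the lkey of local SGEs */

	ib_dealloc_pd(pd);
	return 0;
}
#endif
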
0322 /**
0323  * ib_dealloc_pd_user - Deallocates a protection domain.
0324  * @pd: The protection domain to deallocate.
0325  * @udata: Valid user data or NULL for kernel object
0326  *
0327  * It is an error to call this function while any resources in the pd still
0328  * exist.  The caller is responsible to synchronously destroy them and
0329  * guarantee no new allocations will happen.
0330  */
0331 int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
0332 {
0333     int ret;
0334 
0335     if (pd->__internal_mr) {
0336         ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL);
0337         WARN_ON(ret);
0338         pd->__internal_mr = NULL;
0339     }
0340 
0341     ret = pd->device->ops.dealloc_pd(pd, udata);
0342     if (ret)
0343         return ret;
0344 
0345     rdma_restrack_del(&pd->res);
0346     kfree(pd);
0347     return ret;
0348 }
0349 EXPORT_SYMBOL(ib_dealloc_pd_user);
0350 
0351 /* Address handles */
0352 
0353 /**
0354  * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
0355  * @dest:       Pointer to destination ah_attr. Contents of the destination
0356  *              pointer are assumed to be invalid and the attributes are overwritten.
0357  * @src:        Pointer to source ah_attr.
0358  */
0359 void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
0360                const struct rdma_ah_attr *src)
0361 {
0362     *dest = *src;
0363     if (dest->grh.sgid_attr)
0364         rdma_hold_gid_attr(dest->grh.sgid_attr);
0365 }
0366 EXPORT_SYMBOL(rdma_copy_ah_attr);
0367 
0368 /**
0369  * rdma_replace_ah_attr - Replace valid ah_attr with a new one.
0370  * @old:        Pointer to existing ah_attr which needs to be replaced.
0371  *              old is assumed to be valid or zero'd
0372  * @new:        Pointer to the new ah_attr.
0373  *
0374  * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
0375  * the old ah_attr is valid; after that it copies the new attribute and holds
0376  * a reference on the sgid_attr of the copied ah_attr.
0377  */
0378 void rdma_replace_ah_attr(struct rdma_ah_attr *old,
0379               const struct rdma_ah_attr *new)
0380 {
0381     rdma_destroy_ah_attr(old);
0382     *old = *new;
0383     if (old->grh.sgid_attr)
0384         rdma_hold_gid_attr(old->grh.sgid_attr);
0385 }
0386 EXPORT_SYMBOL(rdma_replace_ah_attr);
0387 
0388 /**
0389  * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
0390  * @dest:       Pointer to destination ah_attr to copy to.
0391  *              dest is assumed to be valid or zero'd
0392  * @src:        Pointer to the new ah_attr.
0393  *
0394  * rdma_move_ah_attr() first releases any reference in the destination ah_attr
0395  * if it is valid. This also transfers ownership of internal references from
0396  * src to dest, making src invalid in the process. No new reference of the src
0397  * ah_attr is taken.
0398  */
0399 void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
0400 {
0401     rdma_destroy_ah_attr(dest);
0402     *dest = *src;
0403     src->grh.sgid_attr = NULL;
0404 }
0405 EXPORT_SYMBOL(rdma_move_ah_attr);
0406 
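/*
 * Illustrative sketch (editor's addition): the difference between the three
 * helpers above. copy takes its own reference on the source's sgid_attr,
 * replace first drops the destination's reference before copying, and move
 * transfers ownership so the source is left without an sgid_attr. Kept out
 * of the build with #if 0.
 */
#if 0
static void example_ah_attr_helpers(struct rdma_ah_attr *a,
				    struct rdma_ah_attr *b)
{
	struct rdma_ah_attr tmp = {};

	rdma_copy_ah_attr(&tmp, a);	/* tmp holds a ref on a's sgid_attr */
	rdma_replace_ah_attr(&tmp, b);	/* drops that ref, refs b's sgid_attr */
	rdma_move_ah_attr(&tmp, b);	/* drops tmp's ref, steals b's; b loses sgid_attr */
	rdma_destroy_ah_attr(&tmp);	/* puts the reference tmp now owns */
}
#endif
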
0407 /*
0408  * Validate that the rdma_ah_attr is valid for the device before passing it
0409  * off to the driver.
0410  */
0411 static int rdma_check_ah_attr(struct ib_device *device,
0412                   struct rdma_ah_attr *ah_attr)
0413 {
0414     if (!rdma_is_port_valid(device, ah_attr->port_num))
0415         return -EINVAL;
0416 
0417     if ((rdma_is_grh_required(device, ah_attr->port_num) ||
0418          ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
0419         !(ah_attr->ah_flags & IB_AH_GRH))
0420         return -EINVAL;
0421 
0422     if (ah_attr->grh.sgid_attr) {
0423         /*
0424          * Make sure the passed sgid_attr is consistent with the
0425          * parameters
0426          */
0427         if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
0428             ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
0429             return -EINVAL;
0430     }
0431     return 0;
0432 }
0433 
0434 /*
0435  * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
0436  * On success the caller is responsible to call rdma_unfill_sgid_attr().
0437  */
0438 static int rdma_fill_sgid_attr(struct ib_device *device,
0439                    struct rdma_ah_attr *ah_attr,
0440                    const struct ib_gid_attr **old_sgid_attr)
0441 {
0442     const struct ib_gid_attr *sgid_attr;
0443     struct ib_global_route *grh;
0444     int ret;
0445 
0446     *old_sgid_attr = ah_attr->grh.sgid_attr;
0447 
0448     ret = rdma_check_ah_attr(device, ah_attr);
0449     if (ret)
0450         return ret;
0451 
0452     if (!(ah_attr->ah_flags & IB_AH_GRH))
0453         return 0;
0454 
0455     grh = rdma_ah_retrieve_grh(ah_attr);
0456     if (grh->sgid_attr)
0457         return 0;
0458 
0459     sgid_attr =
0460         rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
0461     if (IS_ERR(sgid_attr))
0462         return PTR_ERR(sgid_attr);
0463 
0464     /* Move ownership of the kref into the ah_attr */
0465     grh->sgid_attr = sgid_attr;
0466     return 0;
0467 }
0468 
0469 static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
0470                   const struct ib_gid_attr *old_sgid_attr)
0471 {
0472     /*
0473      * Fill didn't change anything, the caller retains ownership of
0474      * whatever it passed
0475      */
0476     if (ah_attr->grh.sgid_attr == old_sgid_attr)
0477         return;
0478 
0479     /*
0480      * Otherwise, we need to undo what rdma_fill_sgid_attr() did so the caller
0481      * doesn't see any change in the rdma_ah_attr. If we get here
0482      * old_sgid_attr is NULL.
0483      */
0484     rdma_destroy_ah_attr(ah_attr);
0485 }
0486 
0487 static const struct ib_gid_attr *
0488 rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
0489               const struct ib_gid_attr *old_attr)
0490 {
0491     if (old_attr)
0492         rdma_put_gid_attr(old_attr);
0493     if (ah_attr->ah_flags & IB_AH_GRH) {
0494         rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
0495         return ah_attr->grh.sgid_attr;
0496     }
0497     return NULL;
0498 }
0499 
0500 static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
0501                      struct rdma_ah_attr *ah_attr,
0502                      u32 flags,
0503                      struct ib_udata *udata,
0504                      struct net_device *xmit_slave)
0505 {
0506     struct rdma_ah_init_attr init_attr = {};
0507     struct ib_device *device = pd->device;
0508     struct ib_ah *ah;
0509     int ret;
0510 
0511     might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
0512 
0513     if (!udata && !device->ops.create_ah)
0514         return ERR_PTR(-EOPNOTSUPP);
0515 
0516     ah = rdma_zalloc_drv_obj_gfp(
0517         device, ib_ah,
0518         (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
0519     if (!ah)
0520         return ERR_PTR(-ENOMEM);
0521 
0522     ah->device = device;
0523     ah->pd = pd;
0524     ah->type = ah_attr->type;
0525     ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
0526     init_attr.ah_attr = ah_attr;
0527     init_attr.flags = flags;
0528     init_attr.xmit_slave = xmit_slave;
0529 
0530     if (udata)
0531         ret = device->ops.create_user_ah(ah, &init_attr, udata);
0532     else
0533         ret = device->ops.create_ah(ah, &init_attr, NULL);
0534     if (ret) {
0535         kfree(ah);
0536         return ERR_PTR(ret);
0537     }
0538 
0539     atomic_inc(&pd->usecnt);
0540     return ah;
0541 }
0542 
0543 /**
0544  * rdma_create_ah - Creates an address handle for the
0545  * given address vector.
0546  * @pd: The protection domain associated with the address handle.
0547  * @ah_attr: The attributes of the address vector.
0548  * @flags: Create address handle flags (see enum rdma_create_ah_flags).
0549  *
0550  * It returns a valid ib_ah pointer on success and an ERR_PTR on error.
0551  * The address handle is used to reference a local or global destination
0552  * in all UD QP post sends.
0553  */
0554 struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
0555                  u32 flags)
0556 {
0557     const struct ib_gid_attr *old_sgid_attr;
0558     struct net_device *slave;
0559     struct ib_ah *ah;
0560     int ret;
0561 
0562     ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
0563     if (ret)
0564         return ERR_PTR(ret);
0565     slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
0566                        (flags & RDMA_CREATE_AH_SLEEPABLE) ?
0567                        GFP_KERNEL : GFP_ATOMIC);
0568     if (IS_ERR(slave)) {
0569         rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
0570         return (void *)slave;
0571     }
0572     ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
0573     rdma_lag_put_ah_roce_slave(slave);
0574     rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
0575     return ah;
0576 }
0577 EXPORT_SYMBOL(rdma_create_ah);
0578 
0579 /**
0580  * rdma_create_user_ah - Creates an address handle for the
0581  * given address vector.
0582  * It resolves destination mac address for ah attribute of RoCE type.
0583  * @pd: The protection domain associated with the address handle.
0584  * @ah_attr: The attributes of the address vector.
0585  * @udata: pointer to user's input/output buffer information needed by the
0586  *         provider driver.
0587  *
0588  * It returns a valid ib_ah pointer on success and an ERR_PTR on error.
0589  * The address handle is used to reference a local or global destination
0590  * in all UD QP post sends.
0591  */
0592 struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
0593                   struct rdma_ah_attr *ah_attr,
0594                   struct ib_udata *udata)
0595 {
0596     const struct ib_gid_attr *old_sgid_attr;
0597     struct ib_ah *ah;
0598     int err;
0599 
0600     err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
0601     if (err)
0602         return ERR_PTR(err);
0603 
0604     if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
0605         err = ib_resolve_eth_dmac(pd->device, ah_attr);
0606         if (err) {
0607             ah = ERR_PTR(err);
0608             goto out;
0609         }
0610     }
0611 
0612     ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
0613                  udata, NULL);
0614 
0615 out:
0616     rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
0617     return ah;
0618 }
0619 EXPORT_SYMBOL(rdma_create_user_ah);
0620 
0621 int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
0622 {
0623     const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
0624     struct iphdr ip4h_checked;
0625     const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
0626 
0627     /* If it's IPv6, the version must be 6, otherwise, the first
0628      * 20 bytes (before the IPv4 header) are garbled.
0629      */
0630     if (ip6h->version != 6)
0631         return (ip4h->version == 4) ? 4 : 0;
0632     /* version may be 6 or 4 because the first 20 bytes could be garbled */
0633 
0634     /* RoCE v2 requires no options, thus header length
0635      * must be 5 words
0636      */
0637     if (ip4h->ihl != 5)
0638         return 6;
0639 
0640     /* Verify checksum.
0641      * We can't write on scattered buffers so we need to copy to
0642      * temp buffer.
0643      */
0644     memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
0645     ip4h_checked.check = 0;
0646     ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
0647     /* if IPv4 header checksum is OK, believe it */
0648     if (ip4h->check == ip4h_checked.check)
0649         return 4;
0650     return 6;
0651 }
0652 EXPORT_SYMBOL(ib_get_rdma_header_version);
0653 
0654 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
0655                              u32 port_num,
0656                              const struct ib_grh *grh)
0657 {
0658     int grh_version;
0659 
0660     if (rdma_protocol_ib(device, port_num))
0661         return RDMA_NETWORK_IB;
0662 
0663     grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
0664 
0665     if (grh_version == 4)
0666         return RDMA_NETWORK_IPV4;
0667 
0668     if (grh->next_hdr == IPPROTO_UDP)
0669         return RDMA_NETWORK_IPV6;
0670 
0671     return RDMA_NETWORK_ROCE_V1;
0672 }
0673 
0674 struct find_gid_index_context {
0675     u16 vlan_id;
0676     enum ib_gid_type gid_type;
0677 };
0678 
0679 static bool find_gid_index(const union ib_gid *gid,
0680                const struct ib_gid_attr *gid_attr,
0681                void *context)
0682 {
0683     struct find_gid_index_context *ctx = context;
0684     u16 vlan_id = 0xffff;
0685     int ret;
0686 
0687     if (ctx->gid_type != gid_attr->gid_type)
0688         return false;
0689 
0690     ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
0691     if (ret)
0692         return false;
0693 
0694     return ctx->vlan_id == vlan_id;
0695 }
0696 
0697 static const struct ib_gid_attr *
0698 get_sgid_attr_from_eth(struct ib_device *device, u32 port_num,
0699                u16 vlan_id, const union ib_gid *sgid,
0700                enum ib_gid_type gid_type)
0701 {
0702     struct find_gid_index_context context = {.vlan_id = vlan_id,
0703                          .gid_type = gid_type};
0704 
0705     return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
0706                        &context);
0707 }
0708 
0709 int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
0710                   enum rdma_network_type net_type,
0711                   union ib_gid *sgid, union ib_gid *dgid)
0712 {
0713     struct sockaddr_in  src_in;
0714     struct sockaddr_in  dst_in;
0715     __be32 src_saddr, dst_saddr;
0716 
0717     if (!sgid || !dgid)
0718         return -EINVAL;
0719 
0720     if (net_type == RDMA_NETWORK_IPV4) {
0721         memcpy(&src_in.sin_addr.s_addr,
0722                &hdr->roce4grh.saddr, 4);
0723         memcpy(&dst_in.sin_addr.s_addr,
0724                &hdr->roce4grh.daddr, 4);
0725         src_saddr = src_in.sin_addr.s_addr;
0726         dst_saddr = dst_in.sin_addr.s_addr;
0727         ipv6_addr_set_v4mapped(src_saddr,
0728                        (struct in6_addr *)sgid);
0729         ipv6_addr_set_v4mapped(dst_saddr,
0730                        (struct in6_addr *)dgid);
0731         return 0;
0732     } else if (net_type == RDMA_NETWORK_IPV6 ||
0733            net_type == RDMA_NETWORK_IB || net_type == RDMA_NETWORK_ROCE_V1) {
0734         *dgid = hdr->ibgrh.dgid;
0735         *sgid = hdr->ibgrh.sgid;
0736         return 0;
0737     } else {
0738         return -EINVAL;
0739     }
0740 }
0741 EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
0742 
0743 /* Resolve destination mac address and hop limit for unicast destination
0744  * GID entry, considering the source GID entry as well.
0745  * ah_attribute must have valid port_num, sgid_index.
0746  */
0747 static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
0748                        struct rdma_ah_attr *ah_attr)
0749 {
0750     struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
0751     const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
0752     int hop_limit = 0xff;
0753     int ret = 0;
0754 
0755     /* If destination is link local and source GID is RoCEv1,
0756      * IP stack is not used.
0757      */
0758     if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
0759         sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
0760         rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
0761                 ah_attr->roce.dmac);
0762         return ret;
0763     }
0764 
0765     ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
0766                        ah_attr->roce.dmac,
0767                        sgid_attr, &hop_limit);
0768 
0769     grh->hop_limit = hop_limit;
0770     return ret;
0771 }
0772 
0773 /*
0774  * This function initializes address handle attributes from an incoming packet.
0775  * The incoming packet's dgid holds the GID of the receiving node on which this
0776  * code executes, and its sgid contains the GID of the sender.
0777  *
0778  * When resolving the mac address of the destination, the arrived dgid is used
0779  * as the sgid and the sgid is used as the dgid, because the sgid contains the
0780  * destination GID to respond to.
0781  *
0782  * On success the caller is responsible to call rdma_destroy_ah_attr on the
0783  * attr.
0784  */
0785 int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
0786                 const struct ib_wc *wc, const struct ib_grh *grh,
0787                 struct rdma_ah_attr *ah_attr)
0788 {
0789     u32 flow_class;
0790     int ret;
0791     enum rdma_network_type net_type = RDMA_NETWORK_IB;
0792     enum ib_gid_type gid_type = IB_GID_TYPE_IB;
0793     const struct ib_gid_attr *sgid_attr;
0794     int hoplimit = 0xff;
0795     union ib_gid dgid;
0796     union ib_gid sgid;
0797 
0798     might_sleep();
0799 
0800     memset(ah_attr, 0, sizeof *ah_attr);
0801     ah_attr->type = rdma_ah_find_type(device, port_num);
0802     if (rdma_cap_eth_ah(device, port_num)) {
0803         if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
0804             net_type = wc->network_hdr_type;
0805         else
0806             net_type = ib_get_net_type_by_grh(device, port_num, grh);
0807         gid_type = ib_network_to_gid_type(net_type);
0808     }
0809     ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
0810                     &sgid, &dgid);
0811     if (ret)
0812         return ret;
0813 
0814     rdma_ah_set_sl(ah_attr, wc->sl);
0815     rdma_ah_set_port_num(ah_attr, port_num);
0816 
0817     if (rdma_protocol_roce(device, port_num)) {
0818         u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
0819                 wc->vlan_id : 0xffff;
0820 
0821         if (!(wc->wc_flags & IB_WC_GRH))
0822             return -EPROTOTYPE;
0823 
0824         sgid_attr = get_sgid_attr_from_eth(device, port_num,
0825                            vlan_id, &dgid,
0826                            gid_type);
0827         if (IS_ERR(sgid_attr))
0828             return PTR_ERR(sgid_attr);
0829 
0830         flow_class = be32_to_cpu(grh->version_tclass_flow);
0831         rdma_move_grh_sgid_attr(ah_attr,
0832                     &sgid,
0833                     flow_class & 0xFFFFF,
0834                     hoplimit,
0835                     (flow_class >> 20) & 0xFF,
0836                     sgid_attr);
0837 
0838         ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
0839         if (ret)
0840             rdma_destroy_ah_attr(ah_attr);
0841 
0842         return ret;
0843     } else {
0844         rdma_ah_set_dlid(ah_attr, wc->slid);
0845         rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
0846 
0847         if ((wc->wc_flags & IB_WC_GRH) == 0)
0848             return 0;
0849 
0850         if (dgid.global.interface_id !=
0851                     cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
0852             sgid_attr = rdma_find_gid_by_port(
0853                 device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
0854         } else
0855             sgid_attr = rdma_get_gid_attr(device, port_num, 0);
0856 
0857         if (IS_ERR(sgid_attr))
0858             return PTR_ERR(sgid_attr);
0859         flow_class = be32_to_cpu(grh->version_tclass_flow);
0860         rdma_move_grh_sgid_attr(ah_attr,
0861                     &sgid,
0862                     flow_class & 0xFFFFF,
0863                     hoplimit,
0864                     (flow_class >> 20) & 0xFF,
0865                     sgid_attr);
0866 
0867         return 0;
0868     }
0869 }
0870 EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
0871 
0872 /**
0873  * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
0874  * of the reference
0875  *
0876  * @attr:   Pointer to AH attribute structure
0877  * @dgid:   Destination GID
0878  * @flow_label: Flow label
0879  * @hop_limit:  Hop limit
0880  * @traffic_class: traffic class
0881  * @sgid_attr:  Pointer to SGID attribute
0882  *
0883  * This takes ownership of the sgid_attr reference. The caller must ensure
0884  * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
0885  * calling this function.
0886  */
0887 void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
0888                  u32 flow_label, u8 hop_limit, u8 traffic_class,
0889                  const struct ib_gid_attr *sgid_attr)
0890 {
0891     rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
0892             traffic_class);
0893     attr->grh.sgid_attr = sgid_attr;
0894 }
0895 EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
0896 
0897 /**
0898  * rdma_destroy_ah_attr - Release reference to SGID attribute of
0899  * ah attribute.
0900  * @ah_attr: Pointer to ah attribute
0901  *
0902  * Release reference to the SGID attribute of the ah attribute if it is
0903  * non NULL. It is safe to call this multiple times, and safe to call it on
0904  * a zero initialized ah_attr.
0905  */
0906 void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
0907 {
0908     if (ah_attr->grh.sgid_attr) {
0909         rdma_put_gid_attr(ah_attr->grh.sgid_attr);
0910         ah_attr->grh.sgid_attr = NULL;
0911     }
0912 }
0913 EXPORT_SYMBOL(rdma_destroy_ah_attr);
0914 
0915 struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
0916                    const struct ib_grh *grh, u32 port_num)
0917 {
0918     struct rdma_ah_attr ah_attr;
0919     struct ib_ah *ah;
0920     int ret;
0921 
0922     ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
0923     if (ret)
0924         return ERR_PTR(ret);
0925 
0926     ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
0927 
0928     rdma_destroy_ah_attr(&ah_attr);
0929     return ah;
0930 }
0931 EXPORT_SYMBOL(ib_create_ah_from_wc);
0932 
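/*
 * Illustrative sketch (editor's addition): the usual responder pattern for UD
 * traffic, turning a received completion plus its GRH into an address handle
 * for the reply. The function name is an assumption about a caller's context;
 * kept out of the build with #if 0.
 */
#if 0
static int example_reply_ah(struct ib_pd *pd, const struct ib_wc *wc,
			    const struct ib_grh *grh, u32 port_num)
{
	struct ib_ah *ah;

	ah = ib_create_ah_from_wc(pd, wc, grh, port_num);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	/* ... post UD sends whose ib_ud_wr::ah points at ah ... */

	rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
	return 0;
}
#endif
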
0933 int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
0934 {
0935     const struct ib_gid_attr *old_sgid_attr;
0936     int ret;
0937 
0938     if (ah->type != ah_attr->type)
0939         return -EINVAL;
0940 
0941     ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
0942     if (ret)
0943         return ret;
0944 
0945     ret = ah->device->ops.modify_ah ?
0946         ah->device->ops.modify_ah(ah, ah_attr) :
0947         -EOPNOTSUPP;
0948 
0949     ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
0950     rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
0951     return ret;
0952 }
0953 EXPORT_SYMBOL(rdma_modify_ah);
0954 
0955 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
0956 {
0957     ah_attr->grh.sgid_attr = NULL;
0958 
0959     return ah->device->ops.query_ah ?
0960         ah->device->ops.query_ah(ah, ah_attr) :
0961         -EOPNOTSUPP;
0962 }
0963 EXPORT_SYMBOL(rdma_query_ah);
0964 
0965 int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
0966 {
0967     const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
0968     struct ib_pd *pd;
0969     int ret;
0970 
0971     might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
0972 
0973     pd = ah->pd;
0974 
0975     ret = ah->device->ops.destroy_ah(ah, flags);
0976     if (ret)
0977         return ret;
0978 
0979     atomic_dec(&pd->usecnt);
0980     if (sgid_attr)
0981         rdma_put_gid_attr(sgid_attr);
0982 
0983     kfree(ah);
0984     return ret;
0985 }
0986 EXPORT_SYMBOL(rdma_destroy_ah_user);
0987 
0988 /* Shared receive queues */
0989 
0990 /**
0991  * ib_create_srq_user - Creates an SRQ associated with the specified protection
0992  *   domain.
0993  * @pd: The protection domain associated with the SRQ.
0994  * @srq_init_attr: A list of initial attributes required to create the
0995  *   SRQ.  If SRQ creation succeeds, then the attributes are updated to
0996  *   the actual capabilities of the created SRQ.
0997  * @uobject: uobject pointer if this is not a kernel SRQ
0998  * @udata: udata pointer if this is not a kernel SRQ
0999  *
1000  * srq_attr->max_wr and srq_attr->max_sge are read to determine the
1001  * requested size of the SRQ, and set to the actual values allocated
1002  * on return.  If ib_create_srq() succeeds, then max_wr and max_sge
1003  * will always be at least as large as the requested values.
1004  */
1005 struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
1006                   struct ib_srq_init_attr *srq_init_attr,
1007                   struct ib_usrq_object *uobject,
1008                   struct ib_udata *udata)
1009 {
1010     struct ib_srq *srq;
1011     int ret;
1012 
1013     srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
1014     if (!srq)
1015         return ERR_PTR(-ENOMEM);
1016 
1017     srq->device = pd->device;
1018     srq->pd = pd;
1019     srq->event_handler = srq_init_attr->event_handler;
1020     srq->srq_context = srq_init_attr->srq_context;
1021     srq->srq_type = srq_init_attr->srq_type;
1022     srq->uobject = uobject;
1023 
1024     if (ib_srq_has_cq(srq->srq_type)) {
1025         srq->ext.cq = srq_init_attr->ext.cq;
1026         atomic_inc(&srq->ext.cq->usecnt);
1027     }
1028     if (srq->srq_type == IB_SRQT_XRC) {
1029         srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
1030         if (srq->ext.xrc.xrcd)
1031             atomic_inc(&srq->ext.xrc.xrcd->usecnt);
1032     }
1033     atomic_inc(&pd->usecnt);
1034 
1035     rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ);
1036     rdma_restrack_parent_name(&srq->res, &pd->res);
1037 
1038     ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
1039     if (ret) {
1040         rdma_restrack_put(&srq->res);
1041         atomic_dec(&srq->pd->usecnt);
1042         if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
1043             atomic_dec(&srq->ext.xrc.xrcd->usecnt);
1044         if (ib_srq_has_cq(srq->srq_type))
1045             atomic_dec(&srq->ext.cq->usecnt);
1046         kfree(srq);
1047         return ERR_PTR(ret);
1048     }
1049 
1050     rdma_restrack_add(&srq->res);
1051 
1052     return srq;
1053 }
1054 EXPORT_SYMBOL(ib_create_srq_user);
1055 
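/*
 * Illustrative sketch (editor's addition): kernel users normally go through
 * the ib_create_srq() wrapper rather than calling ib_create_srq_user()
 * directly. The attribute values are assumptions for a basic SRQ; kept out
 * of the build with #if 0.
 */
#if 0
static struct ib_srq *example_create_srq(struct ib_pd *pd)
{
	struct ib_srq_init_attr attr = {
		.attr = {
			.max_wr  = 256,	/* requested; may be rounded up */
			.max_sge = 1,
		},
		.srq_type = IB_SRQT_BASIC,
	};

	return ib_create_srq(pd, &attr);	/* ERR_PTR() on failure */
}
#endif
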
1056 int ib_modify_srq(struct ib_srq *srq,
1057           struct ib_srq_attr *srq_attr,
1058           enum ib_srq_attr_mask srq_attr_mask)
1059 {
1060     return srq->device->ops.modify_srq ?
1061         srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask,
1062                         NULL) : -EOPNOTSUPP;
1063 }
1064 EXPORT_SYMBOL(ib_modify_srq);
1065 
1066 int ib_query_srq(struct ib_srq *srq,
1067          struct ib_srq_attr *srq_attr)
1068 {
1069     return srq->device->ops.query_srq ?
1070         srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
1071 }
1072 EXPORT_SYMBOL(ib_query_srq);
1073 
1074 int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
1075 {
1076     int ret;
1077 
1078     if (atomic_read(&srq->usecnt))
1079         return -EBUSY;
1080 
1081     ret = srq->device->ops.destroy_srq(srq, udata);
1082     if (ret)
1083         return ret;
1084 
1085     atomic_dec(&srq->pd->usecnt);
1086     if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
1087         atomic_dec(&srq->ext.xrc.xrcd->usecnt);
1088     if (ib_srq_has_cq(srq->srq_type))
1089         atomic_dec(&srq->ext.cq->usecnt);
1090     rdma_restrack_del(&srq->res);
1091     kfree(srq);
1092 
1093     return ret;
1094 }
1095 EXPORT_SYMBOL(ib_destroy_srq_user);
1096 
1097 /* Queue pairs */
1098 
1099 static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
1100 {
1101     struct ib_qp *qp = context;
1102     unsigned long flags;
1103 
1104     spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
1105     list_for_each_entry(event->element.qp, &qp->open_list, open_list)
1106         if (event->element.qp->event_handler)
1107             event->element.qp->event_handler(event, event->element.qp->qp_context);
1108     spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
1109 }
1110 
1111 static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
1112                   void (*event_handler)(struct ib_event *, void *),
1113                   void *qp_context)
1114 {
1115     struct ib_qp *qp;
1116     unsigned long flags;
1117     int err;
1118 
1119     qp = kzalloc(sizeof *qp, GFP_KERNEL);
1120     if (!qp)
1121         return ERR_PTR(-ENOMEM);
1122 
1123     qp->real_qp = real_qp;
1124     err = ib_open_shared_qp_security(qp, real_qp->device);
1125     if (err) {
1126         kfree(qp);
1127         return ERR_PTR(err);
1128     }
1129 
1130     qp->real_qp = real_qp;
1131     atomic_inc(&real_qp->usecnt);
1132     qp->device = real_qp->device;
1133     qp->event_handler = event_handler;
1134     qp->qp_context = qp_context;
1135     qp->qp_num = real_qp->qp_num;
1136     qp->qp_type = real_qp->qp_type;
1137 
1138     spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
1139     list_add(&qp->open_list, &real_qp->open_list);
1140     spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
1141 
1142     return qp;
1143 }
1144 
1145 struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
1146              struct ib_qp_open_attr *qp_open_attr)
1147 {
1148     struct ib_qp *qp, *real_qp;
1149 
1150     if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
1151         return ERR_PTR(-EINVAL);
1152 
1153     down_read(&xrcd->tgt_qps_rwsem);
1154     real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
1155     if (!real_qp) {
1156         up_read(&xrcd->tgt_qps_rwsem);
1157         return ERR_PTR(-EINVAL);
1158     }
1159     qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
1160               qp_open_attr->qp_context);
1161     up_read(&xrcd->tgt_qps_rwsem);
1162     return qp;
1163 }
1164 EXPORT_SYMBOL(ib_open_qp);
1165 
1166 static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
1167                     struct ib_qp_init_attr *qp_init_attr)
1168 {
1169     struct ib_qp *real_qp = qp;
1170     int err;
1171 
1172     qp->event_handler = __ib_shared_qp_event_handler;
1173     qp->qp_context = qp;
1174     qp->pd = NULL;
1175     qp->send_cq = qp->recv_cq = NULL;
1176     qp->srq = NULL;
1177     qp->xrcd = qp_init_attr->xrcd;
1178     atomic_inc(&qp_init_attr->xrcd->usecnt);
1179     INIT_LIST_HEAD(&qp->open_list);
1180 
1181     qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
1182               qp_init_attr->qp_context);
1183     if (IS_ERR(qp))
1184         return qp;
1185 
1186     err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num,
1187                   real_qp, GFP_KERNEL));
1188     if (err) {
1189         ib_close_qp(qp);
1190         return ERR_PTR(err);
1191     }
1192     return qp;
1193 }
1194 
1195 static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
1196                    struct ib_qp_init_attr *attr,
1197                    struct ib_udata *udata,
1198                    struct ib_uqp_object *uobj, const char *caller)
1199 {
1200     struct ib_udata dummy = {};
1201     struct ib_qp *qp;
1202     int ret;
1203 
1204     if (!dev->ops.create_qp)
1205         return ERR_PTR(-EOPNOTSUPP);
1206 
1207     qp = rdma_zalloc_drv_obj_numa(dev, ib_qp);
1208     if (!qp)
1209         return ERR_PTR(-ENOMEM);
1210 
1211     qp->device = dev;
1212     qp->pd = pd;
1213     qp->uobject = uobj;
1214     qp->real_qp = qp;
1215 
1216     qp->qp_type = attr->qp_type;
1217     qp->rwq_ind_tbl = attr->rwq_ind_tbl;
1218     qp->srq = attr->srq;
1219     qp->event_handler = attr->event_handler;
1220     qp->port = attr->port_num;
1221     qp->qp_context = attr->qp_context;
1222 
1223     spin_lock_init(&qp->mr_lock);
1224     INIT_LIST_HEAD(&qp->rdma_mrs);
1225     INIT_LIST_HEAD(&qp->sig_mrs);
1226 
1227     qp->send_cq = attr->send_cq;
1228     qp->recv_cq = attr->recv_cq;
1229 
1230     rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
1231     WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
1232     rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
1233     ret = dev->ops.create_qp(qp, attr, udata);
1234     if (ret)
1235         goto err_create;
1236 
1237     /*
1238      * TODO: The mlx4 internally overwrites send_cq and recv_cq.
1239      * Unfortunately, it is not an easy task to fix that driver.
1240      */
1241     qp->send_cq = attr->send_cq;
1242     qp->recv_cq = attr->recv_cq;
1243 
1244     ret = ib_create_qp_security(qp, dev);
1245     if (ret)
1246         goto err_security;
1247 
1248     rdma_restrack_add(&qp->res);
1249     return qp;
1250 
1251 err_security:
1252     qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL);
1253 err_create:
1254     rdma_restrack_put(&qp->res);
1255     kfree(qp);
1256     return ERR_PTR(ret);
1257 
1258 }
1259 
1260 /**
1261  * ib_create_qp_user - Creates a QP associated with the specified protection
1262  *   domain.
1263  * @dev: IB device
1264  * @pd: The protection domain associated with the QP.
1265  * @attr: A list of initial attributes required to create the
1266  *   QP.  If QP creation succeeds, then the attributes are updated to
1267  *   the actual capabilities of the created QP.
1268  * @udata: User data
1269  * @uobj: uverbs object
1270  * @caller: caller's build-time module name
1271  */
1272 struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
1273                 struct ib_qp_init_attr *attr,
1274                 struct ib_udata *udata,
1275                 struct ib_uqp_object *uobj, const char *caller)
1276 {
1277     struct ib_qp *qp, *xrc_qp;
1278 
1279     if (attr->qp_type == IB_QPT_XRC_TGT)
1280         qp = create_qp(dev, pd, attr, NULL, NULL, caller);
1281     else
1282         qp = create_qp(dev, pd, attr, udata, uobj, NULL);
1283     if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
1284         return qp;
1285 
1286     xrc_qp = create_xrc_qp_user(qp, attr);
1287     if (IS_ERR(xrc_qp)) {
1288         ib_destroy_qp(qp);
1289         return xrc_qp;
1290     }
1291 
1292     xrc_qp->uobject = uobj;
1293     return xrc_qp;
1294 }
1295 EXPORT_SYMBOL(ib_create_qp_user);
1296 
1297 void ib_qp_usecnt_inc(struct ib_qp *qp)
1298 {
1299     if (qp->pd)
1300         atomic_inc(&qp->pd->usecnt);
1301     if (qp->send_cq)
1302         atomic_inc(&qp->send_cq->usecnt);
1303     if (qp->recv_cq)
1304         atomic_inc(&qp->recv_cq->usecnt);
1305     if (qp->srq)
1306         atomic_inc(&qp->srq->usecnt);
1307     if (qp->rwq_ind_tbl)
1308         atomic_inc(&qp->rwq_ind_tbl->usecnt);
1309 }
1310 EXPORT_SYMBOL(ib_qp_usecnt_inc);
1311 
1312 void ib_qp_usecnt_dec(struct ib_qp *qp)
1313 {
1314     if (qp->rwq_ind_tbl)
1315         atomic_dec(&qp->rwq_ind_tbl->usecnt);
1316     if (qp->srq)
1317         atomic_dec(&qp->srq->usecnt);
1318     if (qp->recv_cq)
1319         atomic_dec(&qp->recv_cq->usecnt);
1320     if (qp->send_cq)
1321         atomic_dec(&qp->send_cq->usecnt);
1322     if (qp->pd)
1323         atomic_dec(&qp->pd->usecnt);
1324 }
1325 EXPORT_SYMBOL(ib_qp_usecnt_dec);
1326 
1327 struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
1328                   struct ib_qp_init_attr *qp_init_attr,
1329                   const char *caller)
1330 {
1331     struct ib_device *device = pd->device;
1332     struct ib_qp *qp;
1333     int ret;
1334 
1335     /*
1336      * If the caller is using the RDMA API, calculate the resources
1337      * needed for the RDMA READ/WRITE operations.
1338      *
1339      * Note that these callers need to pass in a port number.
1340      */
1341     if (qp_init_attr->cap.max_rdma_ctxs)
1342         rdma_rw_init_qp(device, qp_init_attr);
1343 
1344     qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
1345     if (IS_ERR(qp))
1346         return qp;
1347 
1348     ib_qp_usecnt_inc(qp);
1349 
1350     if (qp_init_attr->cap.max_rdma_ctxs) {
1351         ret = rdma_rw_init_mrs(qp, qp_init_attr);
1352         if (ret)
1353             goto err;
1354     }
1355 
1356     /*
1357      * Note: all hw drivers guarantee that max_send_sge is lower than
1358      * the device RDMA WRITE SGE limit but not all hw drivers ensure that
1359      * max_send_sge <= max_sge_rd.
1360      */
1361     qp->max_write_sge = qp_init_attr->cap.max_send_sge;
1362     qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
1363                  device->attrs.max_sge_rd);
1364     if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN)
1365         qp->integrity_en = true;
1366 
1367     return qp;
1368 
1369 err:
1370     ib_destroy_qp(qp);
1371     return ERR_PTR(ret);
1372 
1373 }
1374 EXPORT_SYMBOL(ib_create_qp_kernel);
1375 
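/*
 * Illustrative sketch (editor's addition): kernel ULPs reach the function
 * above through the ib_create_qp() wrapper and tear the QP down with
 * ib_destroy_qp(). The capability numbers are assumptions for a small RC QP;
 * kept out of the build with #if 0.
 */
#if 0
static struct ib_qp *example_create_rc_qp(struct ib_pd *pd, struct ib_cq *cq)
{
	struct ib_qp_init_attr attr = {
		.send_cq     = cq,
		.recv_cq     = cq,
		.cap = {
			.max_send_wr  = 16,
			.max_recv_wr  = 16,
			.max_send_sge = 1,
			.max_recv_sge = 1,
		},
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type     = IB_QPT_RC,
	};

	return ib_create_qp(pd, &attr);	/* ERR_PTR() on failure */
}
#endif
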
1376 static const struct {
1377     int         valid;
1378     enum ib_qp_attr_mask    req_param[IB_QPT_MAX];
1379     enum ib_qp_attr_mask    opt_param[IB_QPT_MAX];
1380 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
1381     [IB_QPS_RESET] = {
1382         [IB_QPS_RESET] = { .valid = 1 },
1383         [IB_QPS_INIT]  = {
1384             .valid = 1,
1385             .req_param = {
1386                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX        |
1387                         IB_QP_PORT          |
1388                         IB_QP_QKEY),
1389                 [IB_QPT_RAW_PACKET] = IB_QP_PORT,
1390                 [IB_QPT_UC]  = (IB_QP_PKEY_INDEX        |
1391                         IB_QP_PORT          |
1392                         IB_QP_ACCESS_FLAGS),
1393                 [IB_QPT_RC]  = (IB_QP_PKEY_INDEX        |
1394                         IB_QP_PORT          |
1395                         IB_QP_ACCESS_FLAGS),
1396                 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX        |
1397                         IB_QP_PORT          |
1398                         IB_QP_ACCESS_FLAGS),
1399                 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX        |
1400                         IB_QP_PORT          |
1401                         IB_QP_ACCESS_FLAGS),
1402                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX        |
1403                         IB_QP_QKEY),
1404                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX        |
1405                         IB_QP_QKEY),
1406             }
1407         },
1408     },
1409     [IB_QPS_INIT]  = {
1410         [IB_QPS_RESET] = { .valid = 1 },
1411         [IB_QPS_ERR] =   { .valid = 1 },
1412         [IB_QPS_INIT]  = {
1413             .valid = 1,
1414             .opt_param = {
1415                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX        |
1416                         IB_QP_PORT          |
1417                         IB_QP_QKEY),
1418                 [IB_QPT_UC]  = (IB_QP_PKEY_INDEX        |
1419                         IB_QP_PORT          |
1420                         IB_QP_ACCESS_FLAGS),
1421                 [IB_QPT_RC]  = (IB_QP_PKEY_INDEX        |
1422                         IB_QP_PORT          |
1423                         IB_QP_ACCESS_FLAGS),
1424                 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX        |
1425                         IB_QP_PORT          |
1426                         IB_QP_ACCESS_FLAGS),
1427                 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX        |
1428                         IB_QP_PORT          |
1429                         IB_QP_ACCESS_FLAGS),
1430                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX        |
1431                         IB_QP_QKEY),
1432                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX        |
1433                         IB_QP_QKEY),
1434             }
1435         },
1436         [IB_QPS_RTR]   = {
1437             .valid = 1,
1438             .req_param = {
1439                 [IB_QPT_UC]  = (IB_QP_AV            |
1440                         IB_QP_PATH_MTU          |
1441                         IB_QP_DEST_QPN          |
1442                         IB_QP_RQ_PSN),
1443                 [IB_QPT_RC]  = (IB_QP_AV            |
1444                         IB_QP_PATH_MTU          |
1445                         IB_QP_DEST_QPN          |
1446                         IB_QP_RQ_PSN            |
1447                         IB_QP_MAX_DEST_RD_ATOMIC    |
1448                         IB_QP_MIN_RNR_TIMER),
1449                 [IB_QPT_XRC_INI] = (IB_QP_AV            |
1450                         IB_QP_PATH_MTU          |
1451                         IB_QP_DEST_QPN          |
1452                         IB_QP_RQ_PSN),
1453                 [IB_QPT_XRC_TGT] = (IB_QP_AV            |
1454                         IB_QP_PATH_MTU          |
1455                         IB_QP_DEST_QPN          |
1456                         IB_QP_RQ_PSN            |
1457                         IB_QP_MAX_DEST_RD_ATOMIC    |
1458                         IB_QP_MIN_RNR_TIMER),
1459             },
1460             .opt_param = {
1461                  [IB_QPT_UD]  = (IB_QP_PKEY_INDEX       |
1462                          IB_QP_QKEY),
1463                  [IB_QPT_UC]  = (IB_QP_ALT_PATH         |
1464                          IB_QP_ACCESS_FLAGS     |
1465                          IB_QP_PKEY_INDEX),
1466                  [IB_QPT_RC]  = (IB_QP_ALT_PATH         |
1467                          IB_QP_ACCESS_FLAGS     |
1468                          IB_QP_PKEY_INDEX),
1469                  [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH     |
1470                          IB_QP_ACCESS_FLAGS     |
1471                          IB_QP_PKEY_INDEX),
1472                  [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH     |
1473                          IB_QP_ACCESS_FLAGS     |
1474                          IB_QP_PKEY_INDEX),
1475                  [IB_QPT_SMI] = (IB_QP_PKEY_INDEX       |
1476                          IB_QP_QKEY),
1477                  [IB_QPT_GSI] = (IB_QP_PKEY_INDEX       |
1478                          IB_QP_QKEY),
1479              },
1480         },
1481     },
1482     [IB_QPS_RTR]   = {
1483         [IB_QPS_RESET] = { .valid = 1 },
1484         [IB_QPS_ERR] =   { .valid = 1 },
1485         [IB_QPS_RTS]   = {
1486             .valid = 1,
1487             .req_param = {
1488                 [IB_QPT_UD]  = IB_QP_SQ_PSN,
1489                 [IB_QPT_UC]  = IB_QP_SQ_PSN,
1490                 [IB_QPT_RC]  = (IB_QP_TIMEOUT           |
1491                         IB_QP_RETRY_CNT         |
1492                         IB_QP_RNR_RETRY         |
1493                         IB_QP_SQ_PSN            |
1494                         IB_QP_MAX_QP_RD_ATOMIC),
1495                 [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT       |
1496                         IB_QP_RETRY_CNT         |
1497                         IB_QP_RNR_RETRY         |
1498                         IB_QP_SQ_PSN            |
1499                         IB_QP_MAX_QP_RD_ATOMIC),
1500                 [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT       |
1501                         IB_QP_SQ_PSN),
1502                 [IB_QPT_SMI] = IB_QP_SQ_PSN,
1503                 [IB_QPT_GSI] = IB_QP_SQ_PSN,
1504             },
1505             .opt_param = {
1506                  [IB_QPT_UD]  = (IB_QP_CUR_STATE        |
1507                          IB_QP_QKEY),
1508                  [IB_QPT_UC]  = (IB_QP_CUR_STATE        |
1509                          IB_QP_ALT_PATH         |
1510                          IB_QP_ACCESS_FLAGS     |
1511                          IB_QP_PATH_MIG_STATE),
1512                  [IB_QPT_RC]  = (IB_QP_CUR_STATE        |
1513                          IB_QP_ALT_PATH         |
1514                          IB_QP_ACCESS_FLAGS     |
1515                          IB_QP_MIN_RNR_TIMER        |
1516                          IB_QP_PATH_MIG_STATE),
1517                  [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE        |
1518                          IB_QP_ALT_PATH         |
1519                          IB_QP_ACCESS_FLAGS     |
1520                          IB_QP_PATH_MIG_STATE),
1521                  [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE        |
1522                          IB_QP_ALT_PATH         |
1523                          IB_QP_ACCESS_FLAGS     |
1524                          IB_QP_MIN_RNR_TIMER        |
1525                          IB_QP_PATH_MIG_STATE),
1526                  [IB_QPT_SMI] = (IB_QP_CUR_STATE        |
1527                          IB_QP_QKEY),
1528                  [IB_QPT_GSI] = (IB_QP_CUR_STATE        |
1529                          IB_QP_QKEY),
1530                  [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1531              }
1532         }
1533     },
1534     [IB_QPS_RTS]   = {
1535         [IB_QPS_RESET] = { .valid = 1 },
1536         [IB_QPS_ERR] =   { .valid = 1 },
1537         [IB_QPS_RTS]   = {
1538             .valid = 1,
1539             .opt_param = {
1540                 [IB_QPT_UD]  = (IB_QP_CUR_STATE         |
1541                         IB_QP_QKEY),
1542                 [IB_QPT_UC]  = (IB_QP_CUR_STATE         |
1543                         IB_QP_ACCESS_FLAGS      |
1544                         IB_QP_ALT_PATH          |
1545                         IB_QP_PATH_MIG_STATE),
1546                 [IB_QPT_RC]  = (IB_QP_CUR_STATE         |
1547                         IB_QP_ACCESS_FLAGS      |
1548                         IB_QP_ALT_PATH          |
1549                         IB_QP_PATH_MIG_STATE        |
1550                         IB_QP_MIN_RNR_TIMER),
1551                 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE     |
1552                         IB_QP_ACCESS_FLAGS      |
1553                         IB_QP_ALT_PATH          |
1554                         IB_QP_PATH_MIG_STATE),
1555                 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE     |
1556                         IB_QP_ACCESS_FLAGS      |
1557                         IB_QP_ALT_PATH          |
1558                         IB_QP_PATH_MIG_STATE        |
1559                         IB_QP_MIN_RNR_TIMER),
1560                 [IB_QPT_SMI] = (IB_QP_CUR_STATE         |
1561                         IB_QP_QKEY),
1562                 [IB_QPT_GSI] = (IB_QP_CUR_STATE         |
1563                         IB_QP_QKEY),
1564                 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1565             }
1566         },
1567         [IB_QPS_SQD]   = {
1568             .valid = 1,
1569             .opt_param = {
1570                 [IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1571                 [IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1572                 [IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1573                 [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1574                 [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
1575                 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1576                 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
1577             }
1578         },
1579     },
1580     [IB_QPS_SQD]   = {
1581         [IB_QPS_RESET] = { .valid = 1 },
1582         [IB_QPS_ERR] =   { .valid = 1 },
1583         [IB_QPS_RTS]   = {
1584             .valid = 1,
1585             .opt_param = {
1586                 [IB_QPT_UD]  = (IB_QP_CUR_STATE         |
1587                         IB_QP_QKEY),
1588                 [IB_QPT_UC]  = (IB_QP_CUR_STATE         |
1589                         IB_QP_ALT_PATH          |
1590                         IB_QP_ACCESS_FLAGS      |
1591                         IB_QP_PATH_MIG_STATE),
1592                 [IB_QPT_RC]  = (IB_QP_CUR_STATE         |
1593                         IB_QP_ALT_PATH          |
1594                         IB_QP_ACCESS_FLAGS      |
1595                         IB_QP_MIN_RNR_TIMER     |
1596                         IB_QP_PATH_MIG_STATE),
1597                 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE     |
1598                         IB_QP_ALT_PATH          |
1599                         IB_QP_ACCESS_FLAGS      |
1600                         IB_QP_PATH_MIG_STATE),
1601                 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE     |
1602                         IB_QP_ALT_PATH          |
1603                         IB_QP_ACCESS_FLAGS      |
1604                         IB_QP_MIN_RNR_TIMER     |
1605                         IB_QP_PATH_MIG_STATE),
1606                 [IB_QPT_SMI] = (IB_QP_CUR_STATE         |
1607                         IB_QP_QKEY),
1608                 [IB_QPT_GSI] = (IB_QP_CUR_STATE         |
1609                         IB_QP_QKEY),
1610             }
1611         },
1612         [IB_QPS_SQD]   = {
1613             .valid = 1,
1614             .opt_param = {
1615                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX        |
1616                         IB_QP_QKEY),
1617                 [IB_QPT_UC]  = (IB_QP_AV            |
1618                         IB_QP_ALT_PATH          |
1619                         IB_QP_ACCESS_FLAGS      |
1620                         IB_QP_PKEY_INDEX        |
1621                         IB_QP_PATH_MIG_STATE),
1622                 [IB_QPT_RC]  = (IB_QP_PORT          |
1623                         IB_QP_AV            |
1624                         IB_QP_TIMEOUT           |
1625                         IB_QP_RETRY_CNT         |
1626                         IB_QP_RNR_RETRY         |
1627                         IB_QP_MAX_QP_RD_ATOMIC      |
1628                         IB_QP_MAX_DEST_RD_ATOMIC    |
1629                         IB_QP_ALT_PATH          |
1630                         IB_QP_ACCESS_FLAGS      |
1631                         IB_QP_PKEY_INDEX        |
1632                         IB_QP_MIN_RNR_TIMER     |
1633                         IB_QP_PATH_MIG_STATE),
1634                 [IB_QPT_XRC_INI] = (IB_QP_PORT          |
1635                         IB_QP_AV            |
1636                         IB_QP_TIMEOUT           |
1637                         IB_QP_RETRY_CNT         |
1638                         IB_QP_RNR_RETRY         |
1639                         IB_QP_MAX_QP_RD_ATOMIC      |
1640                         IB_QP_ALT_PATH          |
1641                         IB_QP_ACCESS_FLAGS      |
1642                         IB_QP_PKEY_INDEX        |
1643                         IB_QP_PATH_MIG_STATE),
1644                 [IB_QPT_XRC_TGT] = (IB_QP_PORT          |
1645                         IB_QP_AV            |
1646                         IB_QP_TIMEOUT           |
1647                         IB_QP_MAX_DEST_RD_ATOMIC    |
1648                         IB_QP_ALT_PATH          |
1649                         IB_QP_ACCESS_FLAGS      |
1650                         IB_QP_PKEY_INDEX        |
1651                         IB_QP_MIN_RNR_TIMER     |
1652                         IB_QP_PATH_MIG_STATE),
1653                 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX        |
1654                         IB_QP_QKEY),
1655                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX        |
1656                         IB_QP_QKEY),
1657             }
1658         }
1659     },
1660     [IB_QPS_SQE]   = {
1661         [IB_QPS_RESET] = { .valid = 1 },
1662         [IB_QPS_ERR] =   { .valid = 1 },
1663         [IB_QPS_RTS]   = {
1664             .valid = 1,
1665             .opt_param = {
1666                 [IB_QPT_UD]  = (IB_QP_CUR_STATE         |
1667                         IB_QP_QKEY),
1668                 [IB_QPT_UC]  = (IB_QP_CUR_STATE         |
1669                         IB_QP_ACCESS_FLAGS),
1670                 [IB_QPT_SMI] = (IB_QP_CUR_STATE         |
1671                         IB_QP_QKEY),
1672                 [IB_QPT_GSI] = (IB_QP_CUR_STATE         |
1673                         IB_QP_QKEY),
1674             }
1675         }
1676     },
1677     [IB_QPS_ERR] = {
1678         [IB_QPS_RESET] = { .valid = 1 },
1679         [IB_QPS_ERR] =   { .valid = 1 }
1680     }
1681 };
1682 
1683 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1684             enum ib_qp_type type, enum ib_qp_attr_mask mask)
1685 {
1686     enum ib_qp_attr_mask req_param, opt_param;
1687 
1688     if (mask & IB_QP_CUR_STATE  &&
1689         cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1690         cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
1691         return false;
1692 
1693     if (!qp_state_table[cur_state][next_state].valid)
1694         return false;
1695 
1696     req_param = qp_state_table[cur_state][next_state].req_param[type];
1697     opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1698 
1699     if ((mask & req_param) != req_param)
1700         return false;
1701 
1702     if (mask & ~(req_param | opt_param | IB_QP_STATE))
1703         return false;
1704 
1705     return true;
1706 }
1707 EXPORT_SYMBOL(ib_modify_qp_is_ok);
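
/*
 * Example (not part of the original file): a minimal sketch of how a driver's
 * modify_qp handler can use ib_modify_qp_is_ok() to reject an attribute mask
 * that the qp_state_table above does not permit for the requested transition.
 * The function name and the RESET->INIT transition are illustrative only.
 */
static int example_check_rst2init_mask(struct ib_qp *qp, int attr_mask)
{
	if (!ib_modify_qp_is_ok(IB_QPS_RESET, IB_QPS_INIT, qp->qp_type,
				attr_mask))
		return -EINVAL; /* e.g. a required attribute bit is missing */
	return 0;
}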
1708 
1709 /**
1710  * ib_resolve_eth_dmac - Resolve destination mac address
1711  * @device:     Device to consider
1712  * @ah_attr:        address handle attribute which describes the
1713  *          source and destination parameters
1714  * ib_resolve_eth_dmac() resolves the destination mac address and L3 hop
1715  * limit. It returns 0 on success or an appropriate error code, and it
1716  * initializes the necessary ah_attr fields when the call is successful.
1717  */
1718 static int ib_resolve_eth_dmac(struct ib_device *device,
1719                    struct rdma_ah_attr *ah_attr)
1720 {
1721     int ret = 0;
1722 
1723     if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1724         if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1725             __be32 addr = 0;
1726 
1727             memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
1728             ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
1729         } else {
1730             ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
1731                     (char *)ah_attr->roce.dmac);
1732         }
1733     } else {
1734         ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
1735     }
1736     return ret;
1737 }
1738 
1739 static bool is_qp_type_connected(const struct ib_qp *qp)
1740 {
1741     return (qp->qp_type == IB_QPT_UC ||
1742         qp->qp_type == IB_QPT_RC ||
1743         qp->qp_type == IB_QPT_XRC_INI ||
1744         qp->qp_type == IB_QPT_XRC_TGT);
1745 }
1746 
1747 /*
1748  * IB core internal function to perform QP attributes modification.
1749  */
1750 static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1751              int attr_mask, struct ib_udata *udata)
1752 {
1753     u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1754     const struct ib_gid_attr *old_sgid_attr_av;
1755     const struct ib_gid_attr *old_sgid_attr_alt_av;
1756     int ret;
1757 
1758     attr->xmit_slave = NULL;
1759     if (attr_mask & IB_QP_AV) {
1760         ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
1761                       &old_sgid_attr_av);
1762         if (ret)
1763             return ret;
1764 
1765         if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
1766             is_qp_type_connected(qp)) {
1767             struct net_device *slave;
1768 
1769             /*
1770              * If the user provided the qp_attr then we have to
1771              * resolve it. Kernel users have to provide already
1772              * resolved rdma_ah_attr's.
1773              */
1774             if (udata) {
1775                 ret = ib_resolve_eth_dmac(qp->device,
1776                               &attr->ah_attr);
1777                 if (ret)
1778                     goto out_av;
1779             }
1780             slave = rdma_lag_get_ah_roce_slave(qp->device,
1781                                &attr->ah_attr,
1782                                GFP_KERNEL);
1783             if (IS_ERR(slave)) {
1784                 ret = PTR_ERR(slave);
1785                 goto out_av;
1786             }
1787             attr->xmit_slave = slave;
1788         }
1789     }
1790     if (attr_mask & IB_QP_ALT_PATH) {
1791         /*
1792          * FIXME: This does not track the migration state, so if the
1793          * user loads a new alternate path after the HW has migrated
1794          * from primary->alternate we will keep the wrong
1795          * references. This is OK for IB because the reference
1796          * counting does not serve any functional purpose.
1797          */
1798         ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
1799                       &old_sgid_attr_alt_av);
1800         if (ret)
1801             goto out_av;
1802 
1803         /*
1804          * Today the core code can only handle alternate paths and APM
1805          * for IB. Ban them in RoCE mode.
1806          */
1807         if (!(rdma_protocol_ib(qp->device,
1808                        attr->alt_ah_attr.port_num) &&
1809               rdma_protocol_ib(qp->device, port))) {
1810             ret = -EINVAL;
1811             goto out;
1812         }
1813     }
1814 
1815     if (rdma_ib_or_roce(qp->device, port)) {
1816         if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1817             dev_warn(&qp->device->dev,
1818                  "%s rq_psn overflow, masking to 24 bits\n",
1819                  __func__);
1820             attr->rq_psn &= 0xffffff;
1821         }
1822 
1823         if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
1824             dev_warn(&qp->device->dev,
1825                  "%s sq_psn overflow, masking to 24 bits\n",
1826                  __func__);
1827             attr->sq_psn &= 0xffffff;
1828         }
1829     }
1830 
1831     /*
1832      * Bind this qp to a counter automatically based on the rdma counter
1833      * rules. This is only set in RST2INIT when the port is specified.
1834      */
1835     if (!qp->counter && (attr_mask & IB_QP_PORT) &&
1836         ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
1837         rdma_counter_bind_qp_auto(qp, attr->port_num);
1838 
1839     ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
1840     if (ret)
1841         goto out;
1842 
1843     if (attr_mask & IB_QP_PORT)
1844         qp->port = attr->port_num;
1845     if (attr_mask & IB_QP_AV)
1846         qp->av_sgid_attr =
1847             rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
1848     if (attr_mask & IB_QP_ALT_PATH)
1849         qp->alt_path_sgid_attr = rdma_update_sgid_attr(
1850             &attr->alt_ah_attr, qp->alt_path_sgid_attr);
1851 
1852 out:
1853     if (attr_mask & IB_QP_ALT_PATH)
1854         rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
1855 out_av:
1856     if (attr_mask & IB_QP_AV) {
1857         rdma_lag_put_ah_roce_slave(attr->xmit_slave);
1858         rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
1859     }
1860     return ret;
1861 }
1862 
1863 /**
1864  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
1865  * @ib_qp: The QP to modify.
1866  * @attr: On input, specifies the QP attributes to modify.  On output,
1867  *   the current values of selected QP attributes are returned.
1868  * @attr_mask: A bit-mask used to specify which attributes of the QP
1869  *   are being modified.
1870  * @udata: pointer to the user's input/output buffer information
1871  *
1872  * It returns 0 on success and an appropriate error code on error.
1873  */
1874 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
1875                 int attr_mask, struct ib_udata *udata)
1876 {
1877     return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
1878 }
1879 EXPORT_SYMBOL(ib_modify_qp_with_udata);
1880 
1881 int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
1882 {
1883     int rc;
1884     u32 netdev_speed;
1885     struct net_device *netdev;
1886     struct ethtool_link_ksettings lksettings;
1887 
1888     if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
1889         return -EINVAL;
1890 
1891     netdev = ib_device_get_netdev(dev, port_num);
1892     if (!netdev)
1893         return -ENODEV;
1894 
1895     rtnl_lock();
1896     rc = __ethtool_get_link_ksettings(netdev, &lksettings);
1897     rtnl_unlock();
1898 
1899     dev_put(netdev);
1900 
1901     if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
1902         netdev_speed = lksettings.base.speed;
1903     } else {
1904         netdev_speed = SPEED_1000;
1905         pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
1906             netdev_speed);
1907     }
1908 
1909     if (netdev_speed <= SPEED_1000) {
1910         *width = IB_WIDTH_1X;
1911         *speed = IB_SPEED_SDR;
1912     } else if (netdev_speed <= SPEED_10000) {
1913         *width = IB_WIDTH_1X;
1914         *speed = IB_SPEED_FDR10;
1915     } else if (netdev_speed <= SPEED_20000) {
1916         *width = IB_WIDTH_4X;
1917         *speed = IB_SPEED_DDR;
1918     } else if (netdev_speed <= SPEED_25000) {
1919         *width = IB_WIDTH_1X;
1920         *speed = IB_SPEED_EDR;
1921     } else if (netdev_speed <= SPEED_40000) {
1922         *width = IB_WIDTH_4X;
1923         *speed = IB_SPEED_FDR10;
1924     } else {
1925         *width = IB_WIDTH_4X;
1926         *speed = IB_SPEED_EDR;
1927     }
1928 
1929     return 0;
1930 }
1931 EXPORT_SYMBOL(ib_get_eth_speed);
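
/*
 * Example (not part of the original file): a sketch of how a RoCE provider's
 * query_port handler might use ib_get_eth_speed() to derive the reported IB
 * speed and width from the underlying netdev.  Filling in the remaining
 * ib_port_attr fields is omitted; the function name is illustrative.
 */
static int example_query_port_speed(struct ib_device *ibdev, u32 port_num,
				    struct ib_port_attr *props)
{
	u16 speed;
	u8 width;
	int ret;

	ret = ib_get_eth_speed(ibdev, port_num, &speed, &width);
	if (ret)
		return ret;

	props->active_speed = speed;
	props->active_width = width;
	return 0;
}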
1932 
1933 int ib_modify_qp(struct ib_qp *qp,
1934          struct ib_qp_attr *qp_attr,
1935          int qp_attr_mask)
1936 {
1937     return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
1938 }
1939 EXPORT_SYMBOL(ib_modify_qp);
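
/*
 * Example (not part of the original file): a minimal sketch of a kernel ULP
 * moving an RC QP from RESET to INIT with ib_modify_qp().  The attribute mask
 * matches the RESET->INIT entries of qp_state_table; the pkey index, port
 * number and access flags are illustrative.
 */
static int example_rc_qp_to_init(struct ib_qp *qp, u32 port_num)
{
	struct ib_qp_attr attr = {
		.qp_state	 = IB_QPS_INIT,
		.pkey_index	 = 0,
		.port_num	 = port_num,
		.qp_access_flags = IB_ACCESS_REMOTE_READ |
				   IB_ACCESS_REMOTE_WRITE,
	};

	return ib_modify_qp(qp, &attr,
			    IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT |
			    IB_QP_ACCESS_FLAGS);
}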
1940 
1941 int ib_query_qp(struct ib_qp *qp,
1942         struct ib_qp_attr *qp_attr,
1943         int qp_attr_mask,
1944         struct ib_qp_init_attr *qp_init_attr)
1945 {
1946     qp_attr->ah_attr.grh.sgid_attr = NULL;
1947     qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
1948 
1949     return qp->device->ops.query_qp ?
1950         qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask,
1951                      qp_init_attr) : -EOPNOTSUPP;
1952 }
1953 EXPORT_SYMBOL(ib_query_qp);
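
/*
 * Example (not part of the original file): reading back the current state of
 * a QP with ib_query_qp().  Only IB_QP_STATE is requested; the init_attr
 * argument is required by the API even when its output is not used.
 */
static enum ib_qp_state example_get_qp_state(struct ib_qp *qp)
{
	struct ib_qp_attr attr = {};
	struct ib_qp_init_attr init_attr = {};

	if (ib_query_qp(qp, &attr, IB_QP_STATE, &init_attr))
		return IB_QPS_ERR; /* treat a failed query as the error state */
	return attr.qp_state;
}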
1954 
1955 int ib_close_qp(struct ib_qp *qp)
1956 {
1957     struct ib_qp *real_qp;
1958     unsigned long flags;
1959 
1960     real_qp = qp->real_qp;
1961     if (real_qp == qp)
1962         return -EINVAL;
1963 
1964     spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
1965     list_del(&qp->open_list);
1966     spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
1967 
1968     atomic_dec(&real_qp->usecnt);
1969     if (qp->qp_sec)
1970         ib_close_shared_qp_security(qp->qp_sec);
1971     kfree(qp);
1972 
1973     return 0;
1974 }
1975 EXPORT_SYMBOL(ib_close_qp);
1976 
1977 static int __ib_destroy_shared_qp(struct ib_qp *qp)
1978 {
1979     struct ib_xrcd *xrcd;
1980     struct ib_qp *real_qp;
1981     int ret;
1982 
1983     real_qp = qp->real_qp;
1984     xrcd = real_qp->xrcd;
1985     down_write(&xrcd->tgt_qps_rwsem);
1986     ib_close_qp(qp);
1987     if (atomic_read(&real_qp->usecnt) == 0)
1988         xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
1989     else
1990         real_qp = NULL;
1991     up_write(&xrcd->tgt_qps_rwsem);
1992 
1993     if (real_qp) {
1994         ret = ib_destroy_qp(real_qp);
1995         if (!ret)
1996             atomic_dec(&xrcd->usecnt);
1997     }
1998 
1999     return 0;
2000 }
2001 
2002 int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
2003 {
2004     const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
2005     const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
2006     struct ib_qp_security *sec;
2007     int ret;
2008 
2009     WARN_ON_ONCE(qp->mrs_used > 0);
2010 
2011     if (atomic_read(&qp->usecnt))
2012         return -EBUSY;
2013 
2014     if (qp->real_qp != qp)
2015         return __ib_destroy_shared_qp(qp);
2016 
2017     sec  = qp->qp_sec;
2018     if (sec)
2019         ib_destroy_qp_security_begin(sec);
2020 
2021     if (!qp->uobject)
2022         rdma_rw_cleanup_mrs(qp);
2023 
2024     rdma_counter_unbind_qp(qp, true);
2025     ret = qp->device->ops.destroy_qp(qp, udata);
2026     if (ret) {
2027         if (sec)
2028             ib_destroy_qp_security_abort(sec);
2029         return ret;
2030     }
2031 
2032     if (alt_path_sgid_attr)
2033         rdma_put_gid_attr(alt_path_sgid_attr);
2034     if (av_sgid_attr)
2035         rdma_put_gid_attr(av_sgid_attr);
2036 
2037     ib_qp_usecnt_dec(qp);
2038     if (sec)
2039         ib_destroy_qp_security_end(sec);
2040 
2041     rdma_restrack_del(&qp->res);
2042     kfree(qp);
2043     return ret;
2044 }
2045 EXPORT_SYMBOL(ib_destroy_qp_user);
2046 
2047 /* Completion queues */
2048 
2049 struct ib_cq *__ib_create_cq(struct ib_device *device,
2050                  ib_comp_handler comp_handler,
2051                  void (*event_handler)(struct ib_event *, void *),
2052                  void *cq_context,
2053                  const struct ib_cq_init_attr *cq_attr,
2054                  const char *caller)
2055 {
2056     struct ib_cq *cq;
2057     int ret;
2058 
2059     cq = rdma_zalloc_drv_obj(device, ib_cq);
2060     if (!cq)
2061         return ERR_PTR(-ENOMEM);
2062 
2063     cq->device = device;
2064     cq->uobject = NULL;
2065     cq->comp_handler = comp_handler;
2066     cq->event_handler = event_handler;
2067     cq->cq_context = cq_context;
2068     atomic_set(&cq->usecnt, 0);
2069 
2070     rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
2071     rdma_restrack_set_name(&cq->res, caller);
2072 
2073     ret = device->ops.create_cq(cq, cq_attr, NULL);
2074     if (ret) {
2075         rdma_restrack_put(&cq->res);
2076         kfree(cq);
2077         return ERR_PTR(ret);
2078     }
2079 
2080     rdma_restrack_add(&cq->res);
2081     return cq;
2082 }
2083 EXPORT_SYMBOL(__ib_create_cq);
2084 
2085 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
2086 {
2087     if (cq->shared)
2088         return -EOPNOTSUPP;
2089 
2090     return cq->device->ops.modify_cq ?
2091         cq->device->ops.modify_cq(cq, cq_count,
2092                       cq_period) : -EOPNOTSUPP;
2093 }
2094 EXPORT_SYMBOL(rdma_set_cq_moderation);
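
/*
 * Example (not part of the original file): a sketch of CQ interrupt
 * moderation, asking the provider to coalesce up to 16 completions or one
 * moderation period (device-defined units, commonly microseconds) before
 * raising an event.  The values are illustrative; shared CQs are rejected
 * with -EOPNOTSUPP as shown above.
 */
static int example_tune_cq(struct ib_cq *cq)
{
	return rdma_set_cq_moderation(cq, 16 /* cq_count */, 64 /* cq_period */);
}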
2095 
2096 int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
2097 {
2098     int ret;
2099 
2100     if (WARN_ON_ONCE(cq->shared))
2101         return -EOPNOTSUPP;
2102 
2103     if (atomic_read(&cq->usecnt))
2104         return -EBUSY;
2105 
2106     ret = cq->device->ops.destroy_cq(cq, udata);
2107     if (ret)
2108         return ret;
2109 
2110     rdma_restrack_del(&cq->res);
2111     kfree(cq);
2112     return ret;
2113 }
2114 EXPORT_SYMBOL(ib_destroy_cq_user);
2115 
2116 int ib_resize_cq(struct ib_cq *cq, int cqe)
2117 {
2118     if (cq->shared)
2119         return -EOPNOTSUPP;
2120 
2121     return cq->device->ops.resize_cq ?
2122         cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
2123 }
2124 EXPORT_SYMBOL(ib_resize_cq);
2125 
2126 /* Memory regions */
2127 
2128 struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2129                  u64 virt_addr, int access_flags)
2130 {
2131     struct ib_mr *mr;
2132 
2133     if (access_flags & IB_ACCESS_ON_DEMAND) {
2134         if (!(pd->device->attrs.kernel_cap_flags &
2135               IBK_ON_DEMAND_PAGING)) {
2136             pr_debug("ODP support not available\n");
2137             return ERR_PTR(-EINVAL);
2138         }
2139     }
2140 
2141     mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
2142                      access_flags, NULL);
2143 
2144     if (IS_ERR(mr))
2145         return mr;
2146 
2147     mr->device = pd->device;
2148     mr->type = IB_MR_TYPE_USER;
2149     mr->pd = pd;
2150     mr->dm = NULL;
2151     atomic_inc(&pd->usecnt);
2152 
2153     rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
2154     rdma_restrack_parent_name(&mr->res, &pd->res);
2155     rdma_restrack_add(&mr->res);
2156 
2157     return mr;
2158 }
2159 EXPORT_SYMBOL(ib_reg_user_mr);
2160 
2161 int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
2162          u32 flags, struct ib_sge *sg_list, u32 num_sge)
2163 {
2164     if (!pd->device->ops.advise_mr)
2165         return -EOPNOTSUPP;
2166 
2167     if (!num_sge)
2168         return 0;
2169 
2170     return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
2171                      NULL);
2172 }
2173 EXPORT_SYMBOL(ib_advise_mr);
2174 
2175 int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
2176 {
2177     struct ib_pd *pd = mr->pd;
2178     struct ib_dm *dm = mr->dm;
2179     struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
2180     int ret;
2181 
2182     trace_mr_dereg(mr);
2183     rdma_restrack_del(&mr->res);
2184     ret = mr->device->ops.dereg_mr(mr, udata);
2185     if (!ret) {
2186         atomic_dec(&pd->usecnt);
2187         if (dm)
2188             atomic_dec(&dm->usecnt);
2189         kfree(sig_attrs);
2190     }
2191 
2192     return ret;
2193 }
2194 EXPORT_SYMBOL(ib_dereg_mr_user);
2195 
2196 /**
2197  * ib_alloc_mr() - Allocates a memory region
2198  * @pd:            protection domain associated with the region
2199  * @mr_type:       memory region type
2200  * @max_num_sg:    maximum sg entries available for registration.
2201  *
2202  * Notes:
2203  * Memory registration page/sg lists must not exceed max_num_sg.
2204  * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
2205  * max_num_sg * used_page_size.
2206  *
2207  */
2208 struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
2209               u32 max_num_sg)
2210 {
2211     struct ib_mr *mr;
2212 
2213     if (!pd->device->ops.alloc_mr) {
2214         mr = ERR_PTR(-EOPNOTSUPP);
2215         goto out;
2216     }
2217 
2218     if (mr_type == IB_MR_TYPE_INTEGRITY) {
2219         WARN_ON_ONCE(1);
2220         mr = ERR_PTR(-EINVAL);
2221         goto out;
2222     }
2223 
2224     mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
2225     if (IS_ERR(mr))
2226         goto out;
2227 
2228     mr->device = pd->device;
2229     mr->pd = pd;
2230     mr->dm = NULL;
2231     mr->uobject = NULL;
2232     atomic_inc(&pd->usecnt);
2233     mr->need_inval = false;
2234     mr->type = mr_type;
2235     mr->sig_attrs = NULL;
2236 
2237     rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
2238     rdma_restrack_parent_name(&mr->res, &pd->res);
2239     rdma_restrack_add(&mr->res);
2240 out:
2241     trace_mr_alloc(pd, mr_type, max_num_sg, mr);
2242     return mr;
2243 }
2244 EXPORT_SYMBOL(ib_alloc_mr);
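
/*
 * Example (not part of the original file): allocating a fast-registration MR
 * that can cover up to 32 scatter/gather entries.  IB_MR_TYPE_MEM_REG is the
 * usual choice for kernel ULPs; see ib_map_mr_sg() below for how the MR is
 * populated before posting an IB_WR_REG_MR work request.
 */
static struct ib_mr *example_alloc_fr_mr(struct ib_pd *pd)
{
	return ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
}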
2245 
2246 /**
2247  * ib_alloc_mr_integrity() - Allocates an integrity memory region
2248  * @pd:                      protection domain associated with the region
2249  * @max_num_data_sg:         maximum data sg entries available for registration
2250  * @max_num_meta_sg:         maximum metadata sg entries available for
2251  *                           registration
2252  *
2253  * Notes:
2254  * Memory registration page/sg lists must not exceed max_num_data_sg;
2255  * also the integrity page/sg lists must not exceed max_num_meta_sg.
2256  *
2257  */
2258 struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
2259                     u32 max_num_data_sg,
2260                     u32 max_num_meta_sg)
2261 {
2262     struct ib_mr *mr;
2263     struct ib_sig_attrs *sig_attrs;
2264 
2265     if (!pd->device->ops.alloc_mr_integrity ||
2266         !pd->device->ops.map_mr_sg_pi) {
2267         mr = ERR_PTR(-EOPNOTSUPP);
2268         goto out;
2269     }
2270 
2271     if (!max_num_meta_sg) {
2272         mr = ERR_PTR(-EINVAL);
2273         goto out;
2274     }
2275 
2276     sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
2277     if (!sig_attrs) {
2278         mr = ERR_PTR(-ENOMEM);
2279         goto out;
2280     }
2281 
2282     mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
2283                         max_num_meta_sg);
2284     if (IS_ERR(mr)) {
2285         kfree(sig_attrs);
2286         goto out;
2287     }
2288 
2289     mr->device = pd->device;
2290     mr->pd = pd;
2291     mr->dm = NULL;
2292     mr->uobject = NULL;
2293     atomic_inc(&pd->usecnt);
2294     mr->need_inval = false;
2295     mr->type = IB_MR_TYPE_INTEGRITY;
2296     mr->sig_attrs = sig_attrs;
2297 
2298     rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
2299     rdma_restrack_parent_name(&mr->res, &pd->res);
2300     rdma_restrack_add(&mr->res);
2301 out:
2302     trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
2303     return mr;
2304 }
2305 EXPORT_SYMBOL(ib_alloc_mr_integrity);
2306 
2307 /* Multicast groups */
2308 
2309 static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
2310 {
2311     struct ib_qp_init_attr init_attr = {};
2312     struct ib_qp_attr attr = {};
2313     int num_eth_ports = 0;
2314     unsigned int port;
2315 
2316     /* If QP state >= init, it is assigned to a port and we can check this
2317      * port only.
2318      */
2319     if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
2320         if (attr.qp_state >= IB_QPS_INIT) {
2321             if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
2322                 IB_LINK_LAYER_INFINIBAND)
2323                 return true;
2324             goto lid_check;
2325         }
2326     }
2327 
2328     /* Can't get a quick answer, iterate over all ports */
2329     rdma_for_each_port(qp->device, port)
2330         if (rdma_port_get_link_layer(qp->device, port) !=
2331             IB_LINK_LAYER_INFINIBAND)
2332             num_eth_ports++;
2333 
2334     /* If we have at least one Ethernet port, the RoCE annex declares that
2335      * multicast LID should be ignored. We can't tell at this step if the
2336      * QP belongs to an IB or Ethernet port.
2337      */
2338     if (num_eth_ports)
2339         return true;
2340 
2341     /* If all the ports are IB, we can check according to IB spec. */
2342 lid_check:
2343     return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
2344          lid == be16_to_cpu(IB_LID_PERMISSIVE));
2345 }
2346 
2347 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
2348 {
2349     int ret;
2350 
2351     if (!qp->device->ops.attach_mcast)
2352         return -EOPNOTSUPP;
2353 
2354     if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
2355         qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
2356         return -EINVAL;
2357 
2358     ret = qp->device->ops.attach_mcast(qp, gid, lid);
2359     if (!ret)
2360         atomic_inc(&qp->usecnt);
2361     return ret;
2362 }
2363 EXPORT_SYMBOL(ib_attach_mcast);
2364 
2365 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
2366 {
2367     int ret;
2368 
2369     if (!qp->device->ops.detach_mcast)
2370         return -EOPNOTSUPP;
2371 
2372     if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
2373         qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
2374         return -EINVAL;
2375 
2376     ret = qp->device->ops.detach_mcast(qp, gid, lid);
2377     if (!ret)
2378         atomic_dec(&qp->usecnt);
2379     return ret;
2380 }
2381 EXPORT_SYMBOL(ib_detach_mcast);
2382 
2383 /**
2384  * ib_alloc_xrcd_user - Allocates an XRC domain.
2385  * @device: The device on which to allocate the XRC domain.
2386  * @inode: inode to connect XRCD
2387  * @udata: Valid user data or NULL for kernel object
2388  */
2389 struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
2390                    struct inode *inode, struct ib_udata *udata)
2391 {
2392     struct ib_xrcd *xrcd;
2393     int ret;
2394 
2395     if (!device->ops.alloc_xrcd)
2396         return ERR_PTR(-EOPNOTSUPP);
2397 
2398     xrcd = rdma_zalloc_drv_obj(device, ib_xrcd);
2399     if (!xrcd)
2400         return ERR_PTR(-ENOMEM);
2401 
2402     xrcd->device = device;
2403     xrcd->inode = inode;
2404     atomic_set(&xrcd->usecnt, 0);
2405     init_rwsem(&xrcd->tgt_qps_rwsem);
2406     xa_init(&xrcd->tgt_qps);
2407 
2408     ret = device->ops.alloc_xrcd(xrcd, udata);
2409     if (ret)
2410         goto err;
2411     return xrcd;
2412 err:
2413     kfree(xrcd);
2414     return ERR_PTR(ret);
2415 }
2416 EXPORT_SYMBOL(ib_alloc_xrcd_user);
2417 
2418 /**
2419  * ib_dealloc_xrcd_user - Deallocates an XRC domain.
2420  * @xrcd: The XRC domain to deallocate.
2421  * @udata: Valid user data or NULL for kernel object
2422  */
2423 int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
2424 {
2425     int ret;
2426 
2427     if (atomic_read(&xrcd->usecnt))
2428         return -EBUSY;
2429 
2430     WARN_ON(!xa_empty(&xrcd->tgt_qps));
2431     ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
2432     if (ret)
2433         return ret;
2434     kfree(xrcd);
2435     return ret;
2436 }
2437 EXPORT_SYMBOL(ib_dealloc_xrcd_user);
2438 
2439 /**
2440  * ib_create_wq - Creates a WQ associated with the specified protection
2441  * domain.
2442  * @pd: The protection domain associated with the WQ.
2443  * @wq_attr: A list of initial attributes required to create the
2444  * WQ. If WQ creation succeeds, then the attributes are updated to
2445  * the actual capabilities of the created WQ.
2446  *
2447  * wq_attr->max_wr and wq_attr->max_sge determine
2448  * the requested size of the WQ, and are set to the actual values allocated
2449  * on return.
2450  * If ib_create_wq() succeeds, then max_wr and max_sge will always be
2451  * at least as large as the requested values.
2452  */
2453 struct ib_wq *ib_create_wq(struct ib_pd *pd,
2454                struct ib_wq_init_attr *wq_attr)
2455 {
2456     struct ib_wq *wq;
2457 
2458     if (!pd->device->ops.create_wq)
2459         return ERR_PTR(-EOPNOTSUPP);
2460 
2461     wq = pd->device->ops.create_wq(pd, wq_attr, NULL);
2462     if (!IS_ERR(wq)) {
2463         wq->event_handler = wq_attr->event_handler;
2464         wq->wq_context = wq_attr->wq_context;
2465         wq->wq_type = wq_attr->wq_type;
2466         wq->cq = wq_attr->cq;
2467         wq->device = pd->device;
2468         wq->pd = pd;
2469         wq->uobject = NULL;
2470         atomic_inc(&pd->usecnt);
2471         atomic_inc(&wq_attr->cq->usecnt);
2472         atomic_set(&wq->usecnt, 0);
2473     }
2474     return wq;
2475 }
2476 EXPORT_SYMBOL(ib_create_wq);
2477 
2478 /**
2479  * ib_destroy_wq_user - Destroys the specified user WQ.
2480  * @wq: The WQ to destroy.
2481  * @udata: Valid user data
2482  */
2483 int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
2484 {
2485     struct ib_cq *cq = wq->cq;
2486     struct ib_pd *pd = wq->pd;
2487     int ret;
2488 
2489     if (atomic_read(&wq->usecnt))
2490         return -EBUSY;
2491 
2492     ret = wq->device->ops.destroy_wq(wq, udata);
2493     if (ret)
2494         return ret;
2495 
2496     atomic_dec(&pd->usecnt);
2497     atomic_dec(&cq->usecnt);
2498     return ret;
2499 }
2500 EXPORT_SYMBOL(ib_destroy_wq_user);
2501 
2502 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
2503                struct ib_mr_status *mr_status)
2504 {
2505     if (!mr->device->ops.check_mr_status)
2506         return -EOPNOTSUPP;
2507 
2508     return mr->device->ops.check_mr_status(mr, check_mask, mr_status);
2509 }
2510 EXPORT_SYMBOL(ib_check_mr_status);
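
/*
 * Example (not part of the original file): after an integrity (signature)
 * operation completes, a ULP can ask the device whether a guard/reference/
 * application tag mismatch was detected on the MR.  Interpretation of
 * mr_status.sig_err is omitted; the function name is illustrative.
 */
static int example_check_sig_mr(struct ib_mr *sig_mr)
{
	struct ib_mr_status mr_status;
	int ret;

	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
	if (ret)
		return ret;

	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS)
		return -EIO; /* details are in mr_status.sig_err */
	return 0;
}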
2511 
2512 int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
2513              int state)
2514 {
2515     if (!device->ops.set_vf_link_state)
2516         return -EOPNOTSUPP;
2517 
2518     return device->ops.set_vf_link_state(device, vf, port, state);
2519 }
2520 EXPORT_SYMBOL(ib_set_vf_link_state);
2521 
2522 int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
2523              struct ifla_vf_info *info)
2524 {
2525     if (!device->ops.get_vf_config)
2526         return -EOPNOTSUPP;
2527 
2528     return device->ops.get_vf_config(device, vf, port, info);
2529 }
2530 EXPORT_SYMBOL(ib_get_vf_config);
2531 
2532 int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
2533             struct ifla_vf_stats *stats)
2534 {
2535     if (!device->ops.get_vf_stats)
2536         return -EOPNOTSUPP;
2537 
2538     return device->ops.get_vf_stats(device, vf, port, stats);
2539 }
2540 EXPORT_SYMBOL(ib_get_vf_stats);
2541 
2542 int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
2543            int type)
2544 {
2545     if (!device->ops.set_vf_guid)
2546         return -EOPNOTSUPP;
2547 
2548     return device->ops.set_vf_guid(device, vf, port, guid, type);
2549 }
2550 EXPORT_SYMBOL(ib_set_vf_guid);
2551 
2552 int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
2553            struct ifla_vf_guid *node_guid,
2554            struct ifla_vf_guid *port_guid)
2555 {
2556     if (!device->ops.get_vf_guid)
2557         return -EOPNOTSUPP;
2558 
2559     return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid);
2560 }
2561 EXPORT_SYMBOL(ib_get_vf_guid);
2562 /**
2563  * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
2564  *     information) and set an appropriate memory region for registration.
2565  * @mr:             memory region
2566  * @data_sg:        dma mapped scatterlist for data
2567  * @data_sg_nents:  number of entries in data_sg
2568  * @data_sg_offset: offset in bytes into data_sg
2569  * @meta_sg:        dma mapped scatterlist for metadata
2570  * @meta_sg_nents:  number of entries in meta_sg
2571  * @meta_sg_offset: offset in bytes into meta_sg
2572  * @page_size:      page vector desired page size
2573  *
2574  * Constraints:
2575  * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY.
2576  *
2577  * Return: 0 on success.
2578  *
2579  * After this completes successfully, the memory region
2580  * is ready for registration.
2581  */
2582 int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
2583             int data_sg_nents, unsigned int *data_sg_offset,
2584             struct scatterlist *meta_sg, int meta_sg_nents,
2585             unsigned int *meta_sg_offset, unsigned int page_size)
2586 {
2587     if (unlikely(!mr->device->ops.map_mr_sg_pi ||
2588              WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY)))
2589         return -EOPNOTSUPP;
2590 
2591     mr->page_size = page_size;
2592 
2593     return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents,
2594                         data_sg_offset, meta_sg,
2595                         meta_sg_nents, meta_sg_offset);
2596 }
2597 EXPORT_SYMBOL(ib_map_mr_sg_pi);
2598 
2599 /**
2600  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
2601  *     and set it on the memory region.
2602  * @mr:            memory region
2603  * @sg:            dma mapped scatterlist
2604  * @sg_nents:      number of entries in sg
2605  * @sg_offset:     offset in bytes into sg
2606  * @page_size:     page vector desired page size
2607  *
2608  * Constraints:
2609  *
2610  * - The first sg element is allowed to have an offset.
2611  * - Each sg element must either be aligned to page_size or virtually
2612  *   contiguous to the previous element. In case an sg element has a
2613  *   non-contiguous offset, the mapping prefix will not include it.
2614  * - The last sg element is allowed to have length less than page_size.
2615  * - If sg_nents total byte length exceeds the mr max_num_sg * page_size
2616  *   then only max_num_sg entries will be mapped.
2617  * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these
2618  *   constraints holds and the page_size argument is ignored.
2619  *
2620  * Returns the number of sg elements that were mapped to the memory region.
2621  *
2622  * After this completes successfully, the memory region
2623  * is ready for registration.
2624  */
2625 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
2626          unsigned int *sg_offset, unsigned int page_size)
2627 {
2628     if (unlikely(!mr->device->ops.map_mr_sg))
2629         return -EOPNOTSUPP;
2630 
2631     mr->page_size = page_size;
2632 
2633     return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset);
2634 }
2635 EXPORT_SYMBOL(ib_map_mr_sg);
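
/*
 * Example (not part of the original file): the usual fast-registration flow -
 * DMA-map a scatterlist, then hand the mapped entries to ib_map_mr_sg() with
 * the system page size.  Posting the IB_WR_REG_MR work request that makes the
 * mapping visible to the device is omitted; names are illustrative.
 */
static int example_map_fr_mr(struct ib_mr *mr, struct scatterlist *sg,
			     int nents)
{
	int mapped, count;

	mapped = ib_dma_map_sg(mr->device, sg, nents, DMA_BIDIRECTIONAL);
	if (!mapped)
		return -ENOMEM;

	count = ib_map_mr_sg(mr, sg, mapped, NULL, PAGE_SIZE);
	if (count < mapped) {
		ib_dma_unmap_sg(mr->device, sg, nents, DMA_BIDIRECTIONAL);
		return count < 0 ? count : -EINVAL;
	}
	return 0;
}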
2636 
2637 /**
2638  * ib_sg_to_pages() - Convert the largest prefix of a sg list
2639  *     to a page vector
2640  * @mr:            memory region
2641  * @sgl:           dma mapped scatterlist
2642  * @sg_nents:      number of entries in sg
2643  * @sg_offset_p:   ==== =======================================================
2644  *                 IN   start offset in bytes into sg
2645  *                 OUT  offset in bytes for element n of the sg of the first
2646  *                      byte that has not been processed where n is the return
2647  *                      value of this function.
2648  *                 ==== =======================================================
2649  * @set_page:      driver page assignment function pointer
2650  *
2651  * Core service helper for drivers to convert the largest
2652  * prefix of given sg list to a page vector. The sg list
2653  * prefix converted is the prefix that meets the requirements
2654  * of ib_map_mr_sg.
2655  *
2656  * Returns the number of sg elements that were assigned to
2657  * a page vector.
2658  */
2659 int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
2660         unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
2661 {
2662     struct scatterlist *sg;
2663     u64 last_end_dma_addr = 0;
2664     unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2665     unsigned int last_page_off = 0;
2666     u64 page_mask = ~((u64)mr->page_size - 1);
2667     int i, ret;
2668 
2669     if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
2670         return -EINVAL;
2671 
2672     mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
2673     mr->length = 0;
2674 
2675     for_each_sg(sgl, sg, sg_nents, i) {
2676         u64 dma_addr = sg_dma_address(sg) + sg_offset;
2677         u64 prev_addr = dma_addr;
2678         unsigned int dma_len = sg_dma_len(sg) - sg_offset;
2679         u64 end_dma_addr = dma_addr + dma_len;
2680         u64 page_addr = dma_addr & page_mask;
2681 
2682         /*
2683          * For the second and later elements, check whether either the
2684          * end of element i-1 or the start of element i is not aligned
2685          * on a page boundary.
2686          */
2687         if (i && (last_page_off != 0 || page_addr != dma_addr)) {
2688             /* Stop mapping if there is a gap. */
2689             if (last_end_dma_addr != dma_addr)
2690                 break;
2691 
2692             /*
2693              * Coalesce this element with the last. If it is small
2694              * enough just update mr->length. Otherwise start
2695              * mapping from the next page.
2696              */
2697             goto next_page;
2698         }
2699 
2700         do {
2701             ret = set_page(mr, page_addr);
2702             if (unlikely(ret < 0)) {
2703                 sg_offset = prev_addr - sg_dma_address(sg);
2704                 mr->length += prev_addr - dma_addr;
2705                 if (sg_offset_p)
2706                     *sg_offset_p = sg_offset;
2707                 return i || sg_offset ? i : ret;
2708             }
2709             prev_addr = page_addr;
2710 next_page:
2711             page_addr += mr->page_size;
2712         } while (page_addr < end_dma_addr);
2713 
2714         mr->length += dma_len;
2715         last_end_dma_addr = end_dma_addr;
2716         last_page_off = end_dma_addr & ~page_mask;
2717 
2718         sg_offset = 0;
2719     }
2720 
2721     if (sg_offset_p)
2722         *sg_offset_p = 0;
2723     return i;
2724 }
2725 EXPORT_SYMBOL(ib_sg_to_pages);
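
/*
 * Example (not part of the original file): how a provider typically wires
 * ib_sg_to_pages() into its map_mr_sg handler.  "struct example_mr" and its
 * page array are hypothetical stand-ins for the driver's private MR state.
 */
struct example_mr {
	struct ib_mr	ibmr;
	u64		*pages;
	u32		npages;
	u32		max_pages;
};

static int example_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct example_mr *emr = container_of(ibmr, struct example_mr, ibmr);

	if (emr->npages == emr->max_pages)
		return -ENOMEM;

	emr->pages[emr->npages++] = addr;
	return 0;
}

static int example_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			     int sg_nents, unsigned int *sg_offset)
{
	struct example_mr *emr = container_of(ibmr, struct example_mr, ibmr);

	emr->npages = 0;
	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, example_set_page);
}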
2726 
2727 struct ib_drain_cqe {
2728     struct ib_cqe cqe;
2729     struct completion done;
2730 };
2731 
2732 static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
2733 {
2734     struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
2735                         cqe);
2736 
2737     complete(&cqe->done);
2738 }
2739 
2740 /*
2741  * Post a WR and block until its completion is reaped for the SQ.
2742  */
2743 static void __ib_drain_sq(struct ib_qp *qp)
2744 {
2745     struct ib_cq *cq = qp->send_cq;
2746     struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2747     struct ib_drain_cqe sdrain;
2748     struct ib_rdma_wr swr = {
2749         .wr = {
2750             .next = NULL,
2751             { .wr_cqe   = &sdrain.cqe, },
2752             .opcode = IB_WR_RDMA_WRITE,
2753         },
2754     };
2755     int ret;
2756 
2757     ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2758     if (ret) {
2759         WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2760         return;
2761     }
2762 
2763     sdrain.cqe.done = ib_drain_qp_done;
2764     init_completion(&sdrain.done);
2765 
2766     ret = ib_post_send(qp, &swr.wr, NULL);
2767     if (ret) {
2768         WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2769         return;
2770     }
2771 
2772     if (cq->poll_ctx == IB_POLL_DIRECT)
2773         while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
2774             ib_process_cq_direct(cq, -1);
2775     else
2776         wait_for_completion(&sdrain.done);
2777 }
2778 
2779 /*
2780  * Post a WR and block until its completion is reaped for the RQ.
2781  */
2782 static void __ib_drain_rq(struct ib_qp *qp)
2783 {
2784     struct ib_cq *cq = qp->recv_cq;
2785     struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2786     struct ib_drain_cqe rdrain;
2787     struct ib_recv_wr rwr = {};
2788     int ret;
2789 
2790     ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2791     if (ret) {
2792         WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2793         return;
2794     }
2795 
2796     rwr.wr_cqe = &rdrain.cqe;
2797     rdrain.cqe.done = ib_drain_qp_done;
2798     init_completion(&rdrain.done);
2799 
2800     ret = ib_post_recv(qp, &rwr, NULL);
2801     if (ret) {
2802         WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2803         return;
2804     }
2805 
2806     if (cq->poll_ctx == IB_POLL_DIRECT)
2807         while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
2808             ib_process_cq_direct(cq, -1);
2809     else
2810         wait_for_completion(&rdrain.done);
2811 }
2812 
2813 /**
2814  * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
2815  *         application.
2816  * @qp:            queue pair to drain
2817  *
2818  * If the device has a provider-specific drain function, then
2819  * call that.  Otherwise call the generic drain function
2820  * __ib_drain_sq().
2821  *
2822  * The caller must:
2823  *
2824  * ensure there is room in the CQ and SQ for the drain work request and
2825  * completion.
2826  *
2827  * allocate the CQ using ib_alloc_cq().
2828  *
2829  * ensure that there are no other contexts that are posting WRs concurrently.
2830  * Otherwise the drain is not guaranteed.
2831  */
2832 void ib_drain_sq(struct ib_qp *qp)
2833 {
2834     if (qp->device->ops.drain_sq)
2835         qp->device->ops.drain_sq(qp);
2836     else
2837         __ib_drain_sq(qp);
2838     trace_cq_drain_complete(qp->send_cq);
2839 }
2840 EXPORT_SYMBOL(ib_drain_sq);
2841 
2842 /**
2843  * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
2844  *         application.
2845  * @qp:            queue pair to drain
2846  *
2847  * If the device has a provider-specific drain function, then
2848  * call that.  Otherwise call the generic drain function
2849  * __ib_drain_rq().
2850  *
2851  * The caller must:
2852  *
2853  * ensure there is room in the CQ and RQ for the drain work request and
2854  * completion.
2855  *
2856  * allocate the CQ using ib_alloc_cq().
2857  *
2858  * ensure that there are no other contexts that are posting WRs concurrently.
2859  * Otherwise the drain is not guaranteed.
2860  */
2861 void ib_drain_rq(struct ib_qp *qp)
2862 {
2863     if (qp->device->ops.drain_rq)
2864         qp->device->ops.drain_rq(qp);
2865     else
2866         __ib_drain_rq(qp);
2867     trace_cq_drain_complete(qp->recv_cq);
2868 }
2869 EXPORT_SYMBOL(ib_drain_rq);
2870 
2871 /**
2872  * ib_drain_qp() - Block until all CQEs have been consumed by the
2873  *         application on both the RQ and SQ.
2874  * @qp:            queue pair to drain
2875  *
2876  * The caller must:
2877  *
2878  * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
2879  * and completions.
2880  *
2881  * allocate the CQs using ib_alloc_cq().
2882  *
2883  * ensure that there are no other contexts that are posting WRs concurrently.
2884  * Otherwise the drain is not guaranteed.
2885  */
2886 void ib_drain_qp(struct ib_qp *qp)
2887 {
2888     ib_drain_sq(qp);
2889     if (!qp->srq)
2890         ib_drain_rq(qp);
2891 }
2892 EXPORT_SYMBOL(ib_drain_qp);
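
/*
 * Example (not part of the original file): the common teardown pattern for a
 * kernel ULP - drain the QP so every posted work request has generated a
 * completion, then destroy it.  The CQs must have been allocated with
 * ib_alloc_cq() for the generic drain path above to work.
 */
static void example_teardown_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp); /* flushes the SQ and, when there is no SRQ, the RQ */
	ib_destroy_qp(qp);
}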
2893 
2894 struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
2895                      enum rdma_netdev_t type, const char *name,
2896                      unsigned char name_assign_type,
2897                      void (*setup)(struct net_device *))
2898 {
2899     struct rdma_netdev_alloc_params params;
2900     struct net_device *netdev;
2901     int rc;
2902 
2903     if (!device->ops.rdma_netdev_get_params)
2904         return ERR_PTR(-EOPNOTSUPP);
2905 
2906     rc = device->ops.rdma_netdev_get_params(device, port_num, type,
2907                         &params);
2908     if (rc)
2909         return ERR_PTR(rc);
2910 
2911     netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type,
2912                   setup, params.txqs, params.rxqs);
2913     if (!netdev)
2914         return ERR_PTR(-ENOMEM);
2915 
2916     return netdev;
2917 }
2918 EXPORT_SYMBOL(rdma_alloc_netdev);
2919 
2920 int rdma_init_netdev(struct ib_device *device, u32 port_num,
2921              enum rdma_netdev_t type, const char *name,
2922              unsigned char name_assign_type,
2923              void (*setup)(struct net_device *),
2924              struct net_device *netdev)
2925 {
2926     struct rdma_netdev_alloc_params params;
2927     int rc;
2928 
2929     if (!device->ops.rdma_netdev_get_params)
2930         return -EOPNOTSUPP;
2931 
2932     rc = device->ops.rdma_netdev_get_params(device, port_num, type,
2933                         &params);
2934     if (rc)
2935         return rc;
2936 
2937     return params.initialize_rdma_netdev(device, port_num,
2938                          netdev, params.param);
2939 }
2940 EXPORT_SYMBOL(rdma_init_netdev);
2941 
2942 void __rdma_block_iter_start(struct ib_block_iter *biter,
2943                  struct scatterlist *sglist, unsigned int nents,
2944                  unsigned long pgsz)
2945 {
2946     memset(biter, 0, sizeof(struct ib_block_iter));
2947     biter->__sg = sglist;
2948     biter->__sg_nents = nents;
2949 
2950     /* Driver provides best block size to use */
2951     biter->__pg_bit = __fls(pgsz);
2952 }
2953 EXPORT_SYMBOL(__rdma_block_iter_start);
2954 
2955 bool __rdma_block_iter_next(struct ib_block_iter *biter)
2956 {
2957     unsigned int block_offset;
2958 
2959     if (!biter->__sg_nents || !biter->__sg)
2960         return false;
2961 
2962     biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
2963     block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
2964     biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset;
2965 
2966     if (biter->__sg_advance >= sg_dma_len(biter->__sg)) {
2967         biter->__sg_advance = 0;
2968         biter->__sg = sg_next(biter->__sg);
2969         biter->__sg_nents--;
2970     }
2971 
2972     return true;
2973 }
2974 EXPORT_SYMBOL(__rdma_block_iter_next);
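
/*
 * Example (not part of the original file): providers normally consume the
 * block iterator through the rdma_for_each_block() macro in <rdma/ib_verbs.h>,
 * which wraps the two functions above.  This sketch only counts the aligned
 * DMA blocks of a mapped scatterlist for a given block size.
 */
static unsigned int example_count_blocks(struct scatterlist *sgl,
					 unsigned int nents,
					 unsigned long block_size)
{
	struct ib_block_iter biter;
	unsigned int nblocks = 0;

	rdma_for_each_block(sgl, &biter, nents, block_size)
		nblocks++;

	return nblocks;
}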
2975 
2976 /**
2977  * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
2978  *   for the drivers.
2979  * @descs: array of static descriptors
2980  * @num_counters: number of elements in array
2981  * @lifespan: milliseconds between updates
2982  */
2983 struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
2984     const struct rdma_stat_desc *descs, int num_counters,
2985     unsigned long lifespan)
2986 {
2987     struct rdma_hw_stats *stats;
2988 
2989     stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL);
2990     if (!stats)
2991         return NULL;
2992 
2993     stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
2994                      sizeof(*stats->is_disabled), GFP_KERNEL);
2995     if (!stats->is_disabled)
2996         goto err;
2997 
2998     stats->descs = descs;
2999     stats->num_counters = num_counters;
3000     stats->lifespan = msecs_to_jiffies(lifespan);
3001     mutex_init(&stats->lock);
3002 
3003     return stats;
3004 
3005 err:
3006     kfree(stats);
3007     return NULL;
3008 }
3009 EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
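
/*
 * Example (not part of the original file): a sketch of a provider's
 * alloc_hw_port_stats callback built on rdma_alloc_hw_stats_struct().  The
 * counter names and the 10 msec lifespan are illustrative; the descriptor
 * array must outlive the returned structure because only the pointer is
 * stored.
 */
static const struct rdma_stat_desc example_descs[] = {
	{ .name = "example_rx_packets" },
	{ .name = "example_tx_packets" },
};

static struct rdma_hw_stats *example_alloc_port_stats(struct ib_device *ibdev,
						      u32 port_num)
{
	return rdma_alloc_hw_stats_struct(example_descs,
					  ARRAY_SIZE(example_descs),
					  10 /* msec between cached updates */);
}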
3010 
3011 /**
3012  * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats
3013  * @stats: statistics to release
3014  */
3015 void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
3016 {
3017     if (!stats)
3018         return;
3019 
3020     kfree(stats->is_disabled);
3021     kfree(stats);
3022 }
3023 EXPORT_SYMBOL(rdma_free_hw_stats_struct);