/*
 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the names of the copyright holders nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * Alternatively, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") version 2 as published by the Free
 * Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <linux/module.h>
#include <linux/pid.h>
#include <linux/pid_namespace.h>
#include <linux/mutex.h>
#include <net/netlink.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_netlink.h>

#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
#include "uverbs.h"

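/*
 * Common signature of the fill_res_*_entry() callbacks below; the shared
 * res_get_common_doit()/res_get_common_dumpit() helpers use it to fill one
 * tracked resource into a netlink message.
 */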
typedef int (*res_fill_func_t)(struct sk_buff*, bool,
                   struct rdma_restrack_entry*, uint32_t);

/*
 * Sort array elements by the netlink attribute name
 */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
    [RDMA_NLDEV_ATTR_CHARDEV]       = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_CHARDEV_ABI]       = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_CHARDEV_NAME]      = { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
    [RDMA_NLDEV_ATTR_CHARDEV_TYPE]      = { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
    [RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_DEV_NAME]      = { .type = NLA_NUL_STRING,
                    .len = IB_DEVICE_NAME_MAX },
    [RDMA_NLDEV_ATTR_DEV_NODE_TYPE]     = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_DEV_PROTOCOL]      = { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
    [RDMA_NLDEV_ATTR_DRIVER]        = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_DRIVER_ENTRY]      = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_DRIVER_STRING]     = { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
    [RDMA_NLDEV_ATTR_DRIVER_S32]        = { .type = NLA_S32 },
    [RDMA_NLDEV_ATTR_DRIVER_S64]        = { .type = NLA_S64 },
    [RDMA_NLDEV_ATTR_DRIVER_U32]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_DRIVER_U64]        = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_FW_VERSION]        = { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
    [RDMA_NLDEV_ATTR_LID]           = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_LINK_TYPE]     = { .type = NLA_NUL_STRING,
                    .len = IFNAMSIZ },
    [RDMA_NLDEV_ATTR_LMC]           = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_NDEV_INDEX]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_NDEV_NAME]     = { .type = NLA_NUL_STRING,
                    .len = IFNAMSIZ },
    [RDMA_NLDEV_ATTR_NODE_GUID]     = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_PORT_INDEX]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_PORT_PHYS_STATE]   = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_PORT_STATE]        = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_RES_CM_ID]     = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_CM_IDN]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]   = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_CQ]        = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_CQE]       = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_CQN]       = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_CQ_ENTRY]      = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_CTX]       = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_CTXN]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_CTX_ENTRY]     = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_DST_ADDR]      = {
            .len = sizeof(struct __kernel_sockaddr_storage) },
    [RDMA_NLDEV_ATTR_RES_IOVA]      = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_RES_KERN_NAME]     = { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
    [RDMA_NLDEV_ATTR_RES_LKEY]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]    = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_LQPN]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_MR]        = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_MRLEN]     = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_RES_MRN]       = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_MR_ENTRY]      = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]    = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_RES_PD]        = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_PDN]       = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_PD_ENTRY]      = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_PID]       = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_POLL_CTX]      = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_RES_PS]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_QP]        = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_QP_ENTRY]      = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_RAW]       = { .type = NLA_BINARY },
    [RDMA_NLDEV_ATTR_RES_RKEY]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_RQPN]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_RQ_PSN]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_SQ_PSN]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_SRC_ADDR]      = {
            .len = sizeof(struct __kernel_sockaddr_storage) },
    [RDMA_NLDEV_ATTR_RES_STATE]     = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_RES_SUMMARY]       = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
                    .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
    [RDMA_NLDEV_ATTR_RES_TYPE]      = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_USECNT]        = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_RES_SRQ]       = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_RES_SRQN]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY]     = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_MIN_RANGE]     = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_MAX_RANGE]     = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_SM_LID]        = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_SUBNET_PREFIX]     = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]   = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_STAT_MODE]     = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_STAT_RES]      = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_STAT_COUNTER]      = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]    = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
    [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
    [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]    = { .type = NLA_U64 },
    [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]  = { .type = NLA_U32 },
    [RDMA_NLDEV_NET_NS_FD]          = { .type = NLA_U32 },
    [RDMA_NLDEV_SYS_ATTR_NETNS_MODE]    = { .type = NLA_U8 },
    [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK]  = { .type = NLA_U8 },
    [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX]  = { .type = NLA_U32 },
    [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
};

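/*
 * Helpers for emitting driver-specific attributes: each value is preceded
 * by a RDMA_NLDEV_ATTR_DRIVER_STRING carrying its name and, optionally, a
 * RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE hint for how userspace should print it.
 */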
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
                      enum rdma_nldev_print_type print_type)
{
    if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
        return -EMSGSIZE;
    if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
        nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
        return -EMSGSIZE;

    return 0;
}

static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
                   enum rdma_nldev_print_type print_type,
                   u32 value)
{
    if (put_driver_name_print_type(msg, name, print_type))
        return -EMSGSIZE;
    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
        return -EMSGSIZE;

    return 0;
}

static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
                   enum rdma_nldev_print_type print_type,
                   u64 value)
{
    if (put_driver_name_print_type(msg, name, print_type))
        return -EMSGSIZE;
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
                  RDMA_NLDEV_ATTR_PAD))
        return -EMSGSIZE;

    return 0;
}

int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
                  const char *str)
{
    if (put_driver_name_print_type(msg, name,
                       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
        return -EMSGSIZE;
    if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
        return -EMSGSIZE;

    return 0;
}
EXPORT_SYMBOL(rdma_nl_put_driver_string);

int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
    return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);

int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
                   u32 value)
{
    return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);

int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
    return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);

int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
    return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
                       value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);

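/* Identify an IB device in the message by its index and name. */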
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
        return -EMSGSIZE;
    if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
               dev_name(&device->dev)))
        return -EMSGSIZE;

    return 0;
}

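/*
 * Fill device-wide information: handle, port count, capability flags,
 * FW version, GUIDs, node type, DIM setting and the protocol string.
 */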
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
    char fw[IB_FW_VERSION_NAME_MAX];
    int ret = 0;
    u32 port;

    if (fill_nldev_handle(msg, device))
        return -EMSGSIZE;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
        return -EMSGSIZE;

    BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
                  device->attrs.device_cap_flags,
                  RDMA_NLDEV_ATTR_PAD))
        return -EMSGSIZE;

    ib_get_device_fw_str(device, fw);
    /* Device without FW has strlen(fw) = 0 */
    if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
        return -EMSGSIZE;

    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
                  be64_to_cpu(device->node_guid),
                  RDMA_NLDEV_ATTR_PAD))
        return -EMSGSIZE;
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
                  be64_to_cpu(device->attrs.sys_image_guid),
                  RDMA_NLDEV_ATTR_PAD))
        return -EMSGSIZE;
    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
        return -EMSGSIZE;
    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
        return -EMSGSIZE;

    /*
     * The link type is determined from the first port. An mlx4 device,
     * which can potentially expose two different link types on the same
     * IB device, is a case best avoided in the future.
     */
    port = rdma_start_port(device);
    if (rdma_cap_opa_mad(device, port))
        ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
    else if (rdma_protocol_ib(device, port))
        ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
    else if (rdma_protocol_iwarp(device, port))
        ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
    else if (rdma_protocol_roce(device, port))
        ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
    else if (rdma_protocol_usnic(device, port))
        ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
                     "usnic");
    return ret;
}

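/*
 * Fill per-port information. IB-specific attributes (capability flags,
 * subnet prefix, LID, SM LID, LMC) are emitted only for IB links, and
 * netdev details only when the associated net_device belongs to the
 * requester's network namespace.
 */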
static int fill_port_info(struct sk_buff *msg,
              struct ib_device *device, u32 port,
              const struct net *net)
{
    struct net_device *netdev = NULL;
    struct ib_port_attr attr;
    int ret;
    u64 cap_flags = 0;

    if (fill_nldev_handle(msg, device))
        return -EMSGSIZE;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
        return -EMSGSIZE;

    ret = ib_query_port(device, port, &attr);
    if (ret)
        return ret;

    if (rdma_protocol_ib(device, port)) {
        BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
                sizeof(attr.port_cap_flags2)) > sizeof(u64));
        cap_flags = attr.port_cap_flags |
            ((u64)attr.port_cap_flags2 << 32);
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
                      cap_flags, RDMA_NLDEV_ATTR_PAD))
            return -EMSGSIZE;
        if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
                      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
            return -EMSGSIZE;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
            return -EMSGSIZE;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
            return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
            return -EMSGSIZE;
    }
    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
        return -EMSGSIZE;
    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
        return -EMSGSIZE;

    netdev = ib_device_get_netdev(device, port);
    if (netdev && net_eq(dev_net(netdev), net)) {
        ret = nla_put_u32(msg,
                  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
        if (ret)
            goto out;
        ret = nla_put_string(msg,
                     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
    }

out:
    if (netdev)
        dev_put(netdev);
    return ret;
}

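/*
 * The resource summary is a nested table of (name, current count) pairs,
 * one entry per tracked restrack resource type.
 */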
static int fill_res_info_entry(struct sk_buff *msg,
                   const char *name, u64 curr)
{
    struct nlattr *entry_attr;

    entry_attr = nla_nest_start_noflag(msg,
                       RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
    if (!entry_attr)
        return -EMSGSIZE;

    if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
        goto err;
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
                  RDMA_NLDEV_ATTR_PAD))
        goto err;

    nla_nest_end(msg, entry_attr);
    return 0;

err:
    nla_nest_cancel(msg, entry_attr);
    return -EMSGSIZE;
}

static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
{
    static const char * const names[RDMA_RESTRACK_MAX] = {
        [RDMA_RESTRACK_PD] = "pd",
        [RDMA_RESTRACK_CQ] = "cq",
        [RDMA_RESTRACK_QP] = "qp",
        [RDMA_RESTRACK_CM_ID] = "cm_id",
        [RDMA_RESTRACK_MR] = "mr",
        [RDMA_RESTRACK_CTX] = "ctx",
        [RDMA_RESTRACK_SRQ] = "srq",
    };

    struct nlattr *table_attr;
    int ret, i, curr;

    if (fill_nldev_handle(msg, device))
        return -EMSGSIZE;

    table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
    if (!table_attr)
        return -EMSGSIZE;

    for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
        if (!names[i])
            continue;
        curr = rdma_restrack_count(device, i);
        ret = fill_res_info_entry(msg, names[i], curr);
        if (ret)
            goto err;
    }

    nla_nest_end(msg, table_attr);
    return 0;

err:
    nla_nest_cancel(msg, table_attr);
    return ret;
}

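/*
 * Report the owner of a resource: the kernel consumer name for kernel
 * resources, or the (namespace-local) PID of the owning task for user
 * resources.
 */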
static int fill_res_name_pid(struct sk_buff *msg,
                 struct rdma_restrack_entry *res)
{
    int err = 0;

    /*
     * For user resources, the user should read /proc/PID/comm to get
     * the name of the task.
     */
    if (rdma_is_kernel_res(res)) {
        err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
                     res->kern_name);
    } else {
        pid_t pid;

        pid = task_pid_vnr(res->task);
        /*
         * A zero PID means the task is dead and in zombie state;
         * there is no need to print the PID anymore.
         */
        if (pid)
            /*
             * This part is racy: the task can be killed and the
             * PID become zero right here, but that is ok, the
             * next query won't return a PID. We don't promise
             * real-time reflection of SW objects.
             */
            err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
    }

    return err ? -EMSGSIZE : 0;
}

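/*
 * QP entries combine ib_query_qp() results (remote QPN, PSNs, path
 * migration state, QP state) with restrack metadata and an optional
 * driver fill_res_qp_entry callback.
 */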
static int fill_res_qp_entry_query(struct sk_buff *msg,
                   struct rdma_restrack_entry *res,
                   struct ib_device *dev,
                   struct ib_qp *qp)
{
    struct ib_qp_init_attr qp_init_attr;
    struct ib_qp_attr qp_attr;
    int ret;

    ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
    if (ret)
        return ret;

    if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
                qp_attr.dest_qp_num))
            goto err;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
                qp_attr.rq_psn))
            goto err;
    }

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
        goto err;

    if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
        qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
                   qp_attr.path_mig_state))
            goto err;
    }
    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
        goto err;
    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
        goto err;

    if (dev->ops.fill_res_qp_entry)
        return dev->ops.fill_res_qp_entry(msg, qp);
    return 0;

err:    return -EMSGSIZE;
}

static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_qp *qp = container_of(res, struct ib_qp, res);
    struct ib_device *dev = qp->device;
    int ret;

    if (port && port != qp->port)
        return -EAGAIN;

    /* In create_qp() port is not set yet */
    if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
        return -EINVAL;

    ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
    if (ret)
        return -EMSGSIZE;

    if (!rdma_is_kernel_res(res) &&
        nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
        return -EMSGSIZE;

    ret = fill_res_name_pid(msg, res);
    if (ret)
        return -EMSGSIZE;

    return fill_res_qp_entry_query(msg, res, dev, qp);
}

static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_qp *qp = container_of(res, struct ib_qp, res);
    struct ib_device *dev = qp->device;

    if (port && port != qp->port)
        return -EAGAIN;
    if (!dev->ops.fill_res_qp_entry_raw)
        return -EINVAL;
    return dev->ops.fill_res_qp_entry_raw(msg, qp);
}

static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
                struct rdma_restrack_entry *res, uint32_t port)
{
    struct rdma_id_private *id_priv =
                container_of(res, struct rdma_id_private, res);
    struct ib_device *dev = id_priv->id.device;
    struct rdma_cm_id *cm_id = &id_priv->id;

    if (port && port != cm_id->port_num)
        return 0;

    if (cm_id->port_num &&
        nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
        goto err;

    if (id_priv->qp_num) {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
            goto err;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
            goto err;
    }

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
        goto err;

    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
        goto err;

    if (cm_id->route.addr.src_addr.ss_family &&
        nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
            sizeof(cm_id->route.addr.src_addr),
            &cm_id->route.addr.src_addr))
        goto err;
    if (cm_id->route.addr.dst_addr.ss_family &&
        nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
            sizeof(cm_id->route.addr.dst_addr),
            &cm_id->route.addr.dst_addr))
        goto err;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
        goto err;

    if (fill_res_name_pid(msg, res))
        goto err;

    if (dev->ops.fill_res_cm_id_entry)
        return dev->ops.fill_res_cm_id_entry(msg, cm_id);
    return 0;

err: return -EMSGSIZE;
}

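/*
 * CQ entries report the CQE count, reference count, poll context (kernel
 * CQs only), DIM usage and, for user CQs, the owning context.
 */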
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_cq *cq = container_of(res, struct ib_cq, res);
    struct ib_device *dev = cq->device;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
        return -EMSGSIZE;
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                  atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
        return -EMSGSIZE;

    /* Poll context is only valid for kernel CQs */
    if (rdma_is_kernel_res(res) &&
        nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
        return -EMSGSIZE;

    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
        return -EMSGSIZE;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
        return -EMSGSIZE;
    if (!rdma_is_kernel_res(res) &&
        nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
            cq->uobject->uevent.uobject.context->res.id))
        return -EMSGSIZE;

    if (fill_res_name_pid(msg, res))
        return -EMSGSIZE;

    return (dev->ops.fill_res_cq_entry) ?
        dev->ops.fill_res_cq_entry(msg, cq) : 0;
}

static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_cq *cq = container_of(res, struct ib_cq, res);
    struct ib_device *dev = cq->device;

    if (!dev->ops.fill_res_cq_entry_raw)
        return -EINVAL;
    return dev->ops.fill_res_cq_entry_raw(msg, cq);
}

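/*
 * MR keys are security sensitive, so rkey/lkey are reported only to
 * callers holding CAP_NET_ADMIN.
 */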
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_mr *mr = container_of(res, struct ib_mr, res);
    struct ib_device *dev = mr->pd->device;

    if (has_cap_net_admin) {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
            return -EMSGSIZE;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
            return -EMSGSIZE;
    }

    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
                  RDMA_NLDEV_ATTR_PAD))
        return -EMSGSIZE;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
        return -EMSGSIZE;

    if (!rdma_is_kernel_res(res) &&
        nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
        return -EMSGSIZE;

    if (fill_res_name_pid(msg, res))
        return -EMSGSIZE;

    return (dev->ops.fill_res_mr_entry) ?
               dev->ops.fill_res_mr_entry(msg, mr) :
               0;
}

static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_mr *mr = container_of(res, struct ib_mr, res);
    struct ib_device *dev = mr->pd->device;

    if (!dev->ops.fill_res_mr_entry_raw)
        return -EINVAL;
    return dev->ops.fill_res_mr_entry_raw(msg, mr);
}

static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
                 struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_pd *pd = container_of(res, struct ib_pd, res);

    if (has_cap_net_admin) {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
                pd->local_dma_lkey))
            goto err;
        if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
            nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
                pd->unsafe_global_rkey))
            goto err;
    }
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
                  atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
        goto err;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
        goto err;

    if (!rdma_is_kernel_res(res) &&
        nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
            pd->uobject->context->res.id))
        goto err;

    return fill_res_name_pid(msg, res);

err:    return -EMSGSIZE;
}

static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
                  struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);

    if (rdma_is_kernel_res(res))
        return 0;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
        return -EMSGSIZE;

    return fill_res_name_pid(msg, res);
}

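/*
 * The QPs attached to an SRQ are reported as a compressed list of ranges:
 * consecutive QP numbers are folded into one MIN_RANGE/MAX_RANGE pair,
 * and a single QP is reported by its LQPN alone. A zero min_range marks
 * an empty range and emits nothing.
 */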
static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
                   uint32_t max_range)
{
    struct nlattr *entry_attr;

    if (!min_range)
        return 0;

    entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
    if (!entry_attr)
        return -EMSGSIZE;

    if (min_range == max_range) {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
            goto err;
    } else {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
            goto err;
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
            goto err;
    }
    nla_nest_end(msg, entry_attr);
    return 0;

err:
    nla_nest_cancel(msg, entry_attr);
    return -EMSGSIZE;
}

static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
{
    uint32_t min_range = 0, prev = 0;
    struct rdma_restrack_entry *res;
    struct rdma_restrack_root *rt;
    struct nlattr *table_attr;
    struct ib_qp *qp = NULL;
    unsigned long id = 0;

    table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
    if (!table_attr)
        return -EMSGSIZE;

    rt = &srq->device->res[RDMA_RESTRACK_QP];
    xa_lock(&rt->xa);
    xa_for_each(&rt->xa, id, res) {
        if (!rdma_restrack_get(res))
            continue;

        qp = container_of(res, struct ib_qp, res);
        if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
            rdma_restrack_put(res);
            continue;
        }

        if (qp->qp_num < prev)
            /* qp_num should be ascending */
            goto err_loop;

        if (min_range == 0) {
            min_range = qp->qp_num;
        } else if (qp->qp_num > (prev + 1)) {
            if (fill_res_range_qp_entry(msg, min_range, prev))
                goto err_loop;

            min_range = qp->qp_num;
        }
        prev = qp->qp_num;
        rdma_restrack_put(res);
    }

    xa_unlock(&rt->xa);

    if (fill_res_range_qp_entry(msg, min_range, prev))
        goto err;

    nla_nest_end(msg, table_attr);
    return 0;

err_loop:
    rdma_restrack_put(res);
    xa_unlock(&rt->xa);
err:
    nla_nest_cancel(msg, table_attr);
    return -EMSGSIZE;
}

static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
                  struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_srq *srq = container_of(res, struct ib_srq, res);

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
        goto err;

    if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
        goto err;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
        goto err;

    if (ib_srq_has_cq(srq->srq_type)) {
        if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
                srq->ext.cq->res.id))
            goto err;
    }

    if (fill_res_srq_qps(msg, srq))
        goto err;

    return fill_res_name_pid(msg, res);

err:
    return -EMSGSIZE;
}

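/*
 * Counter entries describe an rdma_counter: its mode (and, in auto mode,
 * the criteria used for QP binding), the QPs currently bound to it, and a
 * snapshot of its hardware counters.
 */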
static int fill_stat_counter_mode(struct sk_buff *msg,
                  struct rdma_counter *counter)
{
    struct rdma_counter_mode *m = &counter->mode;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
        return -EMSGSIZE;

    if (m->mode == RDMA_COUNTER_MODE_AUTO) {
        if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
            nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
            return -EMSGSIZE;

        if ((m->mask & RDMA_COUNTER_MASK_PID) &&
            fill_res_name_pid(msg, &counter->res))
            return -EMSGSIZE;
    }

    return 0;
}

static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
    struct nlattr *entry_attr;

    entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
    if (!entry_attr)
        return -EMSGSIZE;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
        goto err;

    nla_nest_end(msg, entry_attr);
    return 0;

err:
    nla_nest_cancel(msg, entry_attr);
    return -EMSGSIZE;
}

static int fill_stat_counter_qps(struct sk_buff *msg,
                 struct rdma_counter *counter)
{
    struct rdma_restrack_entry *res;
    struct rdma_restrack_root *rt;
    struct nlattr *table_attr;
    struct ib_qp *qp = NULL;
    unsigned long id = 0;
    int ret = 0;

    table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
    if (!table_attr)
        return -EMSGSIZE;

    rt = &counter->device->res[RDMA_RESTRACK_QP];
    xa_lock(&rt->xa);
    xa_for_each(&rt->xa, id, res) {
        qp = container_of(res, struct ib_qp, res);
        if (!qp->counter || (qp->counter->id != counter->id))
            continue;

        ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
        if (ret)
            goto err;
    }

    xa_unlock(&rt->xa);
    nla_nest_end(msg, table_attr);
    return 0;

err:
    xa_unlock(&rt->xa);
    nla_nest_cancel(msg, table_attr);
    return ret;
}

int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
                 u64 value)
{
    struct nlattr *entry_attr;

    entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
    if (!entry_attr)
        return -EMSGSIZE;

    if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
               name))
        goto err;
    if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
                  value, RDMA_NLDEV_ATTR_PAD))
        goto err;

    nla_nest_end(msg, entry_attr);
    return 0;

err:
    nla_nest_cancel(msg, entry_attr);
    return -EMSGSIZE;
}
EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);

static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
                  struct rdma_restrack_entry *res, uint32_t port)
{
    struct ib_mr *mr = container_of(res, struct ib_mr, res);
    struct ib_device *dev = mr->pd->device;

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
        goto err;

    if (dev->ops.fill_stat_mr_entry)
        return dev->ops.fill_stat_mr_entry(msg, mr);
    return 0;

err:
    return -EMSGSIZE;
}

static int fill_stat_counter_hwcounters(struct sk_buff *msg,
                    struct rdma_counter *counter)
{
    struct rdma_hw_stats *st = counter->stats;
    struct nlattr *table_attr;
    int i;

    table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
    if (!table_attr)
        return -EMSGSIZE;

    mutex_lock(&st->lock);
    for (i = 0; i < st->num_counters; i++) {
        if (test_bit(i, st->is_disabled))
            continue;
        if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
                         st->value[i]))
            goto err;
    }
    mutex_unlock(&st->lock);

    nla_nest_end(msg, table_attr);
    return 0;

err:
    mutex_unlock(&st->lock);
    nla_nest_cancel(msg, table_attr);
    return -EMSGSIZE;
}

static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
                  struct rdma_restrack_entry *res,
                  uint32_t port)
{
    struct rdma_counter *counter =
        container_of(res, struct rdma_counter, res);

    if (port && port != counter->port)
        return -EAGAIN;

    /* Dump it even if the query failed */
    rdma_counter_query_stats(counter);

    if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
        nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
        fill_stat_counter_mode(msg, counter) ||
        fill_stat_counter_qps(msg, counter) ||
        fill_stat_counter_hwcounters(msg, counter))
        return -EMSGSIZE;

    return 0;
}

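/*
 * Netlink handlers. Each doit handler parses the request against
 * nldev_policy, resolves the device by RDMA_NLDEV_ATTR_DEV_INDEX within
 * the requester's network namespace, and unicasts a freshly allocated
 * reply back to the caller.
 */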
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
              struct netlink_ext_ack *extack)
{
    struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
    struct ib_device *device;
    struct sk_buff *msg;
    u32 index;
    int err;

    err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                     nldev_policy, extack);
    if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
        return -EINVAL;

    index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);

    device = ib_device_get_by_index(sock_net(skb->sk), index);
    if (!device)
        return -EINVAL;

    msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
    if (!msg) {
        err = -ENOMEM;
        goto err;
    }

    nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
            RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
            0, 0);

    err = fill_dev_info(msg, device);
    if (err)
        goto err_free;

    nlmsg_end(msg, nlh);

    ib_device_put(device);
    return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
    nlmsg_free(msg);
err:
    ib_device_put(device);
    return err;
}

static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
              struct netlink_ext_ack *extack)
{
    struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
    struct ib_device *device;
    u32 index;
    int err;

    err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                     nldev_policy, extack);
    if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
        return -EINVAL;

    index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
    device = ib_device_get_by_index(sock_net(skb->sk), index);
    if (!device)
        return -EINVAL;

    if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
        char name[IB_DEVICE_NAME_MAX] = {};

        nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
                IB_DEVICE_NAME_MAX);
        if (strlen(name) == 0) {
            err = -EINVAL;
            goto done;
        }
        err = ib_device_rename(device, name);
        goto done;
    }

    if (tb[RDMA_NLDEV_NET_NS_FD]) {
        u32 ns_fd;

        ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
        err = ib_device_set_netns_put(skb, device, ns_fd);
        goto put_done;
    }

    if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
        u8 use_dim;

        use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
        err = ib_device_set_dim(device, use_dim);
        goto done;
    }

done:
    ib_device_put(device);
put_done:
    return err;
}

static int _nldev_get_dumpit(struct ib_device *device,
                 struct sk_buff *skb,
                 struct netlink_callback *cb,
                 unsigned int idx)
{
    int start = cb->args[0];
    struct nlmsghdr *nlh;

    if (idx < start)
        return 0;

    nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
            RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
            0, NLM_F_MULTI);

    if (fill_dev_info(skb, device)) {
        nlmsg_cancel(skb, nlh);
        goto out;
    }

    nlmsg_end(skb, nlh);

    idx++;

out:    cb->args[0] = idx;
    return skb->len;
}

static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
    /*
     * There is no need to take a lock here, because we rely on
     * ib_core's locking.
     */
    return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}

static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                   struct netlink_ext_ack *extack)
{
    struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
    struct ib_device *device;
    struct sk_buff *msg;
    u32 index;
    u32 port;
    int err;

    err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                     nldev_policy, extack);
    if (err ||
        !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
        !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
        return -EINVAL;

    index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
    device = ib_device_get_by_index(sock_net(skb->sk), index);
    if (!device)
        return -EINVAL;

    port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
    if (!rdma_is_port_valid(device, port)) {
        err = -EINVAL;
        goto err;
    }

    msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
    if (!msg) {
        err = -ENOMEM;
        goto err;
    }

    nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
            RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
            0, 0);

    err = fill_port_info(msg, device, port, sock_net(skb->sk));
    if (err)
        goto err_free;

    nlmsg_end(msg, nlh);
    ib_device_put(device);

    return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
    nlmsg_free(msg);
err:
    ib_device_put(device);
    return err;
}

static int nldev_port_get_dumpit(struct sk_buff *skb,
                 struct netlink_callback *cb)
{
    struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
    struct ib_device *device;
    int start = cb->args[0];
    struct nlmsghdr *nlh;
    u32 idx = 0;
    u32 ifindex;
    int err;
    unsigned int p;

    err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                     nldev_policy, NULL);
    if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
        return -EINVAL;

    ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
    device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
    if (!device)
        return -EINVAL;

    rdma_for_each_port (device, p) {
        /*
         * The dumpit function returns all information from a
         * specific index onward. This index is taken from the
         * netlink request sent by the user and is available in
         * cb->args[0].
         *
         * Usually, the user doesn't fill this field, which causes
         * everything to be returned.
         */
        if (idx < start) {
            idx++;
            continue;
        }

        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
                cb->nlh->nlmsg_seq,
                RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                         RDMA_NLDEV_CMD_PORT_GET),
                0, NLM_F_MULTI);

        if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
            nlmsg_cancel(skb, nlh);
            goto out;
        }
        idx++;
        nlmsg_end(skb, nlh);
    }

out:
    ib_device_put(device);
    cb->args[0] = idx;
    return skb->len;
}

static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                  struct netlink_ext_ack *extack)
{
    struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
    struct ib_device *device;
    struct sk_buff *msg;
    u32 index;
    int ret;

    ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                     nldev_policy, extack);
    if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
        return -EINVAL;

    index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
    device = ib_device_get_by_index(sock_net(skb->sk), index);
    if (!device)
        return -EINVAL;

    msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
    if (!msg) {
        ret = -ENOMEM;
        goto err;
    }

    nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
            RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
            0, 0);

    ret = fill_res_info(msg, device);
    if (ret)
        goto err_free;

    nlmsg_end(msg, nlh);
    ib_device_put(device);
    return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
    nlmsg_free(msg);
err:
    ib_device_put(device);
    return ret;
}

static int _nldev_res_get_dumpit(struct ib_device *device,
                 struct sk_buff *skb,
                 struct netlink_callback *cb,
                 unsigned int idx)
{
    int start = cb->args[0];
    struct nlmsghdr *nlh;

    if (idx < start)
        return 0;

    nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
            RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
            0, NLM_F_MULTI);

    if (fill_res_info(skb, device)) {
        nlmsg_cancel(skb, nlh);
        goto out;
    }
    nlmsg_end(skb, nlh);

    idx++;

out:
    cb->args[0] = idx;
    return skb->len;
}

static int nldev_res_get_dumpit(struct sk_buff *skb,
                struct netlink_callback *cb)
{
    return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
}

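/*
 * Per-resource-type dispatch table: which nested attribute wraps the
 * table, which attribute wraps each entry, which attribute carries the
 * object ID, and whether the resource is per-device rather than per-port.
 */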
struct nldev_fill_res_entry {
    enum rdma_nldev_attr nldev_attr;
    u8 flags;
    u32 entry;
    u32 id;
};

enum nldev_res_flags {
    NLDEV_PER_DEV = 1 << 0,
};

static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
    [RDMA_RESTRACK_QP] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
        .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_LQPN,
    },
    [RDMA_RESTRACK_CM_ID] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
        .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
    },
    [RDMA_RESTRACK_CQ] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
        .flags = NLDEV_PER_DEV,
        .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_CQN,
    },
    [RDMA_RESTRACK_MR] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
        .flags = NLDEV_PER_DEV,
        .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_MRN,
    },
    [RDMA_RESTRACK_PD] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
        .flags = NLDEV_PER_DEV,
        .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_PDN,
    },
    [RDMA_RESTRACK_COUNTER] = {
        .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
        .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
        .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
    },
    [RDMA_RESTRACK_CTX] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
        .flags = NLDEV_PER_DEV,
        .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_CTXN,
    },
    [RDMA_RESTRACK_SRQ] = {
        .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
        .flags = NLDEV_PER_DEV,
        .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
        .id = RDMA_NLDEV_ATTR_RES_SRQN,
    },
};

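/*
 * Common doit: look up a single tracked object by the type's ID attribute
 * and fill it via the type-specific callback. CAP_NET_ADMIN on the
 * requesting socket gates security-sensitive fields such as MR keys.
 */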
static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                   struct netlink_ext_ack *extack,
                   enum rdma_restrack_type res_type,
                   res_fill_func_t fill_func)
{
    const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
    struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
    struct rdma_restrack_entry *res;
    struct ib_device *device;
    u32 index, id, port = 0;
    bool has_cap_net_admin;
    struct sk_buff *msg;
    int ret;

    ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                     nldev_policy, extack);
    if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
        return -EINVAL;

    index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
    device = ib_device_get_by_index(sock_net(skb->sk), index);
    if (!device)
        return -EINVAL;

    if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
        if (!rdma_is_port_valid(device, port)) {
            ret = -EINVAL;
            goto err;
        }
    }

    if ((port && fe->flags & NLDEV_PER_DEV) ||
        (!port && ~fe->flags & NLDEV_PER_DEV)) {
        ret = -EINVAL;
        goto err;
    }

    id = nla_get_u32(tb[fe->id]);
    res = rdma_restrack_get_byid(device, res_type, id);
    if (IS_ERR(res)) {
        ret = PTR_ERR(res);
        goto err;
    }

    msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
    if (!msg) {
        ret = -ENOMEM;
        goto err_get;
    }

    nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
            RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
                     RDMA_NL_GET_OP(nlh->nlmsg_type)),
            0, 0);

    if (fill_nldev_handle(msg, device)) {
        ret = -EMSGSIZE;
        goto err_free;
    }

    has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);

    ret = fill_func(msg, has_cap_net_admin, res, port);
    if (ret)
        goto err_free;

    rdma_restrack_put(res);
    nlmsg_end(msg, nlh);
    ib_device_put(device);
    return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_free:
    nlmsg_free(msg);
err_get:
    rdma_restrack_put(res);
err:
    ib_device_put(device);
    return ret;
}

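/*
 * Common dumpit: iterate the device's restrack xarray, resuming at the
 * index saved in cb->args[0]. The xa_lock is dropped around each fill so
 * callbacks may sleep; -EMSGSIZE ends this message (the dump continues on
 * the next call) and -EAGAIN skips the entry.
 */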
1482 static int res_get_common_dumpit(struct sk_buff *skb,
1483                  struct netlink_callback *cb,
1484                  enum rdma_restrack_type res_type,
1485                  res_fill_func_t fill_func)
1486 {
1487     const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1488     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1489     struct rdma_restrack_entry *res;
1490     struct rdma_restrack_root *rt;
1491     int err, ret = 0, idx = 0;
1492     struct nlattr *table_attr;
1493     struct nlattr *entry_attr;
1494     struct ib_device *device;
1495     int start = cb->args[0];
1496     bool has_cap_net_admin;
1497     struct nlmsghdr *nlh;
1498     unsigned long id;
1499     u32 index, port = 0;
1500     bool filled = false;
1501 
1502     err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1503                      nldev_policy, NULL);
1504     /*
1505      * Right now, we are expecting the device index to get res information,
1506      * but it is possible to extend this code to return all devices in
1507      * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
1508      * if it doesn't exist, we will iterate over all devices.
1509      *
1510      * But it is not needed for now.
1511      */
1512     if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1513         return -EINVAL;
1514 
1515     index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1516     device = ib_device_get_by_index(sock_net(skb->sk), index);
1517     if (!device)
1518         return -EINVAL;
1519 
1520     /*
1521      * If no PORT_INDEX is supplied, we will return all QPs from that device
1522      */
1523     if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1524         port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1525         if (!rdma_is_port_valid(device, port)) {
1526             ret = -EINVAL;
1527             goto err_index;
1528         }
1529     }
1530 
1531     nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1532             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1533                      RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
1534             0, NLM_F_MULTI);
1535 
1536     if (fill_nldev_handle(skb, device)) {
1537         ret = -EMSGSIZE;
1538         goto err;
1539     }
1540 
1541     table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1542     if (!table_attr) {
1543         ret = -EMSGSIZE;
1544         goto err;
1545     }
1546 
1547     has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1548 
1549     rt = &device->res[res_type];
1550     xa_lock(&rt->xa);
1551     /*
1552      * FIXME: if the skip ahead is something common this loop should
1553      * use xas_for_each & xas_pause to optimize, we can have a lot of
1554      * objects.
1555      */
1556     xa_for_each(&rt->xa, id, res) {
1557         if (idx < start || !rdma_restrack_get(res))
1558             goto next;
1559 
1560         xa_unlock(&rt->xa);
1561 
1562         filled = true;
1563 
1564         entry_attr = nla_nest_start_noflag(skb, fe->entry);
1565         if (!entry_attr) {
1566             ret = -EMSGSIZE;
1567             rdma_restrack_put(res);
1568             goto msg_full;
1569         }
1570 
1571         ret = fill_func(skb, has_cap_net_admin, res, port);
1572 
1573         rdma_restrack_put(res);
1574 
1575         if (ret) {
1576             nla_nest_cancel(skb, entry_attr);
1577             if (ret == -EMSGSIZE)
1578                 goto msg_full;
1579             if (ret == -EAGAIN)
1580                 goto again;
1581             goto res_err;
1582         }
1583         nla_nest_end(skb, entry_attr);
1584 again:      xa_lock(&rt->xa);
1585 next:       idx++;
1586     }
1587     xa_unlock(&rt->xa);
1588 
1589 msg_full:
1590     nla_nest_end(skb, table_attr);
1591     nlmsg_end(skb, nlh);
1592     cb->args[0] = idx;
1593 
1594     /*
1595      * No more entries to fill: cancel the message and
1596      * return 0 to mark the end of the dumpit.
1597      */
1598     if (!filled)
1599         goto err;
1600 
1601     ib_device_put(device);
1602     return skb->len;
1603 
1604 res_err:
1605     nla_nest_cancel(skb, table_attr);
1606 
1607 err:
1608     nlmsg_cancel(skb, nlh);
1609 
1610 err_index:
1611     ib_device_put(device);
1612     return ret;
1613 }
1614 
1615 #define RES_GET_FUNCS(name, type)                                              \
1616     static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1617                          struct netlink_callback *cb)  \
1618     {                                                                      \
1619         return res_get_common_dumpit(skb, cb, type,                    \
1620                          fill_res_##name##_entry);         \
1621     }                                                                      \
1622     static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1623                            struct nlmsghdr *nlh,           \
1624                            struct netlink_ext_ack *extack) \
1625     {                                                                      \
1626         return res_get_common_doit(skb, nlh, extack, type,             \
1627                        fill_res_##name##_entry);           \
1628     }
1629 
1630 RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1631 RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
1632 RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1633 RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1634 RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
1635 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1636 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1637 RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
1638 RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1639 RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
1640 RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
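
/*
 * For reference, RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) above expands to
 * this pair of handlers (modulo whitespace):
 *
 *    static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
 *                                       struct netlink_callback *cb)
 *    {
 *        return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP,
 *                                     fill_res_qp_entry);
 *    }
 *    static int nldev_res_get_qp_doit(struct sk_buff *skb,
 *                                     struct nlmsghdr *nlh,
 *                                     struct netlink_ext_ack *extack)
 *    {
 *        return res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_QP,
 *                                   fill_res_qp_entry);
 *    }
 */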
1641 
1642 static LIST_HEAD(link_ops);
1643 static DECLARE_RWSEM(link_ops_rwsem);
1644 
1645 static const struct rdma_link_ops *link_ops_get(const char *type)
1646 {
1647     const struct rdma_link_ops *ops;
1648 
1649     list_for_each_entry(ops, &link_ops, list) {
1650         if (!strcmp(ops->type, type))
1651             goto out;
1652     }
1653     ops = NULL;
1654 out:
1655     return ops;
1656 }
1657 
1658 void rdma_link_register(struct rdma_link_ops *ops)
1659 {
1660     down_write(&link_ops_rwsem);
1661     if (WARN_ON_ONCE(link_ops_get(ops->type)))
1662         goto out;
1663     list_add(&ops->list, &link_ops);
1664 out:
1665     up_write(&link_ops_rwsem);
1666 }
1667 EXPORT_SYMBOL(rdma_link_register);
1668 
1669 void rdma_link_unregister(struct rdma_link_ops *ops)
1670 {
1671     down_write(&link_ops_rwsem);
1672     list_del(&ops->list);
1673     up_write(&link_ops_rwsem);
1674 }
1675 EXPORT_SYMBOL(rdma_link_unregister);
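
/*
 * A provider plugs in here via rdma_link_register(); e.g. the rxe
 * soft-RoCE driver registers (per its upstream source, roughly):
 *
 *    static struct rdma_link_ops rxe_link_ops = {
 *        .type = "rxe",
 *        .newlink = rxe_newlink,
 *    };
 *
 * calling rdma_link_register(&rxe_link_ops) on module init and
 * rdma_link_unregister(&rxe_link_ops) on module exit, plus
 * MODULE_ALIAS_RDMA_LINK("rxe") so the request_module() in
 * nldev_newlink() below can autoload it on demand.
 */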
1676 
1677 static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1678               struct netlink_ext_ack *extack)
1679 {
1680     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1681     char ibdev_name[IB_DEVICE_NAME_MAX];
1682     const struct rdma_link_ops *ops;
1683     char ndev_name[IFNAMSIZ];
1684     struct net_device *ndev;
1685     char type[IFNAMSIZ];
1686     int err;
1687 
1688     err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1689                      nldev_policy, extack);
1690     if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1691         !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1692         return -EINVAL;
1693 
1694     nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1695             sizeof(ibdev_name));
1696     if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1697         return -EINVAL;
1698 
1699     nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1700     nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1701             sizeof(ndev_name));
1702 
1703     ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1704     if (!ndev)
1705         return -ENODEV;
1706 
1707     down_read(&link_ops_rwsem);
1708     ops = link_ops_get(type);
1709 #ifdef CONFIG_MODULES
1710     if (!ops) {
1711         up_read(&link_ops_rwsem);
1712         request_module("rdma-link-%s", type);
1713         down_read(&link_ops_rwsem);
1714         ops = link_ops_get(type);
1715     }
1716 #endif
1717     err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1718     up_read(&link_ops_rwsem);
1719     dev_put(ndev);
1720 
1721     return err;
1722 }
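
/*
 * Userspace drives this through the iproute2 rdma tool, e.g. (assuming
 * the rxe driver is available):
 *
 *    rdma link add rxe0 type rxe netdev eth0
 */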
1723 
1724 static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1725               struct netlink_ext_ack *extack)
1726 {
1727     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1728     struct ib_device *device;
1729     u32 index;
1730     int err;
1731 
1732     err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1733                      nldev_policy, extack);
1734     if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1735         return -EINVAL;
1736 
1737     index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1738     device = ib_device_get_by_index(sock_net(skb->sk), index);
1739     if (!device)
1740         return -EINVAL;
1741 
1742     if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
1743         ib_device_put(device);
1744         return -EINVAL;
1745     }
1746 
1747     ib_unregister_device_and_put(device);
1748     return 0;
1749 }
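
/*
 * Userspace counterpart: "rdma link delete rxe0" (iproute2). Only
 * devices whose driver sets IBK_ALLOW_USER_UNREG, i.e. software
 * devices such as rxe and siw, may be unregistered this way.
 */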
1750 
1751 static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1752                  struct netlink_ext_ack *extack)
1753 {
1754     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1755     char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1756     struct ib_client_nl_info data = {};
1757     struct ib_device *ibdev = NULL;
1758     struct sk_buff *msg;
1759     u32 index;
1760     int err;
1761 
1762     err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1763               extack);
1764     if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1765         return -EINVAL;
1766 
1767     nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1768             sizeof(client_name));
1769 
1770     if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1771         index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1772         ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1773         if (!ibdev)
1774             return -EINVAL;
1775 
1776         if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1777             data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1778             if (!rdma_is_port_valid(ibdev, data.port)) {
1779                 err = -EINVAL;
1780                 goto out_put;
1781             }
1782         } else {
1783             data.port = -1;
1784         }
1785     } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1786         return -EINVAL;
1787     }
1788 
1789     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1790     if (!msg) {
1791         err = -ENOMEM;
1792         goto out_put;
1793     }
1794     nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1795             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1796                      RDMA_NLDEV_CMD_GET_CHARDEV),
1797             0, 0);
1798 
1799     data.nl_msg = msg;
1800     err = ib_get_client_nl_info(ibdev, client_name, &data);
1801     if (err)
1802         goto out_nlmsg;
1803 
1804     err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1805                 huge_encode_dev(data.cdev->devt),
1806                 RDMA_NLDEV_ATTR_PAD);
1807     if (err)
1808         goto out_data;
1809     err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1810                 RDMA_NLDEV_ATTR_PAD);
1811     if (err)
1812         goto out_data;
1813     if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1814                dev_name(data.cdev))) {
1815         err = -EMSGSIZE;
1816         goto out_data;
1817     }
1818 
1819     nlmsg_end(msg, nlh);
1820     put_device(data.cdev);
1821     if (ibdev)
1822         ib_device_put(ibdev);
1823     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1824 
1825 out_data:
1826     put_device(data.cdev);
1827 out_nlmsg:
1828     nlmsg_free(msg);
1829 out_put:
1830     if (ibdev)
1831         ib_device_put(ibdev);
1832     return err;
1833 }
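
/*
 * Example (a sketch of the intended use): querying client_name
 * "uverbs" for a device returns, via ib_uverbs' get_nl_info hook, the
 * dev_t and ABI version of the matching /dev/infiniband/uverbsN node,
 * letting userspace map an IB device to its char device without
 * scanning sysfs.
 */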
1834 
1835 static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1836                   struct netlink_ext_ack *extack)
1837 {
1838     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1839     struct sk_buff *msg;
1840     int err;
1841 
1842     err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1843               nldev_policy, extack);
1844     if (err)
1845         return err;
1846 
1847     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1848     if (!msg)
1849         return -ENOMEM;
1850 
1851     nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1852             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1853                      RDMA_NLDEV_CMD_SYS_GET),
1854             0, 0);
1855 
1856     err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1857              (u8)ib_devices_shared_netns);
1858     if (err) {
1859         nlmsg_free(msg);
1860         return err;
1861     }
1862 
1863     /*
1864      * Copy-on-fork is supported.
1865      * See commits:
1866      * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
1867      * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
1868      * for more details. Don't backport this without them.
1869      *
1870      * The return value is ignored on purpose: on failure, assume
1871      * copy-on-fork is not supported.
1872      */
1873     nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
1874 
1875     nlmsg_end(msg, nlh);
1876     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1877 }
1878 
1879 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1880                   struct netlink_ext_ack *extack)
1881 {
1882     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1883     u8 enable;
1884     int err;
1885 
1886     err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1887               nldev_policy, extack);
1888     if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1889         return -EINVAL;
1890 
1891     enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1892     /* Only 0 and 1 are supported */
1893     if (enable > 1)
1894         return -EINVAL;
1895 
1896     err = rdma_compatdev_set(enable);
1897     return err;
1898 }
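
/*
 * Userspace mapping (iproute2): "rdma system show" reports the mode
 * queried above, and "rdma system set netns shared" or "... exclusive"
 * arrives here as enable == 1 or 0 respectively.
 */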
1899 
1900 static int nldev_stat_set_mode_doit(struct sk_buff *msg,
1901                     struct netlink_ext_ack *extack,
1902                     struct nlattr *tb[],
1903                     struct ib_device *device, u32 port)
1904 {
1905     u32 mode, mask = 0, qpn, cntn = 0;
1906     int ret;
1907 
1908     /* Currently only QP counters are supported */
1909     if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1910         nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1911         return -EINVAL;
1912 
1913     mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1914     if (mode == RDMA_COUNTER_MODE_AUTO) {
1915         if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1916             mask = nla_get_u32(
1917                 tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1918         return rdma_counter_set_auto_mode(device, port, mask, extack);
1919     }
1920 
1921     if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
1922         return -EINVAL;
1923 
1924     qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1925     if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1926         cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1927         ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1928         if (ret)
1929             return ret;
1930     } else {
1931         ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
1932         if (ret)
1933             return ret;
1934     }
1935 
1936     if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1937         nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1938         ret = -EMSGSIZE;
1939         goto err_fill;
1940     }
1941 
1942     return 0;
1943 
1944 err_fill:
1945     rdma_counter_unbind_qpn(device, port, qpn, cntn);
1946     return ret;
1947 }
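
/*
 * Userspace examples (iproute2 rdma tool, per its man page):
 *
 *    rdma statistic qp set link mlx5_2/1 auto type on
 *        (auto mode: counters auto-allocated per QP type)
 *    rdma statistic qp bind link mlx5_2/1 lqpn 178
 *        (manual mode: allocate a new counter and bind QP 178)
 *    rdma statistic qp bind link mlx5_2/1 lqpn 178 cntn 4
 *        (manual mode: bind QP 178 to existing counter 4)
 */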
1948 
1949 static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
1950                            struct ib_device *device,
1951                            u32 port)
1952 {
1953     struct rdma_hw_stats *stats;
1954     struct nlattr *entry_attr;
1955     unsigned long *target;
1956     int rem, i, ret = 0;
1957     u32 index;
1958 
1959     stats = ib_get_hw_stats_port(device, port);
1960     if (!stats)
1961         return -EINVAL;
1962 
1963     target = kcalloc(BITS_TO_LONGS(stats->num_counters),
1964              sizeof(*stats->is_disabled), GFP_KERNEL);
1965     if (!target)
1966         return -ENOMEM;
1967 
1968     nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
1969                 rem) {
1970         index = nla_get_u32(entry_attr);
1971         if ((index >= stats->num_counters) ||
1972             !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
1973             ret = -EINVAL;
1974             goto out;
1975         }
1976 
1977         set_bit(index, target);
1978     }
1979 
1980     for (i = 0; i < stats->num_counters; i++) {
1981         if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
1982             continue;
1983 
1984         ret = rdma_counter_modify(device, port, i, test_bit(i, target));
1985         if (ret)
1986             goto out;
1987     }
1988 
1989 out:
1990     kfree(target);
1991     return ret;
1992 }
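
/*
 * Userspace example (iproute2; counter names are mlx5-specific):
 *
 *    rdma statistic set link mlx5_2/1 optional-counters \
 *            cc_rx_ce_pkts,cc_rx_cnp_pkts
 *
 * Optional counters named in the list are enabled and all other
 * optional counters on the port are disabled, which matches the
 * set_bit()/rdma_counter_modify() logic above.
 */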
1993 
1994 static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1995                    struct netlink_ext_ack *extack)
1996 {
1997     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1998     struct ib_device *device;
1999     struct sk_buff *msg;
2000     u32 index, port;
2001     int ret;
2002 
2003     ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
2004               extack);
2005     if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2006         !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2007         return -EINVAL;
2008 
2009     index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2010     device = ib_device_get_by_index(sock_net(skb->sk), index);
2011     if (!device)
2012         return -EINVAL;
2013 
2014     port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2015     if (!rdma_is_port_valid(device, port)) {
2016         ret = -EINVAL;
2017         goto err_put_device;
2018     }
2019 
2020     if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
2021         !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2022         ret = -EINVAL;
2023         goto err_put_device;
2024     }
2025 
2026     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2027     if (!msg) {
2028         ret = -ENOMEM;
2029         goto err_put_device;
2030     }
2031     nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2032             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2033                      RDMA_NLDEV_CMD_STAT_SET),
2034             0, 0);
2035     if (fill_nldev_handle(msg, device) ||
2036         nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2037         ret = -EMSGSIZE;
2038         goto err_free_msg;
2039     }
2040 
2041     if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
2042         ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
2043         if (ret)
2044             goto err_free_msg;
2045     }
2046 
2047     if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
2048         ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
2049         if (ret)
2050             goto err_free_msg;
2051     }
2052 
2053     nlmsg_end(msg, nlh);
2054     ib_device_put(device);
2055     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2056 
2057 err_free_msg:
2058     nlmsg_free(msg);
2059 err_put_device:
2060     ib_device_put(device);
2061     return ret;
2062 }
2063 
2064 static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2065                    struct netlink_ext_ack *extack)
2066 {
2067     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2068     struct ib_device *device;
2069     struct sk_buff *msg;
2070     u32 index, port, qpn, cntn;
2071     int ret;
2072 
2073     ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2074               nldev_policy, extack);
2075     if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
2076         !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
2077         !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
2078         !tb[RDMA_NLDEV_ATTR_RES_LQPN])
2079         return -EINVAL;
2080 
2081     if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
2082         return -EINVAL;
2083 
2084     index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2085     device = ib_device_get_by_index(sock_net(skb->sk), index);
2086     if (!device)
2087         return -EINVAL;
2088 
2089     port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2090     if (!rdma_is_port_valid(device, port)) {
2091         ret = -EINVAL;
2092         goto err;
2093     }
2094 
2095     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2096     if (!msg) {
2097         ret = -ENOMEM;
2098         goto err;
2099     }
2100     nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2101             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2102                      RDMA_NLDEV_CMD_STAT_SET),
2103             0, 0);
2104 
2105     cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
2106     qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
2107     if (fill_nldev_handle(msg, device) ||
2108         nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2109         nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
2110         nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
2111         ret = -EMSGSIZE;
2112         goto err_fill;
2113     }
2114 
2115     ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
2116     if (ret)
2117         goto err_fill;
2118 
2119     nlmsg_end(msg, nlh);
2120     ib_device_put(device);
2121     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2122 
2123 err_fill:
2124     nlmsg_free(msg);
2125 err:
2126     ib_device_put(device);
2127     return ret;
2128 }
2129 
2130 static int stat_get_doit_default_counter(struct sk_buff *skb,
2131                      struct nlmsghdr *nlh,
2132                      struct netlink_ext_ack *extack,
2133                      struct nlattr *tb[])
2134 {
2135     struct rdma_hw_stats *stats;
2136     struct nlattr *table_attr;
2137     struct ib_device *device;
2138     int ret, num_cnts, i;
2139     struct sk_buff *msg;
2140     u32 index, port;
2141     u64 v;
2142 
2143     if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2144         return -EINVAL;
2145 
2146     index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2147     device = ib_device_get_by_index(sock_net(skb->sk), index);
2148     if (!device)
2149         return -EINVAL;
2150 
2151     if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
2152         ret = -EINVAL;
2153         goto err;
2154     }
2155 
2156     port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2157     stats = ib_get_hw_stats_port(device, port);
2158     if (!stats) {
2159         ret = -EINVAL;
2160         goto err;
2161     }
2162 
2163     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2164     if (!msg) {
2165         ret = -ENOMEM;
2166         goto err;
2167     }
2168 
2169     nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2170             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2171                      RDMA_NLDEV_CMD_STAT_GET),
2172             0, 0);
2173 
2174     if (fill_nldev_handle(msg, device) ||
2175         nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
2176         ret = -EMSGSIZE;
2177         goto err_msg;
2178     }
2179 
2180     mutex_lock(&stats->lock);
2181 
2182     num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
2183     if (num_cnts < 0) {
2184         ret = -EINVAL;
2185         goto err_stats;
2186     }
2187 
2188     table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2189     if (!table_attr) {
2190         ret = -EMSGSIZE;
2191         goto err_stats;
2192     }
2193     for (i = 0; i < num_cnts; i++) {
2194         if (test_bit(i, stats->is_disabled))
2195             continue;
2196 
2197         v = stats->value[i] +
2198             rdma_counter_get_hwstat_value(device, port, i);
2199         if (rdma_nl_stat_hwcounter_entry(msg,
2200                          stats->descs[i].name, v)) {
2201             ret = -EMSGSIZE;
2202             goto err_table;
2203         }
2204     }
2205     nla_nest_end(msg, table_attr);
2206 
2207     mutex_unlock(&stats->lock);
2208     nlmsg_end(msg, nlh);
2209     ib_device_put(device);
2210     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2211 
2212 err_table:
2213     nla_nest_cancel(msg, table_attr);
2214 err_stats:
2215     mutex_unlock(&stats->lock);
2216 err_msg:
2217     nlmsg_free(msg);
2218 err:
2219     ib_device_put(device);
2220     return ret;
2221 }
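
/*
 * This services plain "rdma statistic show link mlx5_2/1" (iproute2):
 * each counter reported is the port-wide base value plus, via
 * rdma_counter_get_hwstat_value(), the sum over all counters bound on
 * that port.
 */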
2222 
2223 static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
2224                 struct netlink_ext_ack *extack, struct nlattr *tb[])
2225 
2226 {
2227     enum rdma_nl_counter_mode mode;
2228     enum rdma_nl_counter_mask mask;
2229     struct ib_device *device;
2230     struct sk_buff *msg;
2231     u32 index, port;
2232     int ret;
2233 
2234     if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
2235         return nldev_res_get_counter_doit(skb, nlh, extack);
2236 
2237     if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
2238         !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2239         return -EINVAL;
2240 
2241     index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2242     device = ib_device_get_by_index(sock_net(skb->sk), index);
2243     if (!device)
2244         return -EINVAL;
2245 
2246     port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2247     if (!rdma_is_port_valid(device, port)) {
2248         ret = -EINVAL;
2249         goto err;
2250     }
2251 
2252     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2253     if (!msg) {
2254         ret = -ENOMEM;
2255         goto err;
2256     }
2257 
2258     nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2259             RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
2260                      RDMA_NLDEV_CMD_STAT_GET),
2261             0, 0);
2262 
2263     ret = rdma_counter_get_mode(device, port, &mode, &mask);
2264     if (ret)
2265         goto err_msg;
2266 
2267     if (fill_nldev_handle(msg, device) ||
2268         nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
2269         nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
2270         ret = -EMSGSIZE;
2271         goto err_msg;
2272     }
2273 
2274     if ((mode == RDMA_COUNTER_MODE_AUTO) &&
2275         nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
2276         ret = -EMSGSIZE;
2277         goto err_msg;
2278     }
2279 
2280     nlmsg_end(msg, nlh);
2281     ib_device_put(device);
2282     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2283 
2284 err_msg:
2285     nlmsg_free(msg);
2286 err:
2287     ib_device_put(device);
2288     return ret;
2289 }
2290 
2291 static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
2292                    struct netlink_ext_ack *extack)
2293 {
2294     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2295     int ret;
2296 
2297     ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2298               nldev_policy, extack);
2299     if (ret)
2300         return -EINVAL;
2301 
2302     if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
2303         return stat_get_doit_default_counter(skb, nlh, extack, tb);
2304 
2305     switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2306     case RDMA_NLDEV_ATTR_RES_QP:
2307         ret = stat_get_doit_qp(skb, nlh, extack, tb);
2308         break;
2309     case RDMA_NLDEV_ATTR_RES_MR:
2310         ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
2311                       fill_stat_mr_entry);
2312         break;
2313     default:
2314         ret = -EINVAL;
2315         break;
2316     }
2317 
2318     return ret;
2319 }
2320 
2321 static int nldev_stat_get_dumpit(struct sk_buff *skb,
2322                  struct netlink_callback *cb)
2323 {
2324     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
2325     int ret;
2326 
2327     ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2328               nldev_policy, NULL);
2329     if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
2330         return -EINVAL;
2331 
2332     switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
2333     case RDMA_NLDEV_ATTR_RES_QP:
2334         ret = nldev_res_get_counter_dumpit(skb, cb);
2335         break;
2336     case RDMA_NLDEV_ATTR_RES_MR:
2337         ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
2338                         fill_stat_mr_entry);
2339         break;
2340     default:
2341         ret = -EINVAL;
2342         break;
2343     }
2344 
2345     return ret;
2346 }
2347 
2348 static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
2349                           struct nlmsghdr *nlh,
2350                           struct netlink_ext_ack *extack)
2351 {
2352     struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
2353     struct rdma_hw_stats *stats;
2354     struct ib_device *device;
2355     struct sk_buff *msg;
2356     u32 devid, port;
2357     int ret, i;
2358 
2359     ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
2360               nldev_policy, extack);
2361     if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
2362         !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
2363         return -EINVAL;
2364 
2365     devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
2366     device = ib_device_get_by_index(sock_net(skb->sk), devid);
2367     if (!device)
2368         return -EINVAL;
2369 
2370     port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
2371     if (!rdma_is_port_valid(device, port)) {
2372         ret = -EINVAL;
2373         goto err;
2374     }
2375 
2376     stats = ib_get_hw_stats_port(device, port);
2377     if (!stats) {
2378         ret = -EINVAL;
2379         goto err;
2380     }
2381 
2382     msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2383     if (!msg) {
2384         ret = -ENOMEM;
2385         goto err;
2386     }
2387 
2388     nlh = nlmsg_put(
2389         msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
2390         RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
2391         0, 0);
2392 
2393     ret = -EMSGSIZE;
2394     if (fill_nldev_handle(msg, device) ||
2395         nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
2396         goto err_msg;
2397 
2398     table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
2399     if (!table)
2400         goto err_msg;
2401 
2402     mutex_lock(&stats->lock);
2403     for (i = 0; i < stats->num_counters; i++) {
2404         entry = nla_nest_start(msg,
2405                        RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
2406         if (!entry)
2407             goto err_msg_table;
2408 
2409         if (nla_put_string(msg,
2410                    RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
2411                    stats->descs[i].name) ||
2412             nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
2413             goto err_msg_entry;
2414 
2415         if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
2416             (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
2417                 !test_bit(i, stats->is_disabled))))
2418             goto err_msg_entry;
2419 
2420         nla_nest_end(msg, entry);
2421     }
2422     mutex_unlock(&stats->lock);
2423 
2424     nla_nest_end(msg, table);
2425     nlmsg_end(msg, nlh);
2426     ib_device_put(device);
2427     return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
2428 
2429 err_msg_entry:
2430     nla_nest_cancel(msg, entry);
2431 err_msg_table:
2432     mutex_unlock(&stats->lock);
2433     nla_nest_cancel(msg, table);
2434 err_msg:
2435     nlmsg_free(msg);
2436 err:
2437     ib_device_put(device);
2438     return ret;
2439 }
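
/*
 * This backs "rdma statistic mode [supported] link mlx5_2/1"
 * (iproute2): every hw counter is listed with its name and index, and
 * optional ones additionally carry a "dynamic" flag saying whether
 * they are currently enabled.
 */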
2440 
2441 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
2442     [RDMA_NLDEV_CMD_GET] = {
2443         .doit = nldev_get_doit,
2444         .dump = nldev_get_dumpit,
2445     },
2446     [RDMA_NLDEV_CMD_GET_CHARDEV] = {
2447         .doit = nldev_get_chardev,
2448     },
2449     [RDMA_NLDEV_CMD_SET] = {
2450         .doit = nldev_set_doit,
2451         .flags = RDMA_NL_ADMIN_PERM,
2452     },
2453     [RDMA_NLDEV_CMD_NEWLINK] = {
2454         .doit = nldev_newlink,
2455         .flags = RDMA_NL_ADMIN_PERM,
2456     },
2457     [RDMA_NLDEV_CMD_DELLINK] = {
2458         .doit = nldev_dellink,
2459         .flags = RDMA_NL_ADMIN_PERM,
2460     },
2461     [RDMA_NLDEV_CMD_PORT_GET] = {
2462         .doit = nldev_port_get_doit,
2463         .dump = nldev_port_get_dumpit,
2464     },
2465     [RDMA_NLDEV_CMD_RES_GET] = {
2466         .doit = nldev_res_get_doit,
2467         .dump = nldev_res_get_dumpit,
2468     },
2469     [RDMA_NLDEV_CMD_RES_QP_GET] = {
2470         .doit = nldev_res_get_qp_doit,
2471         .dump = nldev_res_get_qp_dumpit,
2472     },
2473     [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
2474         .doit = nldev_res_get_cm_id_doit,
2475         .dump = nldev_res_get_cm_id_dumpit,
2476     },
2477     [RDMA_NLDEV_CMD_RES_CQ_GET] = {
2478         .doit = nldev_res_get_cq_doit,
2479         .dump = nldev_res_get_cq_dumpit,
2480     },
2481     [RDMA_NLDEV_CMD_RES_MR_GET] = {
2482         .doit = nldev_res_get_mr_doit,
2483         .dump = nldev_res_get_mr_dumpit,
2484     },
2485     [RDMA_NLDEV_CMD_RES_PD_GET] = {
2486         .doit = nldev_res_get_pd_doit,
2487         .dump = nldev_res_get_pd_dumpit,
2488     },
2489     [RDMA_NLDEV_CMD_RES_CTX_GET] = {
2490         .doit = nldev_res_get_ctx_doit,
2491         .dump = nldev_res_get_ctx_dumpit,
2492     },
2493     [RDMA_NLDEV_CMD_RES_SRQ_GET] = {
2494         .doit = nldev_res_get_srq_doit,
2495         .dump = nldev_res_get_srq_dumpit,
2496     },
2497     [RDMA_NLDEV_CMD_SYS_GET] = {
2498         .doit = nldev_sys_get_doit,
2499     },
2500     [RDMA_NLDEV_CMD_SYS_SET] = {
2501         .doit = nldev_set_sys_set_doit,
2502     },
2503     [RDMA_NLDEV_CMD_STAT_SET] = {
2504         .doit = nldev_stat_set_doit,
2505         .flags = RDMA_NL_ADMIN_PERM,
2506     },
2507     [RDMA_NLDEV_CMD_STAT_GET] = {
2508         .doit = nldev_stat_get_doit,
2509         .dump = nldev_stat_get_dumpit,
2510     },
2511     [RDMA_NLDEV_CMD_STAT_DEL] = {
2512         .doit = nldev_stat_del_doit,
2513         .flags = RDMA_NL_ADMIN_PERM,
2514     },
2515     [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
2516         .doit = nldev_res_get_qp_raw_doit,
2517         .dump = nldev_res_get_qp_raw_dumpit,
2518         .flags = RDMA_NL_ADMIN_PERM,
2519     },
2520     [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
2521         .doit = nldev_res_get_cq_raw_doit,
2522         .dump = nldev_res_get_cq_raw_dumpit,
2523         .flags = RDMA_NL_ADMIN_PERM,
2524     },
2525     [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
2526         .doit = nldev_res_get_mr_raw_doit,
2527         .dump = nldev_res_get_mr_raw_dumpit,
2528         .flags = RDMA_NL_ADMIN_PERM,
2529     },
2530     [RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
2531         .doit = nldev_stat_get_counter_status_doit,
2532     },
2533 };
2534 
2535 void __init nldev_init(void)
2536 {
2537     rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
2538 }
2539 
2540 void __exit nldev_exit(void)
2541 {
2542     rdma_nl_unregister(RDMA_NL_NLDEV);
2543 }
2544 
2545 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
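
/*
 * The literal 5 matches RDMA_NL_NLDEV's value in the uapi enum; the
 * resulting "rdma-netlink-subsys-5" module alias lets the rdma netlink
 * core request_module() this subsystem on first use.
 */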