Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
0003  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
0004  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
0005  *
0006  * This software is available to you under a choice of one of two
0007  * licenses.  You may choose to be licensed under the terms of the GNU
0008  * General Public License (GPL) Version 2, available from the file
0009  * COPYING in the main directory of this source tree, or the
0010  * OpenIB.org BSD license below:
0011  *
0012  *     Redistribution and use in source and binary forms, with or
0013  *     without modification, are permitted provided that the following
0014  *     conditions are met:
0015  *
0016  *      - Redistributions of source code must retain the above
0017  *        copyright notice, this list of conditions and the following
0018  *        disclaimer.
0019  *
0020  *      - Redistributions in binary form must reproduce the above
0021  *        copyright notice, this list of conditions and the following
0022  *        disclaimer in the documentation and/or other materials
0023  *        provided with the distribution.
0024  *
0025  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0026  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0027  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0028  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0029  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0030  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0031  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0032  * SOFTWARE.
0033  */
0034 
0035 #include <linux/init.h>
0036 #include <linux/err.h>
0037 #include <linux/random.h>
0038 #include <linux/spinlock.h>
0039 #include <linux/slab.h>
0040 #include <linux/dma-mapping.h>
0041 #include <linux/kref.h>
0042 #include <linux/xarray.h>
0043 #include <linux/workqueue.h>
0044 #include <uapi/linux/if_ether.h>
0045 #include <rdma/ib_pack.h>
0046 #include <rdma/ib_cache.h>
0047 #include <rdma/rdma_netlink.h>
0048 #include <net/netlink.h>
0049 #include <uapi/rdma/ib_user_sa.h>
0050 #include <rdma/ib_marshall.h>
0051 #include <rdma/ib_addr.h>
0052 #include <rdma/opa_addr.h>
0053 #include "sa.h"
0054 #include "core_priv.h"
0055 
/*
 * Local service timeout limits and default. The default feeds
 * sa_local_svc_timeout_ms, which is in msecs; MIN and MAX are presumably
 * the accepted bounds in the same unit (their users are outside this view).
 */
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN		100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT		2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX		200000

/* Retry budget and wait for the "CPI" work (presumably ClassPortInfo). */
#define IB_SA_CPI_MAX_RETRY_CNT			3
#define IB_SA_CPI_RETRY_WAIT			1000	/* msecs */

/* Timeout, in msecs, applied to each request handed to the local service. */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
0062 
0063 struct ib_sa_sm_ah {
0064     struct ib_ah        *ah;
0065     struct kref          ref;
0066     u16          pkey_index;
0067     u8           src_path_mask;
0068 };
0069 
/* Flavour of class port info record: InfiniBand or OPA. */
enum rdma_class_port_info_type {
	RDMA_CLASS_PORT_INFO_IB,
	RDMA_CLASS_PORT_INFO_OPA,
};
0074 
0075 struct rdma_class_port_info {
0076     enum rdma_class_port_info_type type;
0077     union {
0078         struct ib_class_port_info ib;
0079         struct opa_class_port_info opa;
0080     };
0081 };
0082 
0083 struct ib_sa_classport_cache {
0084     bool valid;
0085     int retry_cnt;
0086     struct rdma_class_port_info data;
0087 };
0088 
0089 struct ib_sa_port {
0090     struct ib_mad_agent *agent;
0091     struct ib_sa_sm_ah  *sm_ah;
0092     struct work_struct   update_task;
0093     struct ib_sa_classport_cache classport_info;
0094     struct delayed_work ib_cpi_work;
0095     spinlock_t                   classport_lock; /* protects class port info set */
0096     spinlock_t           ah_lock;
0097     u32          port_num;
0098 };
0099 
0100 struct ib_sa_device {
0101     int                     start_port, end_port;
0102     struct ib_event_handler event_handler;
0103     struct ib_sa_port port[];
0104 };
0105 
0106 struct ib_sa_query {
0107     void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
0108     void (*release)(struct ib_sa_query *);
0109     struct ib_sa_client    *client;
0110     struct ib_sa_port      *port;
0111     struct ib_mad_send_buf *mad_buf;
0112     struct ib_sa_sm_ah     *sm_ah;
0113     int         id;
0114     u32         flags;
0115     struct list_head    list; /* Local svc request list */
0116     u32         seq; /* Local svc request sequence number */
0117     unsigned long       timeout; /* Local svc timeout */
0118     u8          path_use; /* How will the pathrecord be used */
0119 };
0120 
/* Bits stored in ib_sa_query.flags */
#define IB_SA_ENABLE_LOCAL_SERVICE	0x00000001
#define IB_SA_CANCEL			0x00000002
#define IB_SA_QUERY_OPA			0x00000004
0124 
0125 struct ib_sa_path_query {
0126     void (*callback)(int, struct sa_path_rec *, void *);
0127     void *context;
0128     struct ib_sa_query sa_query;
0129     struct sa_path_rec *conv_pr;
0130 };
0131 
0132 struct ib_sa_guidinfo_query {
0133     void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
0134     void *context;
0135     struct ib_sa_query sa_query;
0136 };
0137 
0138 struct ib_sa_classport_info_query {
0139     void (*callback)(void *);
0140     void *context;
0141     struct ib_sa_query sa_query;
0142 };
0143 
0144 struct ib_sa_mcmember_query {
0145     void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
0146     void *context;
0147     struct ib_sa_query sa_query;
0148 };
0149 
0150 static LIST_HEAD(ib_nl_request_list);
0151 static DEFINE_SPINLOCK(ib_nl_request_lock);
0152 static atomic_t ib_nl_sa_request_seq;
0153 static struct workqueue_struct *ib_nl_wq;
0154 static struct delayed_work ib_nl_timed_work;
0155 static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
0156     [LS_NLA_TYPE_PATH_RECORD]   = {.type = NLA_BINARY,
0157         .len = sizeof(struct ib_path_rec_data)},
0158     [LS_NLA_TYPE_TIMEOUT]       = {.type = NLA_U32},
0159     [LS_NLA_TYPE_SERVICE_ID]    = {.type = NLA_U64},
0160     [LS_NLA_TYPE_DGID]      = {.type = NLA_BINARY,
0161         .len = sizeof(struct rdma_nla_ls_gid)},
0162     [LS_NLA_TYPE_SGID]      = {.type = NLA_BINARY,
0163         .len = sizeof(struct rdma_nla_ls_gid)},
0164     [LS_NLA_TYPE_TCLASS]        = {.type = NLA_U8},
0165     [LS_NLA_TYPE_PKEY]      = {.type = NLA_U16},
0166     [LS_NLA_TYPE_QOS_CLASS]     = {.type = NLA_U16},
0167 };
0168 
0169 
0170 static int ib_sa_add_one(struct ib_device *device);
0171 static void ib_sa_remove_one(struct ib_device *device, void *client_data);
0172 
0173 static struct ib_client sa_client = {
0174     .name   = "sa",
0175     .add    = ib_sa_add_one,
0176     .remove = ib_sa_remove_one
0177 };
0178 
0179 static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
0180 
0181 static DEFINE_SPINLOCK(tid_lock);
0182 static u32 tid;
0183 
0184 #define PATH_REC_FIELD(field) \
0185     .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
0186     .struct_size_bytes   = sizeof_field(struct sa_path_rec, field), \
0187     .field_name          = "sa_path_rec:" #field
0188 
0189 static const struct ib_field path_rec_table[] = {
0190     { PATH_REC_FIELD(service_id),
0191       .offset_words = 0,
0192       .offset_bits  = 0,
0193       .size_bits    = 64 },
0194     { PATH_REC_FIELD(dgid),
0195       .offset_words = 2,
0196       .offset_bits  = 0,
0197       .size_bits    = 128 },
0198     { PATH_REC_FIELD(sgid),
0199       .offset_words = 6,
0200       .offset_bits  = 0,
0201       .size_bits    = 128 },
0202     { PATH_REC_FIELD(ib.dlid),
0203       .offset_words = 10,
0204       .offset_bits  = 0,
0205       .size_bits    = 16 },
0206     { PATH_REC_FIELD(ib.slid),
0207       .offset_words = 10,
0208       .offset_bits  = 16,
0209       .size_bits    = 16 },
0210     { PATH_REC_FIELD(ib.raw_traffic),
0211       .offset_words = 11,
0212       .offset_bits  = 0,
0213       .size_bits    = 1 },
0214     { RESERVED,
0215       .offset_words = 11,
0216       .offset_bits  = 1,
0217       .size_bits    = 3 },
0218     { PATH_REC_FIELD(flow_label),
0219       .offset_words = 11,
0220       .offset_bits  = 4,
0221       .size_bits    = 20 },
0222     { PATH_REC_FIELD(hop_limit),
0223       .offset_words = 11,
0224       .offset_bits  = 24,
0225       .size_bits    = 8 },
0226     { PATH_REC_FIELD(traffic_class),
0227       .offset_words = 12,
0228       .offset_bits  = 0,
0229       .size_bits    = 8 },
0230     { PATH_REC_FIELD(reversible),
0231       .offset_words = 12,
0232       .offset_bits  = 8,
0233       .size_bits    = 1 },
0234     { PATH_REC_FIELD(numb_path),
0235       .offset_words = 12,
0236       .offset_bits  = 9,
0237       .size_bits    = 7 },
0238     { PATH_REC_FIELD(pkey),
0239       .offset_words = 12,
0240       .offset_bits  = 16,
0241       .size_bits    = 16 },
0242     { PATH_REC_FIELD(qos_class),
0243       .offset_words = 13,
0244       .offset_bits  = 0,
0245       .size_bits    = 12 },
0246     { PATH_REC_FIELD(sl),
0247       .offset_words = 13,
0248       .offset_bits  = 12,
0249       .size_bits    = 4 },
0250     { PATH_REC_FIELD(mtu_selector),
0251       .offset_words = 13,
0252       .offset_bits  = 16,
0253       .size_bits    = 2 },
0254     { PATH_REC_FIELD(mtu),
0255       .offset_words = 13,
0256       .offset_bits  = 18,
0257       .size_bits    = 6 },
0258     { PATH_REC_FIELD(rate_selector),
0259       .offset_words = 13,
0260       .offset_bits  = 24,
0261       .size_bits    = 2 },
0262     { PATH_REC_FIELD(rate),
0263       .offset_words = 13,
0264       .offset_bits  = 26,
0265       .size_bits    = 6 },
0266     { PATH_REC_FIELD(packet_life_time_selector),
0267       .offset_words = 14,
0268       .offset_bits  = 0,
0269       .size_bits    = 2 },
0270     { PATH_REC_FIELD(packet_life_time),
0271       .offset_words = 14,
0272       .offset_bits  = 2,
0273       .size_bits    = 6 },
0274     { PATH_REC_FIELD(preference),
0275       .offset_words = 14,
0276       .offset_bits  = 8,
0277       .size_bits    = 8 },
0278     { RESERVED,
0279       .offset_words = 14,
0280       .offset_bits  = 16,
0281       .size_bits    = 48 },
0282 };
0283 
0284 #define OPA_PATH_REC_FIELD(field) \
0285     .struct_offset_bytes = \
0286         offsetof(struct sa_path_rec, field), \
0287     .struct_size_bytes   = \
0288         sizeof_field(struct sa_path_rec, field),    \
0289     .field_name          = "sa_path_rec:" #field
0290 
0291 static const struct ib_field opa_path_rec_table[] = {
0292     { OPA_PATH_REC_FIELD(service_id),
0293       .offset_words = 0,
0294       .offset_bits  = 0,
0295       .size_bits    = 64 },
0296     { OPA_PATH_REC_FIELD(dgid),
0297       .offset_words = 2,
0298       .offset_bits  = 0,
0299       .size_bits    = 128 },
0300     { OPA_PATH_REC_FIELD(sgid),
0301       .offset_words = 6,
0302       .offset_bits  = 0,
0303       .size_bits    = 128 },
0304     { OPA_PATH_REC_FIELD(opa.dlid),
0305       .offset_words = 10,
0306       .offset_bits  = 0,
0307       .size_bits    = 32 },
0308     { OPA_PATH_REC_FIELD(opa.slid),
0309       .offset_words = 11,
0310       .offset_bits  = 0,
0311       .size_bits    = 32 },
0312     { OPA_PATH_REC_FIELD(opa.raw_traffic),
0313       .offset_words = 12,
0314       .offset_bits  = 0,
0315       .size_bits    = 1 },
0316     { RESERVED,
0317       .offset_words = 12,
0318       .offset_bits  = 1,
0319       .size_bits    = 3 },
0320     { OPA_PATH_REC_FIELD(flow_label),
0321       .offset_words = 12,
0322       .offset_bits  = 4,
0323       .size_bits    = 20 },
0324     { OPA_PATH_REC_FIELD(hop_limit),
0325       .offset_words = 12,
0326       .offset_bits  = 24,
0327       .size_bits    = 8 },
0328     { OPA_PATH_REC_FIELD(traffic_class),
0329       .offset_words = 13,
0330       .offset_bits  = 0,
0331       .size_bits    = 8 },
0332     { OPA_PATH_REC_FIELD(reversible),
0333       .offset_words = 13,
0334       .offset_bits  = 8,
0335       .size_bits    = 1 },
0336     { OPA_PATH_REC_FIELD(numb_path),
0337       .offset_words = 13,
0338       .offset_bits  = 9,
0339       .size_bits    = 7 },
0340     { OPA_PATH_REC_FIELD(pkey),
0341       .offset_words = 13,
0342       .offset_bits  = 16,
0343       .size_bits    = 16 },
0344     { OPA_PATH_REC_FIELD(opa.l2_8B),
0345       .offset_words = 14,
0346       .offset_bits  = 0,
0347       .size_bits    = 1 },
0348     { OPA_PATH_REC_FIELD(opa.l2_10B),
0349       .offset_words = 14,
0350       .offset_bits  = 1,
0351       .size_bits    = 1 },
0352     { OPA_PATH_REC_FIELD(opa.l2_9B),
0353       .offset_words = 14,
0354       .offset_bits  = 2,
0355       .size_bits    = 1 },
0356     { OPA_PATH_REC_FIELD(opa.l2_16B),
0357       .offset_words = 14,
0358       .offset_bits  = 3,
0359       .size_bits    = 1 },
0360     { RESERVED,
0361       .offset_words = 14,
0362       .offset_bits  = 4,
0363       .size_bits    = 2 },
0364     { OPA_PATH_REC_FIELD(opa.qos_type),
0365       .offset_words = 14,
0366       .offset_bits  = 6,
0367       .size_bits    = 2 },
0368     { OPA_PATH_REC_FIELD(opa.qos_priority),
0369       .offset_words = 14,
0370       .offset_bits  = 8,
0371       .size_bits    = 8 },
0372     { RESERVED,
0373       .offset_words = 14,
0374       .offset_bits  = 16,
0375       .size_bits    = 3 },
0376     { OPA_PATH_REC_FIELD(sl),
0377       .offset_words = 14,
0378       .offset_bits  = 19,
0379       .size_bits    = 5 },
0380     { RESERVED,
0381       .offset_words = 14,
0382       .offset_bits  = 24,
0383       .size_bits    = 8 },
0384     { OPA_PATH_REC_FIELD(mtu_selector),
0385       .offset_words = 15,
0386       .offset_bits  = 0,
0387       .size_bits    = 2 },
0388     { OPA_PATH_REC_FIELD(mtu),
0389       .offset_words = 15,
0390       .offset_bits  = 2,
0391       .size_bits    = 6 },
0392     { OPA_PATH_REC_FIELD(rate_selector),
0393       .offset_words = 15,
0394       .offset_bits  = 8,
0395       .size_bits    = 2 },
0396     { OPA_PATH_REC_FIELD(rate),
0397       .offset_words = 15,
0398       .offset_bits  = 10,
0399       .size_bits    = 6 },
0400     { OPA_PATH_REC_FIELD(packet_life_time_selector),
0401       .offset_words = 15,
0402       .offset_bits  = 16,
0403       .size_bits    = 2 },
0404     { OPA_PATH_REC_FIELD(packet_life_time),
0405       .offset_words = 15,
0406       .offset_bits  = 18,
0407       .size_bits    = 6 },
0408     { OPA_PATH_REC_FIELD(preference),
0409       .offset_words = 15,
0410       .offset_bits  = 24,
0411       .size_bits    = 8 },
0412 };
0413 
0414 #define MCMEMBER_REC_FIELD(field) \
0415     .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),  \
0416     .struct_size_bytes   = sizeof_field(struct ib_sa_mcmember_rec, field),  \
0417     .field_name          = "sa_mcmember_rec:" #field
0418 
0419 static const struct ib_field mcmember_rec_table[] = {
0420     { MCMEMBER_REC_FIELD(mgid),
0421       .offset_words = 0,
0422       .offset_bits  = 0,
0423       .size_bits    = 128 },
0424     { MCMEMBER_REC_FIELD(port_gid),
0425       .offset_words = 4,
0426       .offset_bits  = 0,
0427       .size_bits    = 128 },
0428     { MCMEMBER_REC_FIELD(qkey),
0429       .offset_words = 8,
0430       .offset_bits  = 0,
0431       .size_bits    = 32 },
0432     { MCMEMBER_REC_FIELD(mlid),
0433       .offset_words = 9,
0434       .offset_bits  = 0,
0435       .size_bits    = 16 },
0436     { MCMEMBER_REC_FIELD(mtu_selector),
0437       .offset_words = 9,
0438       .offset_bits  = 16,
0439       .size_bits    = 2 },
0440     { MCMEMBER_REC_FIELD(mtu),
0441       .offset_words = 9,
0442       .offset_bits  = 18,
0443       .size_bits    = 6 },
0444     { MCMEMBER_REC_FIELD(traffic_class),
0445       .offset_words = 9,
0446       .offset_bits  = 24,
0447       .size_bits    = 8 },
0448     { MCMEMBER_REC_FIELD(pkey),
0449       .offset_words = 10,
0450       .offset_bits  = 0,
0451       .size_bits    = 16 },
0452     { MCMEMBER_REC_FIELD(rate_selector),
0453       .offset_words = 10,
0454       .offset_bits  = 16,
0455       .size_bits    = 2 },
0456     { MCMEMBER_REC_FIELD(rate),
0457       .offset_words = 10,
0458       .offset_bits  = 18,
0459       .size_bits    = 6 },
0460     { MCMEMBER_REC_FIELD(packet_life_time_selector),
0461       .offset_words = 10,
0462       .offset_bits  = 24,
0463       .size_bits    = 2 },
0464     { MCMEMBER_REC_FIELD(packet_life_time),
0465       .offset_words = 10,
0466       .offset_bits  = 26,
0467       .size_bits    = 6 },
0468     { MCMEMBER_REC_FIELD(sl),
0469       .offset_words = 11,
0470       .offset_bits  = 0,
0471       .size_bits    = 4 },
0472     { MCMEMBER_REC_FIELD(flow_label),
0473       .offset_words = 11,
0474       .offset_bits  = 4,
0475       .size_bits    = 20 },
0476     { MCMEMBER_REC_FIELD(hop_limit),
0477       .offset_words = 11,
0478       .offset_bits  = 24,
0479       .size_bits    = 8 },
0480     { MCMEMBER_REC_FIELD(scope),
0481       .offset_words = 12,
0482       .offset_bits  = 0,
0483       .size_bits    = 4 },
0484     { MCMEMBER_REC_FIELD(join_state),
0485       .offset_words = 12,
0486       .offset_bits  = 4,
0487       .size_bits    = 4 },
0488     { MCMEMBER_REC_FIELD(proxy_join),
0489       .offset_words = 12,
0490       .offset_bits  = 8,
0491       .size_bits    = 1 },
0492     { RESERVED,
0493       .offset_words = 12,
0494       .offset_bits  = 9,
0495       .size_bits    = 23 },
0496 };
0497 
0498 #define CLASSPORTINFO_REC_FIELD(field) \
0499     .struct_offset_bytes = offsetof(struct ib_class_port_info, field),  \
0500     .struct_size_bytes   = sizeof_field(struct ib_class_port_info, field),  \
0501     .field_name          = "ib_class_port_info:" #field
0502 
0503 static const struct ib_field ib_classport_info_rec_table[] = {
0504     { CLASSPORTINFO_REC_FIELD(base_version),
0505       .offset_words = 0,
0506       .offset_bits  = 0,
0507       .size_bits    = 8 },
0508     { CLASSPORTINFO_REC_FIELD(class_version),
0509       .offset_words = 0,
0510       .offset_bits  = 8,
0511       .size_bits    = 8 },
0512     { CLASSPORTINFO_REC_FIELD(capability_mask),
0513       .offset_words = 0,
0514       .offset_bits  = 16,
0515       .size_bits    = 16 },
0516     { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
0517       .offset_words = 1,
0518       .offset_bits  = 0,
0519       .size_bits    = 32 },
0520     { CLASSPORTINFO_REC_FIELD(redirect_gid),
0521       .offset_words = 2,
0522       .offset_bits  = 0,
0523       .size_bits    = 128 },
0524     { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
0525       .offset_words = 6,
0526       .offset_bits  = 0,
0527       .size_bits    = 32 },
0528     { CLASSPORTINFO_REC_FIELD(redirect_lid),
0529       .offset_words = 7,
0530       .offset_bits  = 0,
0531       .size_bits    = 16 },
0532     { CLASSPORTINFO_REC_FIELD(redirect_pkey),
0533       .offset_words = 7,
0534       .offset_bits  = 16,
0535       .size_bits    = 16 },
0536 
0537     { CLASSPORTINFO_REC_FIELD(redirect_qp),
0538       .offset_words = 8,
0539       .offset_bits  = 0,
0540       .size_bits    = 32 },
0541     { CLASSPORTINFO_REC_FIELD(redirect_qkey),
0542       .offset_words = 9,
0543       .offset_bits  = 0,
0544       .size_bits    = 32 },
0545 
0546     { CLASSPORTINFO_REC_FIELD(trap_gid),
0547       .offset_words = 10,
0548       .offset_bits  = 0,
0549       .size_bits    = 128 },
0550     { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
0551       .offset_words = 14,
0552       .offset_bits  = 0,
0553       .size_bits    = 32 },
0554 
0555     { CLASSPORTINFO_REC_FIELD(trap_lid),
0556       .offset_words = 15,
0557       .offset_bits  = 0,
0558       .size_bits    = 16 },
0559     { CLASSPORTINFO_REC_FIELD(trap_pkey),
0560       .offset_words = 15,
0561       .offset_bits  = 16,
0562       .size_bits    = 16 },
0563 
0564     { CLASSPORTINFO_REC_FIELD(trap_hlqp),
0565       .offset_words = 16,
0566       .offset_bits  = 0,
0567       .size_bits    = 32 },
0568     { CLASSPORTINFO_REC_FIELD(trap_qkey),
0569       .offset_words = 17,
0570       .offset_bits  = 0,
0571       .size_bits    = 32 },
0572 };
0573 
0574 #define OPA_CLASSPORTINFO_REC_FIELD(field) \
0575     .struct_offset_bytes =\
0576         offsetof(struct opa_class_port_info, field),    \
0577     .struct_size_bytes   = \
0578         sizeof_field(struct opa_class_port_info, field),    \
0579     .field_name          = "opa_class_port_info:" #field
0580 
0581 static const struct ib_field opa_classport_info_rec_table[] = {
0582     { OPA_CLASSPORTINFO_REC_FIELD(base_version),
0583       .offset_words = 0,
0584       .offset_bits  = 0,
0585       .size_bits    = 8 },
0586     { OPA_CLASSPORTINFO_REC_FIELD(class_version),
0587       .offset_words = 0,
0588       .offset_bits  = 8,
0589       .size_bits    = 8 },
0590     { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
0591       .offset_words = 0,
0592       .offset_bits  = 16,
0593       .size_bits    = 16 },
0594     { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
0595       .offset_words = 1,
0596       .offset_bits  = 0,
0597       .size_bits    = 32 },
0598     { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
0599       .offset_words = 2,
0600       .offset_bits  = 0,
0601       .size_bits    = 128 },
0602     { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
0603       .offset_words = 6,
0604       .offset_bits  = 0,
0605       .size_bits    = 32 },
0606     { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
0607       .offset_words = 7,
0608       .offset_bits  = 0,
0609       .size_bits    = 32 },
0610     { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
0611       .offset_words = 8,
0612       .offset_bits  = 0,
0613       .size_bits    = 32 },
0614     { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
0615       .offset_words = 9,
0616       .offset_bits  = 0,
0617       .size_bits    = 32 },
0618     { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
0619       .offset_words = 10,
0620       .offset_bits  = 0,
0621       .size_bits    = 128 },
0622     { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
0623       .offset_words = 14,
0624       .offset_bits  = 0,
0625       .size_bits    = 32 },
0626     { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
0627       .offset_words = 15,
0628       .offset_bits  = 0,
0629       .size_bits    = 32 },
0630     { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
0631       .offset_words = 16,
0632       .offset_bits  = 0,
0633       .size_bits    = 32 },
0634     { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
0635       .offset_words = 17,
0636       .offset_bits  = 0,
0637       .size_bits    = 32 },
0638     { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
0639       .offset_words = 18,
0640       .offset_bits  = 0,
0641       .size_bits    = 16 },
0642     { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
0643       .offset_words = 18,
0644       .offset_bits  = 16,
0645       .size_bits    = 16 },
0646     { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
0647       .offset_words = 19,
0648       .offset_bits  = 0,
0649       .size_bits    = 8 },
0650     { RESERVED,
0651       .offset_words = 19,
0652       .offset_bits  = 8,
0653       .size_bits    = 24 },
0654 };
0655 
0656 #define GUIDINFO_REC_FIELD(field) \
0657     .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),  \
0658     .struct_size_bytes   = sizeof_field(struct ib_sa_guidinfo_rec, field),  \
0659     .field_name          = "sa_guidinfo_rec:" #field
0660 
0661 static const struct ib_field guidinfo_rec_table[] = {
0662     { GUIDINFO_REC_FIELD(lid),
0663       .offset_words = 0,
0664       .offset_bits  = 0,
0665       .size_bits    = 16 },
0666     { GUIDINFO_REC_FIELD(block_num),
0667       .offset_words = 0,
0668       .offset_bits  = 16,
0669       .size_bits    = 8 },
0670     { GUIDINFO_REC_FIELD(res1),
0671       .offset_words = 0,
0672       .offset_bits  = 24,
0673       .size_bits    = 8 },
0674     { GUIDINFO_REC_FIELD(res2),
0675       .offset_words = 1,
0676       .offset_bits  = 0,
0677       .size_bits    = 32 },
0678     { GUIDINFO_REC_FIELD(guid_info_list),
0679       .offset_words = 2,
0680       .offset_bits  = 0,
0681       .size_bits    = 512 },
0682 };
0683 
0684 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
0685 {
0686     query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
0687 }
0688 
0689 static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
0690 {
0691     return (query->flags & IB_SA_CANCEL);
0692 }
0693 
0694 static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
0695                      struct ib_sa_query *query)
0696 {
0697     struct sa_path_rec *sa_rec = query->mad_buf->context[1];
0698     struct ib_sa_mad *mad = query->mad_buf->mad;
0699     ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
0700     u16 val16;
0701     u64 val64;
0702     struct rdma_ls_resolve_header *header;
0703 
0704     query->mad_buf->context[1] = NULL;
0705 
0706     /* Construct the family header first */
0707     header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
0708     strscpy_pad(header->device_name,
0709             dev_name(&query->port->agent->device->dev),
0710             LS_DEVICE_NAME_MAX);
0711     header->port_num = query->port->port_num;
0712 
0713     if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
0714         sa_rec->reversible != 0)
0715         query->path_use = LS_RESOLVE_PATH_USE_GMP;
0716     else
0717         query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
0718     header->path_use = query->path_use;
0719 
0720     /* Now build the attributes */
0721     if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) {
0722         val64 = be64_to_cpu(sa_rec->service_id);
0723         nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID,
0724             sizeof(val64), &val64);
0725     }
0726     if (comp_mask & IB_SA_PATH_REC_DGID)
0727         nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID,
0728             sizeof(sa_rec->dgid), &sa_rec->dgid);
0729     if (comp_mask & IB_SA_PATH_REC_SGID)
0730         nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID,
0731             sizeof(sa_rec->sgid), &sa_rec->sgid);
0732     if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
0733         nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS,
0734             sizeof(sa_rec->traffic_class), &sa_rec->traffic_class);
0735 
0736     if (comp_mask & IB_SA_PATH_REC_PKEY) {
0737         val16 = be16_to_cpu(sa_rec->pkey);
0738         nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY,
0739             sizeof(val16), &val16);
0740     }
0741     if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) {
0742         val16 = be16_to_cpu(sa_rec->qos_class);
0743         nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS,
0744             sizeof(val16), &val16);
0745     }
0746 }
0747 
0748 static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
0749 {
0750     int len = 0;
0751 
0752     if (comp_mask & IB_SA_PATH_REC_SERVICE_ID)
0753         len += nla_total_size(sizeof(u64));
0754     if (comp_mask & IB_SA_PATH_REC_DGID)
0755         len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
0756     if (comp_mask & IB_SA_PATH_REC_SGID)
0757         len += nla_total_size(sizeof(struct rdma_nla_ls_gid));
0758     if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS)
0759         len += nla_total_size(sizeof(u8));
0760     if (comp_mask & IB_SA_PATH_REC_PKEY)
0761         len += nla_total_size(sizeof(u16));
0762     if (comp_mask & IB_SA_PATH_REC_QOS_CLASS)
0763         len += nla_total_size(sizeof(u16));
0764 
0765     /*
0766      * Make sure that at least some of the required comp_mask bits are
0767      * set.
0768      */
0769     if (WARN_ON(len == 0))
0770         return len;
0771 
0772     /* Add the family header */
0773     len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header));
0774 
0775     return len;
0776 }
0777 
0778 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
0779 {
0780     struct sk_buff *skb = NULL;
0781     struct nlmsghdr *nlh;
0782     void *data;
0783     struct ib_sa_mad *mad;
0784     int len;
0785     unsigned long flags;
0786     unsigned long delay;
0787     gfp_t gfp_flag;
0788     int ret;
0789 
0790     INIT_LIST_HEAD(&query->list);
0791     query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
0792 
0793     mad = query->mad_buf->mad;
0794     len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
0795     if (len <= 0)
0796         return -EMSGSIZE;
0797 
0798     skb = nlmsg_new(len, gfp_mask);
0799     if (!skb)
0800         return -ENOMEM;
0801 
0802     /* Put nlmsg header only for now */
0803     data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS,
0804                 RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST);
0805     if (!data) {
0806         nlmsg_free(skb);
0807         return -EMSGSIZE;
0808     }
0809 
0810     /* Add attributes */
0811     ib_nl_set_path_rec_attrs(skb, query);
0812 
0813     /* Repair the nlmsg header length */
0814     nlmsg_end(skb, nlh);
0815 
0816     gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
0817         GFP_NOWAIT;
0818 
0819     spin_lock_irqsave(&ib_nl_request_lock, flags);
0820     ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
0821 
0822     if (ret)
0823         goto out;
0824 
0825     /* Put the request on the list.*/
0826     delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
0827     query->timeout = delay + jiffies;
0828     list_add_tail(&query->list, &ib_nl_request_list);
0829     /* Start the timeout if this is the only request */
0830     if (ib_nl_request_list.next == &query->list)
0831         queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
0832 
0833 out:
0834     spin_unlock_irqrestore(&ib_nl_request_lock, flags);
0835 
0836     return ret;
0837 }
0838 
0839 static int ib_nl_cancel_request(struct ib_sa_query *query)
0840 {
0841     unsigned long flags;
0842     struct ib_sa_query *wait_query;
0843     int found = 0;
0844 
0845     spin_lock_irqsave(&ib_nl_request_lock, flags);
0846     list_for_each_entry(wait_query, &ib_nl_request_list, list) {
0847         /* Let the timeout to take care of the callback */
0848         if (query == wait_query) {
0849             query->flags |= IB_SA_CANCEL;
0850             query->timeout = jiffies;
0851             list_move(&query->list, &ib_nl_request_list);
0852             found = 1;
0853             mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1);
0854             break;
0855         }
0856     }
0857     spin_unlock_irqrestore(&ib_nl_request_lock, flags);
0858 
0859     return found;
0860 }
0861 
/*
 * MAD send-completion handler. Declared here because the netlink response
 * and timeout paths below call it directly with a locally built
 * ib_mad_send_wc.
 */
static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc);
0864 
0865 static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
0866                        const struct nlmsghdr *nlh)
0867 {
0868     struct ib_mad_send_wc mad_send_wc;
0869     struct ib_sa_mad *mad = NULL;
0870     const struct nlattr *head, *curr;
0871     struct ib_path_rec_data  *rec;
0872     int len, rem;
0873     u32 mask = 0;
0874     int status = -EIO;
0875 
0876     if (query->callback) {
0877         head = (const struct nlattr *) nlmsg_data(nlh);
0878         len = nlmsg_len(nlh);
0879         switch (query->path_use) {
0880         case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
0881             mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
0882             break;
0883 
0884         case LS_RESOLVE_PATH_USE_ALL:
0885         case LS_RESOLVE_PATH_USE_GMP:
0886         default:
0887             mask = IB_PATH_PRIMARY | IB_PATH_GMP |
0888                 IB_PATH_BIDIRECTIONAL;
0889             break;
0890         }
0891         nla_for_each_attr(curr, head, len, rem) {
0892             if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
0893                 rec = nla_data(curr);
0894                 /*
0895                  * Get the first one. In the future, we may
0896                  * need to get up to 6 pathrecords.
0897                  */
0898                 if ((rec->flags & mask) == mask) {
0899                     mad = query->mad_buf->mad;
0900                     mad->mad_hdr.method |=
0901                         IB_MGMT_METHOD_RESP;
0902                     memcpy(mad->data, rec->path_rec,
0903                            sizeof(rec->path_rec));
0904                     status = 0;
0905                     break;
0906                 }
0907             }
0908         }
0909         query->callback(query, status, mad);
0910     }
0911 
0912     mad_send_wc.send_buf = query->mad_buf;
0913     mad_send_wc.status = IB_WC_SUCCESS;
0914     send_handler(query->mad_buf->mad_agent, &mad_send_wc);
0915 }
0916 
0917 static void ib_nl_request_timeout(struct work_struct *work)
0918 {
0919     unsigned long flags;
0920     struct ib_sa_query *query;
0921     unsigned long delay;
0922     struct ib_mad_send_wc mad_send_wc;
0923     int ret;
0924 
0925     spin_lock_irqsave(&ib_nl_request_lock, flags);
0926     while (!list_empty(&ib_nl_request_list)) {
0927         query = list_entry(ib_nl_request_list.next,
0928                    struct ib_sa_query, list);
0929 
0930         if (time_after(query->timeout, jiffies)) {
0931             delay = query->timeout - jiffies;
0932             if ((long)delay <= 0)
0933                 delay = 1;
0934             queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
0935             break;
0936         }
0937 
0938         list_del(&query->list);
0939         ib_sa_disable_local_svc(query);
0940         /* Hold the lock to protect against query cancellation */
0941         if (ib_sa_query_cancelled(query))
0942             ret = -1;
0943         else
0944             ret = ib_post_send_mad(query->mad_buf, NULL);
0945         if (ret) {
0946             mad_send_wc.send_buf = query->mad_buf;
0947             mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
0948             spin_unlock_irqrestore(&ib_nl_request_lock, flags);
0949             send_handler(query->port->agent, &mad_send_wc);
0950             spin_lock_irqsave(&ib_nl_request_lock, flags);
0951         }
0952     }
0953     spin_unlock_irqrestore(&ib_nl_request_lock, flags);
0954 }
0955 
0956 int ib_nl_handle_set_timeout(struct sk_buff *skb,
0957                  struct nlmsghdr *nlh,
0958                  struct netlink_ext_ack *extack)
0959 {
0960     int timeout, delta, abs_delta;
0961     const struct nlattr *attr;
0962     unsigned long flags;
0963     struct ib_sa_query *query;
0964     long delay = 0;
0965     struct nlattr *tb[LS_NLA_TYPE_MAX];
0966     int ret;
0967 
0968     if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
0969         !(NETLINK_CB(skb).sk))
0970         return -EPERM;
0971 
0972     ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
0973                    nlmsg_len(nlh), ib_nl_policy, NULL);
0974     attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
0975     if (ret || !attr)
0976         goto settimeout_out;
0977 
0978     timeout = *(int *) nla_data(attr);
0979     if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN)
0980         timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN;
0981     if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
0982         timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
0983 
0984     delta = timeout - sa_local_svc_timeout_ms;
0985     if (delta < 0)
0986         abs_delta = -delta;
0987     else
0988         abs_delta = delta;
0989 
0990     if (delta != 0) {
0991         spin_lock_irqsave(&ib_nl_request_lock, flags);
0992         sa_local_svc_timeout_ms = timeout;
0993         list_for_each_entry(query, &ib_nl_request_list, list) {
0994             if (delta < 0 && abs_delta > query->timeout)
0995                 query->timeout = 0;
0996             else
0997                 query->timeout += delta;
0998 
0999             /* Get the new delay from the first entry */
1000             if (!delay) {
1001                 delay = query->timeout - jiffies;
1002                 if (delay <= 0)
1003                     delay = 1;
1004             }
1005         }
1006         if (delay)
1007             mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
1008                      (unsigned long)delay);
1009         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1010     }
1011 
1012 settimeout_out:
1013     return 0;
1014 }
1015 
1016 static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
1017 {
1018     struct nlattr *tb[LS_NLA_TYPE_MAX];
1019     int ret;
1020 
1021     if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
1022         return 0;
1023 
1024     ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
1025                    nlmsg_len(nlh), ib_nl_policy, NULL);
1026     if (ret)
1027         return 0;
1028 
1029     return 1;
1030 }
1031 
1032 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
1033                   struct nlmsghdr *nlh,
1034                   struct netlink_ext_ack *extack)
1035 {
1036     unsigned long flags;
1037     struct ib_sa_query *query = NULL, *iter;
1038     struct ib_mad_send_buf *send_buf;
1039     struct ib_mad_send_wc mad_send_wc;
1040     int ret;
1041 
1042     if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
1043         !(NETLINK_CB(skb).sk))
1044         return -EPERM;
1045 
1046     spin_lock_irqsave(&ib_nl_request_lock, flags);
1047     list_for_each_entry(iter, &ib_nl_request_list, list) {
1048         /*
1049          * If the query is cancelled, let the timeout routine
1050          * take care of it.
1051          */
1052         if (nlh->nlmsg_seq == iter->seq) {
1053             if (!ib_sa_query_cancelled(iter)) {
1054                 list_del(&iter->list);
1055                 query = iter;
1056             }
1057             break;
1058         }
1059     }
1060 
1061     if (!query) {
1062         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1063         goto resp_out;
1064     }
1065 
1066     send_buf = query->mad_buf;
1067 
1068     if (!ib_nl_is_good_resolve_resp(nlh)) {
1069         /* if the result is a failure, send out the packet via IB */
1070         ib_sa_disable_local_svc(query);
1071         ret = ib_post_send_mad(query->mad_buf, NULL);
1072         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1073         if (ret) {
1074             mad_send_wc.send_buf = send_buf;
1075             mad_send_wc.status = IB_WC_GENERAL_ERR;
1076             send_handler(query->port->agent, &mad_send_wc);
1077         }
1078     } else {
1079         spin_unlock_irqrestore(&ib_nl_request_lock, flags);
1080         ib_nl_process_good_resolve_rsp(query, nlh);
1081     }
1082 
1083 resp_out:
1084     return 0;
1085 }
1086 
1087 static void free_sm_ah(struct kref *kref)
1088 {
1089     struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
1090 
1091     rdma_destroy_ah(sm_ah->ah, 0);
1092     kfree(sm_ah);
1093 }
1094 
1095 void ib_sa_register_client(struct ib_sa_client *client)
1096 {
1097     atomic_set(&client->users, 1);
1098     init_completion(&client->comp);
1099 }
1100 EXPORT_SYMBOL(ib_sa_register_client);
1101 
1102 void ib_sa_unregister_client(struct ib_sa_client *client)
1103 {
1104     ib_sa_client_put(client);
1105     wait_for_completion(&client->comp);
1106 }
1107 EXPORT_SYMBOL(ib_sa_unregister_client);
1108 
1109 /**
1110  * ib_sa_cancel_query - try to cancel an SA query
1111  * @id:ID of query to cancel
1112  * @query:query pointer to cancel
1113  *
1114  * Try to cancel an SA query.  If the id and query don't match up or
1115  * the query has already completed, nothing is done.  Otherwise the
1116  * query is canceled and will complete with a status of -EINTR.
1117  */
1118 void ib_sa_cancel_query(int id, struct ib_sa_query *query)
1119 {
1120     unsigned long flags;
1121     struct ib_mad_send_buf *mad_buf;
1122 
1123     xa_lock_irqsave(&queries, flags);
1124     if (xa_load(&queries, id) != query) {
1125         xa_unlock_irqrestore(&queries, flags);
1126         return;
1127     }
1128     mad_buf = query->mad_buf;
1129     xa_unlock_irqrestore(&queries, flags);
1130 
1131     /*
1132      * If the query is still on the netlink request list, schedule
1133      * it to be cancelled by the timeout routine. Otherwise, it has been
1134      * sent to the MAD layer and has to be cancelled from there.
1135      */
1136     if (!ib_nl_cancel_request(query))
1137         ib_cancel_mad(mad_buf);
1138 }
1139 EXPORT_SYMBOL(ib_sa_cancel_query);
1140 
1141 static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
1142 {
1143     struct ib_sa_device *sa_dev;
1144     struct ib_sa_port   *port;
1145     unsigned long flags;
1146     u8 src_path_mask;
1147 
1148     sa_dev = ib_get_client_data(device, &sa_client);
1149     if (!sa_dev)
1150         return 0x7f;
1151 
1152     port  = &sa_dev->port[port_num - sa_dev->start_port];
1153     spin_lock_irqsave(&port->ah_lock, flags);
1154     src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f;
1155     spin_unlock_irqrestore(&port->ah_lock, flags);
1156 
1157     return src_path_mask;
1158 }
1159 
1160 static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
1161                    struct sa_path_rec *rec,
1162                    struct rdma_ah_attr *ah_attr,
1163                    const struct ib_gid_attr *gid_attr)
1164 {
1165     enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
1166 
1167     if (!gid_attr) {
1168         gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
1169                          port_num, NULL);
1170         if (IS_ERR(gid_attr))
1171             return PTR_ERR(gid_attr);
1172     } else
1173         rdma_hold_gid_attr(gid_attr);
1174 
1175     rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
1176                 be32_to_cpu(rec->flow_label),
1177                 rec->hop_limit, rec->traffic_class,
1178                 gid_attr);
1179     return 0;
1180 }
1181 
1182 /**
1183  * ib_init_ah_attr_from_path - Initialize address handle attributes based on
1184  *   an SA path record.
1185  * @device: Device associated ah attributes initialization.
1186  * @port_num: Port on the specified device.
1187  * @rec: path record entry to use for ah attributes initialization.
1188  * @ah_attr: address handle attributes to initialization from path record.
1189  * @gid_attr: SGID attribute to consider during initialization.
1190  *
1191  * When ib_init_ah_attr_from_path() returns success,
1192  * (a) for IB link layer it optionally contains a reference to SGID attribute
1193  * when GRH is present for IB link layer.
1194  * (b) for RoCE link layer it contains a reference to SGID attribute.
1195  * User must invoke rdma_destroy_ah_attr() to release reference to SGID
1196  * attributes which are initialized using ib_init_ah_attr_from_path().
1197  */
1198 int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
1199                   struct sa_path_rec *rec,
1200                   struct rdma_ah_attr *ah_attr,
1201                   const struct ib_gid_attr *gid_attr)
1202 {
1203     int ret = 0;
1204 
1205     memset(ah_attr, 0, sizeof(*ah_attr));
1206     ah_attr->type = rdma_ah_find_type(device, port_num);
1207     rdma_ah_set_sl(ah_attr, rec->sl);
1208     rdma_ah_set_port_num(ah_attr, port_num);
1209     rdma_ah_set_static_rate(ah_attr, rec->rate);
1210 
1211     if (sa_path_is_roce(rec)) {
1212         ret = roce_resolve_route_from_path(rec, gid_attr);
1213         if (ret)
1214             return ret;
1215 
1216         memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
1217     } else {
1218         rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
1219         if (sa_path_is_opa(rec) &&
1220             rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
1221             rdma_ah_set_make_grd(ah_attr, true);
1222 
1223         rdma_ah_set_path_bits(ah_attr,
1224                       be32_to_cpu(sa_path_get_slid(rec)) &
1225                       get_src_path_mask(device, port_num));
1226     }
1227 
1228     if (rec->hop_limit > 0 || sa_path_is_roce(rec))
1229         ret = init_ah_attr_grh_fields(device, port_num,
1230                           rec, ah_attr, gid_attr);
1231     return ret;
1232 }
1233 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
1234 
1235 static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
1236 {
1237     struct rdma_ah_attr ah_attr;
1238     unsigned long flags;
1239 
1240     spin_lock_irqsave(&query->port->ah_lock, flags);
1241     if (!query->port->sm_ah) {
1242         spin_unlock_irqrestore(&query->port->ah_lock, flags);
1243         return -EAGAIN;
1244     }
1245     kref_get(&query->port->sm_ah->ref);
1246     query->sm_ah = query->port->sm_ah;
1247     spin_unlock_irqrestore(&query->port->ah_lock, flags);
1248 
1249     /*
1250      * Always check if sm_ah has valid dlid assigned,
1251      * before querying for class port info
1252      */
1253     if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
1254         !rdma_is_valid_unicast_lid(&ah_attr)) {
1255         kref_put(&query->sm_ah->ref, free_sm_ah);
1256         return -EAGAIN;
1257     }
1258     query->mad_buf = ib_create_send_mad(query->port->agent, 1,
1259                         query->sm_ah->pkey_index,
1260                         0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
1261                         gfp_mask,
1262                         ((query->flags & IB_SA_QUERY_OPA) ?
1263                          OPA_MGMT_BASE_VERSION :
1264                          IB_MGMT_BASE_VERSION));
1265     if (IS_ERR(query->mad_buf)) {
1266         kref_put(&query->sm_ah->ref, free_sm_ah);
1267         return -ENOMEM;
1268     }
1269 
1270     query->mad_buf->ah = query->sm_ah->ah;
1271 
1272     return 0;
1273 }
1274 
1275 static void free_mad(struct ib_sa_query *query)
1276 {
1277     ib_free_send_mad(query->mad_buf);
1278     kref_put(&query->sm_ah->ref, free_sm_ah);
1279 }
1280 
1281 static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
1282 {
1283     struct ib_sa_mad *mad = query->mad_buf->mad;
1284     unsigned long flags;
1285 
1286     memset(mad, 0, sizeof *mad);
1287 
1288     if (query->flags & IB_SA_QUERY_OPA) {
1289         mad->mad_hdr.base_version  = OPA_MGMT_BASE_VERSION;
1290         mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
1291     } else {
1292         mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
1293         mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
1294     }
1295     mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
1296     spin_lock_irqsave(&tid_lock, flags);
1297     mad->mad_hdr.tid           =
1298         cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
1299     spin_unlock_irqrestore(&tid_lock, flags);
1300 }
1301 
1302 static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
1303             gfp_t gfp_mask)
1304 {
1305     unsigned long flags;
1306     int ret, id;
1307     const int nmbr_sa_query_retries = 10;
1308 
1309     xa_lock_irqsave(&queries, flags);
1310     ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
1311     xa_unlock_irqrestore(&queries, flags);
1312     if (ret < 0)
1313         return ret;
1314 
1315     query->mad_buf->timeout_ms  = timeout_ms / nmbr_sa_query_retries;
1316     query->mad_buf->retries = nmbr_sa_query_retries;
1317     if (!query->mad_buf->timeout_ms) {
1318         /* Special case, very small timeout_ms */
1319         query->mad_buf->timeout_ms = 1;
1320         query->mad_buf->retries = timeout_ms;
1321     }
1322     query->mad_buf->context[0] = query;
1323     query->id = id;
1324 
1325     if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
1326         (!(query->flags & IB_SA_QUERY_OPA))) {
1327         if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
1328             if (!ib_nl_make_request(query, gfp_mask))
1329                 return id;
1330         }
1331         ib_sa_disable_local_svc(query);
1332     }
1333 
1334     ret = ib_post_send_mad(query->mad_buf, NULL);
1335     if (ret) {
1336         xa_lock_irqsave(&queries, flags);
1337         __xa_erase(&queries, id);
1338         xa_unlock_irqrestore(&queries, flags);
1339     }
1340 
1341     /*
1342      * It's not safe to dereference query any more, because the
1343      * send may already have completed and freed the query in
1344      * another context.
1345      */
1346     return ret ? ret : id;
1347 }
1348 
1349 void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
1350 {
1351     ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
1352 }
1353 EXPORT_SYMBOL(ib_sa_unpack_path);
1354 
1355 void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
1356 {
1357     ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
1358 }
1359 EXPORT_SYMBOL(ib_sa_pack_path);
1360 
1361 static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
1362                      struct ib_sa_device *sa_dev,
1363                      u32 port_num)
1364 {
1365     struct ib_sa_port *port;
1366     unsigned long flags;
1367     bool ret = false;
1368 
1369     port = &sa_dev->port[port_num - sa_dev->start_port];
1370     spin_lock_irqsave(&port->classport_lock, flags);
1371     if (!port->classport_info.valid)
1372         goto ret;
1373 
1374     if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
1375         ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
1376             OPA_CLASS_PORT_INFO_PR_SUPPORT;
1377 ret:
1378     spin_unlock_irqrestore(&port->classport_lock, flags);
1379     return ret;
1380 }
1381 
/* Result of opa_pr_query_possible(). */
enum opa_pr_supported {
    PR_NOT_SUPPORTED,   /* a path record query is not possible */
    PR_OPA_SUPPORTED,   /* an OPA path record query is possible */
    PR_IB_SUPPORTED     /* an IB path record query is possible */
};
1387 
1388 /*
1389  * opa_pr_query_possible - Check if current PR query can be an OPA query.
1390  *
1391  * Retuns PR_NOT_SUPPORTED if a path record query is not
1392  * possible, PR_OPA_SUPPORTED if an OPA path record query
1393  * is possible and PR_IB_SUPPORTED if an IB path record
1394  * query is possible.
1395  */
1396 static int opa_pr_query_possible(struct ib_sa_client *client,
1397                  struct ib_sa_device *sa_dev,
1398                  struct ib_device *device, u32 port_num)
1399 {
1400     struct ib_port_attr port_attr;
1401 
1402     if (ib_query_port(device, port_num, &port_attr))
1403         return PR_NOT_SUPPORTED;
1404 
1405     if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
1406         return PR_OPA_SUPPORTED;
1407 
1408     if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
1409         return PR_NOT_SUPPORTED;
1410     else
1411         return PR_IB_SUPPORTED;
1412 }
1413 
1414 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
1415                     int status,
1416                     struct ib_sa_mad *mad)
1417 {
1418     struct ib_sa_path_query *query =
1419         container_of(sa_query, struct ib_sa_path_query, sa_query);
1420 
1421     if (mad) {
1422         struct sa_path_rec rec;
1423 
1424         if (sa_query->flags & IB_SA_QUERY_OPA) {
1425             ib_unpack(opa_path_rec_table,
1426                   ARRAY_SIZE(opa_path_rec_table),
1427                   mad->data, &rec);
1428             rec.rec_type = SA_PATH_REC_TYPE_OPA;
1429             query->callback(status, &rec, query->context);
1430         } else {
1431             ib_unpack(path_rec_table,
1432                   ARRAY_SIZE(path_rec_table),
1433                   mad->data, &rec);
1434             rec.rec_type = SA_PATH_REC_TYPE_IB;
1435             sa_path_set_dmac_zero(&rec);
1436 
1437             if (query->conv_pr) {
1438                 struct sa_path_rec opa;
1439 
1440                 memset(&opa, 0, sizeof(struct sa_path_rec));
1441                 sa_convert_path_ib_to_opa(&opa, &rec);
1442                 query->callback(status, &opa, query->context);
1443             } else {
1444                 query->callback(status, &rec, query->context);
1445             }
1446         }
1447     } else
1448         query->callback(status, NULL, query->context);
1449 }
1450 
1451 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
1452 {
1453     struct ib_sa_path_query *query =
1454         container_of(sa_query, struct ib_sa_path_query, sa_query);
1455 
1456     kfree(query->conv_pr);
1457     kfree(query);
1458 }
1459 
1460 /**
1461  * ib_sa_path_rec_get - Start a Path get query
1462  * @client:SA client
1463  * @device:device to send query on
1464  * @port_num: port number to send query on
1465  * @rec:Path Record to send in query
1466  * @comp_mask:component mask to send in query
1467  * @timeout_ms:time to wait for response
1468  * @gfp_mask:GFP mask to use for internal allocations
1469  * @callback:function called when query completes, times out or is
1470  * canceled
1471  * @context:opaque user context passed to callback
1472  * @sa_query:query context, used to cancel query
1473  *
1474  * Send a Path Record Get query to the SA to look up a path.  The
1475  * callback function will be called when the query completes (or
1476  * fails); status is 0 for a successful response, -EINTR if the query
1477  * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
1478  * occurred sending the query.  The resp parameter of the callback is
1479  * only valid if status is 0.
1480  *
1481  * If the return value of ib_sa_path_rec_get() is negative, it is an
1482  * error code.  Otherwise it is a query ID that can be used to cancel
1483  * the query.
1484  */
1485 int ib_sa_path_rec_get(struct ib_sa_client *client,
1486                struct ib_device *device, u32 port_num,
1487                struct sa_path_rec *rec,
1488                ib_sa_comp_mask comp_mask,
1489                unsigned long timeout_ms, gfp_t gfp_mask,
1490                void (*callback)(int status,
1491                     struct sa_path_rec *resp,
1492                     void *context),
1493                void *context,
1494                struct ib_sa_query **sa_query)
1495 {
1496     struct ib_sa_path_query *query;
1497     struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1498     struct ib_sa_port   *port;
1499     struct ib_mad_agent *agent;
1500     struct ib_sa_mad *mad;
1501     enum opa_pr_supported status;
1502     int ret;
1503 
1504     if (!sa_dev)
1505         return -ENODEV;
1506 
1507     if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
1508         (rec->rec_type != SA_PATH_REC_TYPE_OPA))
1509         return -EINVAL;
1510 
1511     port  = &sa_dev->port[port_num - sa_dev->start_port];
1512     agent = port->agent;
1513 
1514     query = kzalloc(sizeof(*query), gfp_mask);
1515     if (!query)
1516         return -ENOMEM;
1517 
1518     query->sa_query.port     = port;
1519     if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
1520         status = opa_pr_query_possible(client, sa_dev, device, port_num);
1521         if (status == PR_NOT_SUPPORTED) {
1522             ret = -EINVAL;
1523             goto err1;
1524         } else if (status == PR_OPA_SUPPORTED) {
1525             query->sa_query.flags |= IB_SA_QUERY_OPA;
1526         } else {
1527             query->conv_pr =
1528                 kmalloc(sizeof(*query->conv_pr), gfp_mask);
1529             if (!query->conv_pr) {
1530                 ret = -ENOMEM;
1531                 goto err1;
1532             }
1533         }
1534     }
1535 
1536     ret = alloc_mad(&query->sa_query, gfp_mask);
1537     if (ret)
1538         goto err2;
1539 
1540     ib_sa_client_get(client);
1541     query->sa_query.client = client;
1542     query->callback        = callback;
1543     query->context         = context;
1544 
1545     mad = query->sa_query.mad_buf->mad;
1546     init_mad(&query->sa_query, agent);
1547 
1548     query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
1549     query->sa_query.release  = ib_sa_path_rec_release;
1550     mad->mad_hdr.method  = IB_MGMT_METHOD_GET;
1551     mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_PATH_REC);
1552     mad->sa_hdr.comp_mask    = comp_mask;
1553 
1554     if (query->sa_query.flags & IB_SA_QUERY_OPA) {
1555         ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
1556             rec, mad->data);
1557     } else if (query->conv_pr) {
1558         sa_convert_path_opa_to_ib(query->conv_pr, rec);
1559         ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1560             query->conv_pr, mad->data);
1561     } else {
1562         ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
1563             rec, mad->data);
1564     }
1565 
1566     *sa_query = &query->sa_query;
1567 
1568     query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
1569     query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
1570                         query->conv_pr : rec;
1571 
1572     ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1573     if (ret < 0)
1574         goto err3;
1575 
1576     return ret;
1577 
1578 err3:
1579     *sa_query = NULL;
1580     ib_sa_client_put(query->sa_query.client);
1581     free_mad(&query->sa_query);
1582 err2:
1583     kfree(query->conv_pr);
1584 err1:
1585     kfree(query);
1586     return ret;
1587 }
1588 EXPORT_SYMBOL(ib_sa_path_rec_get);
1589 
1590 static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
1591                     int status,
1592                     struct ib_sa_mad *mad)
1593 {
1594     struct ib_sa_mcmember_query *query =
1595         container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
1596 
1597     if (mad) {
1598         struct ib_sa_mcmember_rec rec;
1599 
1600         ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1601               mad->data, &rec);
1602         query->callback(status, &rec, query->context);
1603     } else
1604         query->callback(status, NULL, query->context);
1605 }
1606 
1607 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
1608 {
1609     kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
1610 }
1611 
1612 int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
1613                  struct ib_device *device, u32 port_num,
1614                  u8 method,
1615                  struct ib_sa_mcmember_rec *rec,
1616                  ib_sa_comp_mask comp_mask,
1617                  unsigned long timeout_ms, gfp_t gfp_mask,
1618                  void (*callback)(int status,
1619                           struct ib_sa_mcmember_rec *resp,
1620                           void *context),
1621                  void *context,
1622                  struct ib_sa_query **sa_query)
1623 {
1624     struct ib_sa_mcmember_query *query;
1625     struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1626     struct ib_sa_port   *port;
1627     struct ib_mad_agent *agent;
1628     struct ib_sa_mad *mad;
1629     int ret;
1630 
1631     if (!sa_dev)
1632         return -ENODEV;
1633 
1634     port  = &sa_dev->port[port_num - sa_dev->start_port];
1635     agent = port->agent;
1636 
1637     query = kzalloc(sizeof(*query), gfp_mask);
1638     if (!query)
1639         return -ENOMEM;
1640 
1641     query->sa_query.port     = port;
1642     ret = alloc_mad(&query->sa_query, gfp_mask);
1643     if (ret)
1644         goto err1;
1645 
1646     ib_sa_client_get(client);
1647     query->sa_query.client = client;
1648     query->callback        = callback;
1649     query->context         = context;
1650 
1651     mad = query->sa_query.mad_buf->mad;
1652     init_mad(&query->sa_query, agent);
1653 
1654     query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
1655     query->sa_query.release  = ib_sa_mcmember_rec_release;
1656     mad->mad_hdr.method  = method;
1657     mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
1658     mad->sa_hdr.comp_mask    = comp_mask;
1659 
1660     ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
1661         rec, mad->data);
1662 
1663     *sa_query = &query->sa_query;
1664 
1665     ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1666     if (ret < 0)
1667         goto err2;
1668 
1669     return ret;
1670 
1671 err2:
1672     *sa_query = NULL;
1673     ib_sa_client_put(query->sa_query.client);
1674     free_mad(&query->sa_query);
1675 
1676 err1:
1677     kfree(query);
1678     return ret;
1679 }
1680 
1681 /* Support GuidInfoRecord */
1682 static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
1683                     int status,
1684                     struct ib_sa_mad *mad)
1685 {
1686     struct ib_sa_guidinfo_query *query =
1687         container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
1688 
1689     if (mad) {
1690         struct ib_sa_guidinfo_rec rec;
1691 
1692         ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
1693               mad->data, &rec);
1694         query->callback(status, &rec, query->context);
1695     } else
1696         query->callback(status, NULL, query->context);
1697 }
1698 
1699 static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1700 {
1701     kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1702 }
1703 
1704 int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1705                   struct ib_device *device, u32 port_num,
1706                   struct ib_sa_guidinfo_rec *rec,
1707                   ib_sa_comp_mask comp_mask, u8 method,
1708                   unsigned long timeout_ms, gfp_t gfp_mask,
1709                   void (*callback)(int status,
1710                            struct ib_sa_guidinfo_rec *resp,
1711                            void *context),
1712                   void *context,
1713                   struct ib_sa_query **sa_query)
1714 {
1715     struct ib_sa_guidinfo_query *query;
1716     struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1717     struct ib_sa_port *port;
1718     struct ib_mad_agent *agent;
1719     struct ib_sa_mad *mad;
1720     int ret;
1721 
1722     if (!sa_dev)
1723         return -ENODEV;
1724 
1725     if (method != IB_MGMT_METHOD_GET &&
1726         method != IB_MGMT_METHOD_SET &&
1727         method != IB_SA_METHOD_DELETE) {
1728         return -EINVAL;
1729     }
1730 
1731     port  = &sa_dev->port[port_num - sa_dev->start_port];
1732     agent = port->agent;
1733 
1734     query = kzalloc(sizeof(*query), gfp_mask);
1735     if (!query)
1736         return -ENOMEM;
1737 
1738     query->sa_query.port = port;
1739     ret = alloc_mad(&query->sa_query, gfp_mask);
1740     if (ret)
1741         goto err1;
1742 
1743     ib_sa_client_get(client);
1744     query->sa_query.client = client;
1745     query->callback        = callback;
1746     query->context         = context;
1747 
1748     mad = query->sa_query.mad_buf->mad;
1749     init_mad(&query->sa_query, agent);
1750 
1751     query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1752     query->sa_query.release  = ib_sa_guidinfo_rec_release;
1753 
1754     mad->mad_hdr.method  = method;
1755     mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1756     mad->sa_hdr.comp_mask    = comp_mask;
1757 
1758     ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1759         mad->data);
1760 
1761     *sa_query = &query->sa_query;
1762 
1763     ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1764     if (ret < 0)
1765         goto err2;
1766 
1767     return ret;
1768 
1769 err2:
1770     *sa_query = NULL;
1771     ib_sa_client_put(query->sa_query.client);
1772     free_mad(&query->sa_query);
1773 
1774 err1:
1775     kfree(query);
1776     return ret;
1777 }
1778 EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1779 
1780 struct ib_classport_info_context {
1781     struct completion   done;
1782     struct ib_sa_query  *sa_query;
1783 };
1784 
1785 static void ib_classportinfo_cb(void *context)
1786 {
1787     struct ib_classport_info_context *cb_ctx = context;
1788 
1789     complete(&cb_ctx->done);
1790 }
1791 
1792 static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
1793                           int status,
1794                           struct ib_sa_mad *mad)
1795 {
1796     unsigned long flags;
1797     struct ib_sa_classport_info_query *query =
1798         container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
1799     struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
1800 
1801     if (mad) {
1802         if (sa_query->flags & IB_SA_QUERY_OPA) {
1803             struct opa_class_port_info rec;
1804 
1805             ib_unpack(opa_classport_info_rec_table,
1806                   ARRAY_SIZE(opa_classport_info_rec_table),
1807                   mad->data, &rec);
1808 
1809             spin_lock_irqsave(&sa_query->port->classport_lock,
1810                       flags);
1811             if (!status && !info->valid) {
1812                 memcpy(&info->data.opa, &rec,
1813                        sizeof(info->data.opa));
1814 
1815                 info->valid = true;
1816                 info->data.type = RDMA_CLASS_PORT_INFO_OPA;
1817             }
1818             spin_unlock_irqrestore(&sa_query->port->classport_lock,
1819                            flags);
1820 
1821         } else {
1822             struct ib_class_port_info rec;
1823 
1824             ib_unpack(ib_classport_info_rec_table,
1825                   ARRAY_SIZE(ib_classport_info_rec_table),
1826                   mad->data, &rec);
1827 
1828             spin_lock_irqsave(&sa_query->port->classport_lock,
1829                       flags);
1830             if (!status && !info->valid) {
1831                 memcpy(&info->data.ib, &rec,
1832                        sizeof(info->data.ib));
1833 
1834                 info->valid = true;
1835                 info->data.type = RDMA_CLASS_PORT_INFO_IB;
1836             }
1837             spin_unlock_irqrestore(&sa_query->port->classport_lock,
1838                            flags);
1839         }
1840     }
1841     query->callback(query->context);
1842 }
1843 
1844 static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
1845 {
1846     kfree(container_of(sa_query, struct ib_sa_classport_info_query,
1847                sa_query));
1848 }
1849 
1850 static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
1851                       unsigned long timeout_ms,
1852                       void (*callback)(void *context),
1853                       void *context,
1854                       struct ib_sa_query **sa_query)
1855 {
1856     struct ib_mad_agent *agent;
1857     struct ib_sa_classport_info_query *query;
1858     struct ib_sa_mad *mad;
1859     gfp_t gfp_mask = GFP_KERNEL;
1860     int ret;
1861 
1862     agent = port->agent;
1863 
1864     query = kzalloc(sizeof(*query), gfp_mask);
1865     if (!query)
1866         return -ENOMEM;
1867 
1868     query->sa_query.port = port;
1869     query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
1870                          port->port_num) ?
1871                  IB_SA_QUERY_OPA : 0;
1872     ret = alloc_mad(&query->sa_query, gfp_mask);
1873     if (ret)
1874         goto err_free;
1875 
1876     query->callback = callback;
1877     query->context = context;
1878 
1879     mad = query->sa_query.mad_buf->mad;
1880     init_mad(&query->sa_query, agent);
1881 
1882     query->sa_query.callback = ib_sa_classport_info_rec_callback;
1883     query->sa_query.release  = ib_sa_classport_info_rec_release;
1884     mad->mad_hdr.method  = IB_MGMT_METHOD_GET;
1885     mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
1886     mad->sa_hdr.comp_mask    = 0;
1887     *sa_query = &query->sa_query;
1888 
1889     ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1890     if (ret < 0)
1891         goto err_free_mad;
1892 
1893     return ret;
1894 
1895 err_free_mad:
1896     *sa_query = NULL;
1897     free_mad(&query->sa_query);
1898 
1899 err_free:
1900     kfree(query);
1901     return ret;
1902 }
1903 
1904 static void update_ib_cpi(struct work_struct *work)
1905 {
1906     struct ib_sa_port *port =
1907         container_of(work, struct ib_sa_port, ib_cpi_work.work);
1908     struct ib_classport_info_context *cb_context;
1909     unsigned long flags;
1910     int ret;
1911 
1912     /* If the classport info is valid, nothing
1913      * to do here.
1914      */
1915     spin_lock_irqsave(&port->classport_lock, flags);
1916     if (port->classport_info.valid) {
1917         spin_unlock_irqrestore(&port->classport_lock, flags);
1918         return;
1919     }
1920     spin_unlock_irqrestore(&port->classport_lock, flags);
1921 
1922     cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
1923     if (!cb_context)
1924         goto err_nomem;
1925 
1926     init_completion(&cb_context->done);
1927 
1928     ret = ib_sa_classport_info_rec_query(port, 3000,
1929                          ib_classportinfo_cb, cb_context,
1930                          &cb_context->sa_query);
1931     if (ret < 0)
1932         goto free_cb_err;
1933     wait_for_completion(&cb_context->done);
1934 free_cb_err:
1935     kfree(cb_context);
1936     spin_lock_irqsave(&port->classport_lock, flags);
1937 
1938     /* If the classport info is still not valid, the query should have
1939      * failed for some reason. Retry issuing the query
1940      */
1941     if (!port->classport_info.valid) {
1942         port->classport_info.retry_cnt++;
1943         if (port->classport_info.retry_cnt <=
1944             IB_SA_CPI_MAX_RETRY_CNT) {
1945             unsigned long delay =
1946                 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
1947 
1948             queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
1949         }
1950     }
1951     spin_unlock_irqrestore(&port->classport_lock, flags);
1952 
1953 err_nomem:
1954     return;
1955 }
1956 
1957 static void send_handler(struct ib_mad_agent *agent,
1958              struct ib_mad_send_wc *mad_send_wc)
1959 {
1960     struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
1961     unsigned long flags;
1962 
1963     if (query->callback)
1964         switch (mad_send_wc->status) {
1965         case IB_WC_SUCCESS:
1966             /* No callback -- already got recv */
1967             break;
1968         case IB_WC_RESP_TIMEOUT_ERR:
1969             query->callback(query, -ETIMEDOUT, NULL);
1970             break;
1971         case IB_WC_WR_FLUSH_ERR:
1972             query->callback(query, -EINTR, NULL);
1973             break;
1974         default:
1975             query->callback(query, -EIO, NULL);
1976             break;
1977         }
1978 
1979     xa_lock_irqsave(&queries, flags);
1980     __xa_erase(&queries, query->id);
1981     xa_unlock_irqrestore(&queries, flags);
1982 
1983     free_mad(query);
1984     if (query->client)
1985         ib_sa_client_put(query->client);
1986     query->release(query);
1987 }
1988 
1989 static void recv_handler(struct ib_mad_agent *mad_agent,
1990              struct ib_mad_send_buf *send_buf,
1991              struct ib_mad_recv_wc *mad_recv_wc)
1992 {
1993     struct ib_sa_query *query;
1994 
1995     if (!send_buf)
1996         return;
1997 
1998     query = send_buf->context[0];
1999     if (query->callback) {
2000         if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
2001             query->callback(query,
2002                     mad_recv_wc->recv_buf.mad->mad_hdr.status ?
2003                     -EINVAL : 0,
2004                     (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
2005         else
2006             query->callback(query, -EIO, NULL);
2007     }
2008 
2009     ib_free_recv_mad(mad_recv_wc);
2010 }
2011 
2012 static void update_sm_ah(struct work_struct *work)
2013 {
2014     struct ib_sa_port *port =
2015         container_of(work, struct ib_sa_port, update_task);
2016     struct ib_sa_sm_ah *new_ah;
2017     struct ib_port_attr port_attr;
2018     struct rdma_ah_attr   ah_attr;
2019     bool grh_required;
2020 
2021     if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
2022         pr_warn("Couldn't query port\n");
2023         return;
2024     }
2025 
2026     new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
2027     if (!new_ah)
2028         return;
2029 
2030     kref_init(&new_ah->ref);
2031     new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
2032 
2033     new_ah->pkey_index = 0;
2034     if (ib_find_pkey(port->agent->device, port->port_num,
2035              IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
2036         pr_err("Couldn't find index for default PKey\n");
2037 
2038     memset(&ah_attr, 0, sizeof(ah_attr));
2039     ah_attr.type = rdma_ah_find_type(port->agent->device,
2040                      port->port_num);
2041     rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
2042     rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
2043     rdma_ah_set_port_num(&ah_attr, port->port_num);
2044 
2045     grh_required = rdma_is_grh_required(port->agent->device,
2046                         port->port_num);
2047 
2048     /*
2049      * The OPA sm_lid of 0xFFFF needs special handling so that it can be
2050      * differentiated from a permissive LID of 0xFFFF.  We set the
2051      * grh_required flag here so the SA can program the DGID in the
2052      * address handle appropriately
2053      */
2054     if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
2055         (grh_required ||
2056          port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
2057         rdma_ah_set_make_grd(&ah_attr, true);
2058 
2059     if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
2060         rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
2061         rdma_ah_set_subnet_prefix(&ah_attr,
2062                       cpu_to_be64(port_attr.subnet_prefix));
2063         rdma_ah_set_interface_id(&ah_attr,
2064                      cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
2065     }
2066 
2067     new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
2068                     RDMA_CREATE_AH_SLEEPABLE);
2069     if (IS_ERR(new_ah->ah)) {
2070         pr_warn("Couldn't create new SM AH\n");
2071         kfree(new_ah);
2072         return;
2073     }
2074 
2075     spin_lock_irq(&port->ah_lock);
2076     if (port->sm_ah)
2077         kref_put(&port->sm_ah->ref, free_sm_ah);
2078     port->sm_ah = new_ah;
2079     spin_unlock_irq(&port->ah_lock);
2080 }
2081 
2082 static void ib_sa_event(struct ib_event_handler *handler,
2083             struct ib_event *event)
2084 {
2085     if (event->event == IB_EVENT_PORT_ERR    ||
2086         event->event == IB_EVENT_PORT_ACTIVE ||
2087         event->event == IB_EVENT_LID_CHANGE  ||
2088         event->event == IB_EVENT_PKEY_CHANGE ||
2089         event->event == IB_EVENT_SM_CHANGE   ||
2090         event->event == IB_EVENT_CLIENT_REREGISTER) {
2091         unsigned long flags;
2092         struct ib_sa_device *sa_dev =
2093             container_of(handler, typeof(*sa_dev), event_handler);
2094         u32 port_num = event->element.port_num - sa_dev->start_port;
2095         struct ib_sa_port *port = &sa_dev->port[port_num];
2096 
2097         if (!rdma_cap_ib_sa(handler->device, port->port_num))
2098             return;
2099 
2100         spin_lock_irqsave(&port->ah_lock, flags);
2101         if (port->sm_ah)
2102             kref_put(&port->sm_ah->ref, free_sm_ah);
2103         port->sm_ah = NULL;
2104         spin_unlock_irqrestore(&port->ah_lock, flags);
2105 
2106         if (event->event == IB_EVENT_SM_CHANGE ||
2107             event->event == IB_EVENT_CLIENT_REREGISTER ||
2108             event->event == IB_EVENT_LID_CHANGE ||
2109             event->event == IB_EVENT_PORT_ACTIVE) {
2110             unsigned long delay =
2111                 msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
2112 
2113             spin_lock_irqsave(&port->classport_lock, flags);
2114             port->classport_info.valid = false;
2115             port->classport_info.retry_cnt = 0;
2116             spin_unlock_irqrestore(&port->classport_lock, flags);
2117             queue_delayed_work(ib_wq,
2118                        &port->ib_cpi_work, delay);
2119         }
2120         queue_work(ib_wq, &sa_dev->port[port_num].update_task);
2121     }
2122 }
2123 
2124 static int ib_sa_add_one(struct ib_device *device)
2125 {
2126     struct ib_sa_device *sa_dev;
2127     int s, e, i;
2128     int count = 0;
2129     int ret;
2130 
2131     s = rdma_start_port(device);
2132     e = rdma_end_port(device);
2133 
2134     sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
2135     if (!sa_dev)
2136         return -ENOMEM;
2137 
2138     sa_dev->start_port = s;
2139     sa_dev->end_port   = e;
2140 
2141     for (i = 0; i <= e - s; ++i) {
2142         spin_lock_init(&sa_dev->port[i].ah_lock);
2143         if (!rdma_cap_ib_sa(device, i + 1))
2144             continue;
2145 
2146         sa_dev->port[i].sm_ah    = NULL;
2147         sa_dev->port[i].port_num = i + s;
2148 
2149         spin_lock_init(&sa_dev->port[i].classport_lock);
2150         sa_dev->port[i].classport_info.valid = false;
2151 
2152         sa_dev->port[i].agent =
2153             ib_register_mad_agent(device, i + s, IB_QPT_GSI,
2154                           NULL, 0, send_handler,
2155                           recv_handler, sa_dev, 0);
2156         if (IS_ERR(sa_dev->port[i].agent)) {
2157             ret = PTR_ERR(sa_dev->port[i].agent);
2158             goto err;
2159         }
2160 
2161         INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
2162         INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
2163                   update_ib_cpi);
2164 
2165         count++;
2166     }
2167 
2168     if (!count) {
2169         ret = -EOPNOTSUPP;
2170         goto free;
2171     }
2172 
2173     ib_set_client_data(device, &sa_client, sa_dev);
2174 
2175     /*
2176      * We register our event handler after everything is set up,
2177      * and then update our cached info after the event handler is
2178      * registered to avoid any problems if a port changes state
2179      * during our initialization.
2180      */
2181 
2182     INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
2183     ib_register_event_handler(&sa_dev->event_handler);
2184 
2185     for (i = 0; i <= e - s; ++i) {
2186         if (rdma_cap_ib_sa(device, i + 1))
2187             update_sm_ah(&sa_dev->port[i].update_task);
2188     }
2189 
2190     return 0;
2191 
2192 err:
2193     while (--i >= 0) {
2194         if (rdma_cap_ib_sa(device, i + 1))
2195             ib_unregister_mad_agent(sa_dev->port[i].agent);
2196     }
2197 free:
2198     kfree(sa_dev);
2199     return ret;
2200 }
2201 
2202 static void ib_sa_remove_one(struct ib_device *device, void *client_data)
2203 {
2204     struct ib_sa_device *sa_dev = client_data;
2205     int i;
2206 
2207     ib_unregister_event_handler(&sa_dev->event_handler);
2208     flush_workqueue(ib_wq);
2209 
2210     for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
2211         if (rdma_cap_ib_sa(device, i + 1)) {
2212             cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
2213             ib_unregister_mad_agent(sa_dev->port[i].agent);
2214             if (sa_dev->port[i].sm_ah)
2215                 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
2216         }
2217 
2218     }
2219 
2220     kfree(sa_dev);
2221 }
2222 
2223 int ib_sa_init(void)
2224 {
2225     int ret;
2226 
2227     get_random_bytes(&tid, sizeof tid);
2228 
2229     atomic_set(&ib_nl_sa_request_seq, 0);
2230 
2231     ret = ib_register_client(&sa_client);
2232     if (ret) {
2233         pr_err("Couldn't register ib_sa client\n");
2234         goto err1;
2235     }
2236 
2237     ret = mcast_init();
2238     if (ret) {
2239         pr_err("Couldn't initialize multicast handling\n");
2240         goto err2;
2241     }
2242 
2243     ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM);
2244     if (!ib_nl_wq) {
2245         ret = -ENOMEM;
2246         goto err3;
2247     }
2248 
2249     INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
2250 
2251     return 0;
2252 
2253 err3:
2254     mcast_cleanup();
2255 err2:
2256     ib_unregister_client(&sa_client);
2257 err1:
2258     return ret;
2259 }
2260 
2261 void ib_sa_cleanup(void)
2262 {
2263     cancel_delayed_work(&ib_nl_timed_work);
2264     destroy_workqueue(ib_nl_wq);
2265     mcast_cleanup();
2266     ib_unregister_client(&sa_client);
2267     WARN_ON(!xa_empty(&queries));
2268 }