/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
 * Copyright(c) 2016 - 2019 Intel Corporation.
 */

#ifndef DEF_RDMA_VT_H
#define DEF_RDMA_VT_H

/*
 * Structure that low level drivers will populate in order to register with the
 * rdmavt layer.
 */

#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>
#include <rdma/rdmavt_mr.h>

#define RVT_MAX_PKEY_VALUES 16

#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */
#define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/
#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */

struct trap_list {
    u32 list_len;
    struct list_head list;
};

struct rvt_qp;
struct rvt_qpn_table;
struct rvt_ibport {
    struct rvt_qp __rcu *qp[2];
    struct ib_mad_agent *send_agent;    /* agent for SMI (traps) */
    struct rb_root mcast_tree;
    spinlock_t lock;        /* protect changes in this struct */

    /* non-zero when timer is set */
    unsigned long mkey_lease_timeout;
    unsigned long trap_timeout;
    __be64 gid_prefix;      /* in network order */
    __be64 mkey;
    u64 tid;
    u32 port_cap_flags;
    u16 port_cap3_flags;
    u32 pma_sample_start;
    u32 pma_sample_interval;
    __be16 pma_counter_select[5];
    u16 pma_tag;
    u16 mkey_lease_period;
    u32 sm_lid;
    u8 sm_sl;
    u8 mkeyprot;
    u8 subnet_timeout;
    u8 vl_high_limit;

    /*
     * Driver is expected to keep these up to date. These
     * counters are informational only and not required to be
     * completely accurate.
     */
    u64 n_rc_resends;
    u64 n_seq_naks;
    u64 n_rdma_seq;
    u64 n_rnr_naks;
    u64 n_other_naks;
    u64 n_loop_pkts;
    u64 n_pkt_drops;
    u64 n_vl15_dropped;
    u64 n_rc_timeouts;
    u64 n_dmawait;
    u64 n_unaligned;
    u64 n_rc_dupreq;
    u64 n_rc_seqnak;
    u64 n_rc_crwaits;
    u16 pkey_violations;
    u16 qkey_violations;
    u16 mkey_violations;

    /* Hot-path per CPU counters to avoid cacheline trading to update */
    u64 z_rc_acks;
    u64 z_rc_qacks;
    u64 z_rc_delayed_comp;
    u64 __percpu *rc_acks;
    u64 __percpu *rc_qacks;
    u64 __percpu *rc_delayed_comp;

    void *priv; /* driver private data */

    /*
     * The pkey table is allocated and maintained by the driver. Drivers
     * need access to it before registering with rdmavt. However, rdmavt
     * also needs access to it, so drivers must provide it during the
     * attach port API call (rvt_init_port()).
     */
    u16 *pkey_table;

    struct rvt_ah *sm_ah;

    /*
     * Keep a list of traps that have not been repressed.  They will be
     * resent based on trap_timer.
     */
    struct trap_list trap_lists[RVT_MAX_TRAP_LISTS];
    struct timer_list trap_timer;
};
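
/*
 * Illustrative sketch (not part of this header): how a driver might hand
 * its per-port state and pkey table to rdmavt.  The mydrv_pportdata type
 * and mydrv_* names are hypothetical; only rvt_ibport, RVT_MAX_PKEY_VALUES
 * and rvt_init_port() (declared at the bottom of this file) come from
 * this header.
 *
 *    struct mydrv_pportdata {
 *        struct rvt_ibport ibport_data;
 *        u16 pkeys[RVT_MAX_PKEY_VALUES];
 *    };
 *
 *    static int mydrv_init_ports(struct rvt_dev_info *rdi,
 *                    struct mydrv_pportdata *ppd, int nports)
 *    {
 *        int i, ret;
 *
 *        for (i = 0; i < nports; i++) {
 *            ppd[i].pkeys[0] = 0xffff;
 *            ret = rvt_init_port(rdi, &ppd[i].ibport_data, i,
 *                        ppd[i].pkeys);
 *            if (ret)
 *                return ret;
 *        }
 *        return 0;
 *    }
 */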

#define RVT_CQN_MAX 16 /* maximum length of cq name */

#define RVT_SGE_COPY_MEMCPY 0
#define RVT_SGE_COPY_CACHELESS  1
#define RVT_SGE_COPY_ADAPTIVE   2

/*
 * Things that are driver specific, module parameters in hfi1 and qib
 */
struct rvt_driver_params {
    struct ib_device_attr props;

    /*
     * Anything driver specific that is not covered by props,
     * for instance special module parameters, goes here.
     */
    unsigned int lkey_table_size;
    unsigned int qp_table_size;
    unsigned int sge_copy_mode;
    unsigned int wss_threshold;
    unsigned int wss_clean_period;
    int qpn_start;
    int qpn_inc;
    int qpn_res_start;
    int qpn_res_end;
    int nports;
    int npkeys;
    int node;
    int psn_mask;
    int psn_shift;
    int psn_modify_mask;
    u32 core_cap_flags;
    u32 max_mad_size;
    u8 qos_shift;
    u8 max_rdma_atomic;
    u8 extra_rdma_atomic;
    u8 reserved_operations;
};
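
/*
 * Illustrative sketch (not part of this header): a driver filling in the
 * pieces of dparms it owns before registering.  All values below are made
 * up; real drivers derive them from hardware limits and module parameters.
 *
 *    static void mydrv_fill_dparms(struct rvt_dev_info *rdi, int node)
 *    {
 *        rdi->dparms.lkey_table_size = 16;
 *        rdi->dparms.qp_table_size = 256;
 *        rdi->dparms.sge_copy_mode = RVT_SGE_COPY_MEMCPY;
 *        rdi->dparms.qpn_start = 0;
 *        rdi->dparms.qpn_inc = 1;
 *        rdi->dparms.nports = 1;
 *        rdi->dparms.npkeys = RVT_MAX_PKEY_VALUES;
 *        rdi->dparms.node = node;
 *        rdi->dparms.max_rdma_atomic = 16;
 *        rdi->dparms.props.max_qp = 1024;
 *        rdi->dparms.props.max_qp_wr = 4096;
 *    }
 */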

/* User context */
struct rvt_ucontext {
    struct ib_ucontext ibucontext;
};

/* Protection domain */
struct rvt_pd {
    struct ib_pd ibpd;
    bool user;
};

/* Address handle */
struct rvt_ah {
    struct ib_ah ibah;
    struct rdma_ah_attr attr;
    u8 vl;
    u8 log_pmtu;
};

/*
 * This structure is used by rvt_mmap() to validate an offset
 * when an mmap() request is made.  The vm_area_struct then uses
 * this as its vm_private_data.
 */
struct rvt_mmap_info {
    struct list_head pending_mmaps;
    struct ib_ucontext *context;
    void *obj;
    __u64 offset;
    struct kref ref;
    u32 size;
};

/* memory working set size */
struct rvt_wss {
    unsigned long *entries;
    atomic_t total_count;
    atomic_t clean_counter;
    atomic_t clean_entry;

    int threshold;
    int num_entries;
    long pages_mask;
    unsigned int clean_period;
};
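
/*
 * Illustrative note: rdmavt uses this working set tracking when
 * dparms.sge_copy_mode is RVT_SGE_COPY_ADAPTIVE.  The check below is a
 * hypothetical rendering of the decision it feeds; the real bookkeeping
 * lives inside rdmavt.
 *
 *    static bool mydrv_wss_over_threshold(struct rvt_wss *wss)
 *    {
 *        return atomic_read(&wss->total_count) >= wss->threshold;
 *    }
 *
 * Once the count of recently touched pages crosses the threshold (a
 * wss_threshold percentage of num_entries), SGE copies are better done
 * with a cache-bypassing copy so the send path does not evict the
 * application's working set.
 */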

struct rvt_dev_info;
struct rvt_swqe;
struct rvt_driver_provided {
    /*
     * Which functions are required depends on which verbs rdmavt is
     * providing and which verbs the driver is overriding. See
     * check_support() for details. An illustrative sketch of a driver
     * populating these callbacks follows the structure definition.
     */

    /* hot path calldowns in a single cacheline */

    /*
     * Give the driver notice that there is send work to do. It is up to
     * the driver to actually push the packets out; this just queues the
     * work with the driver. There are two variants here. The no_lock
     * version requires that the s_lock not be held. The other assumes the
     * s_lock is held.
     */
    bool (*schedule_send)(struct rvt_qp *qp);
    bool (*schedule_send_no_lock)(struct rvt_qp *qp);

    /*
     * Driver specific work request setup and checking.
     * This function is allowed to perform any setup, checks, or
     * adjustments required to the SWQE in order to be usable by
     * underlying protocols. This includes private data structure
     * allocations.
     */
    int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
             bool *call_send);

    /*
     * Sometimes rdmavt needs to kick the driver's send progress. That is
     * done by this call back.
     */
    void (*do_send)(struct rvt_qp *qp);

    /*
     * Returns a pointer to the underlying hardware's PCI device. This is
     * used to display information as to what hardware is being referenced
     * in an output message
     */
    struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);

    /*
     * Allocate a private queue pair data structure for driver specific
     * information which is opaque to rdmavt.  Errors are returned via
     * ERR_PTR(err).  The driver is free to return NULL or a valid
     * pointer.
     */
    void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);

    /*
     * Init a structure allocated with qp_priv_alloc(). This should be
     * called after all qp fields have been initialized in rdmavt.
     */
    int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                struct ib_qp_init_attr *init_attr);

    /*
     * Free the driver's private qp structure.
     */
    void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);

    /*
     * Inform the driver the particular qp in question has been reset so
     * that it can clean up anything it needs to.
     */
    void (*notify_qp_reset)(struct rvt_qp *qp);

    /*
     * Get a path mtu from the driver based on qp attributes.
     */
    int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
                  struct ib_qp_attr *attr);

    /*
     * Notify driver that it needs to flush any outstanding IO requests that
     * are waiting on a qp.
     */
    void (*flush_qp_waiters)(struct rvt_qp *qp);

    /*
     * Notify driver to stop its queue of sending packets. Nothing else
     * should be posted to the queue pair after this has been called.
     */
    void (*stop_send_queue)(struct rvt_qp *qp);

    /*
     * Have the driver drain any in progress operations
     */
    void (*quiesce_qp)(struct rvt_qp *qp);

    /*
     * Inform the driver a qp has gone into the error state.
     */
    void (*notify_error_qp)(struct rvt_qp *qp);

    /*
     * Get an MTU for a qp.
     */
    u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
               u32 pmtu);
    /*
     * Convert an mtu to a path mtu
     */
    int (*mtu_to_path_mtu)(u32 mtu);

    /*
     * Get the guid of a port in big endian byte order
     */
    int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
               int guid_index, __be64 *guid);

    /*
     * Query driver for the state of the port.
     */
    int (*query_port_state)(struct rvt_dev_info *rdi, u32 port_num,
                struct ib_port_attr *props);

    /*
     * Tell driver to shutdown a port
     */
    int (*shut_down_port)(struct rvt_dev_info *rdi, u32 port_num);

    /* Tell driver to send a trap for changed port capabilities */
    void (*cap_mask_chg)(struct rvt_dev_info *rdi, u32 port_num);

    /*
     * The following functions can be safely ignored completely. Any use of
     * these is checked for NULL before blindly calling. Rdmavt should also
     * be functional if drivers omit these.
     */

    /* Called to inform the driver that all qps should now be freed. */
    unsigned (*free_all_qps)(struct rvt_dev_info *rdi);

    /* Driver specific AH validation */
    int (*check_ah)(struct ib_device *, struct rdma_ah_attr *);

    /* Inform the driver a new AH has been created */
    void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *,
                  struct rvt_ah *);

    /* Let the driver pick the next queue pair number */
    int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
             enum ib_qp_type type, u32 port_num);

    /* Determine if it's safe or allowed to modify the qp */
    int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
                   int attr_mask, struct ib_udata *udata);

    /* Driver specific QP modification/notification-of */
    void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
              int attr_mask, struct ib_udata *udata);

    /* Notify driver a mad agent has been created */
    void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);

    /* Notify driver a mad agent has been removed */
    void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);

    /* Notify driver to restart rc */
    void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);

    /* Get and return CPU to pin CQ processing thread */
    int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
};
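
/*
 * Illustrative sketch (not part of this header, referenced from the
 * comment at the top of struct rvt_driver_provided): one way a driver
 * might wire up its calldowns before registering.  Every mydrv_* name is
 * a hypothetical driver function; only the rvt_driver_provided members
 * come from this header.  Callbacks in the "safely ignored" group may be
 * left NULL.
 *
 *    static void mydrv_set_driver_f(struct rvt_dev_info *rdi)
 *    {
 *        rdi->driver_f.schedule_send = mydrv_schedule_send;
 *        rdi->driver_f.schedule_send_no_lock = mydrv_schedule_send_no_lock;
 *        rdi->driver_f.setup_wqe = mydrv_setup_wqe;
 *        rdi->driver_f.do_send = mydrv_do_send;
 *        rdi->driver_f.get_pci_dev = mydrv_get_pci_dev;
 *        rdi->driver_f.qp_priv_alloc = mydrv_qp_priv_alloc;
 *        rdi->driver_f.qp_priv_free = mydrv_qp_priv_free;
 *        rdi->driver_f.notify_qp_reset = mydrv_notify_qp_reset;
 *        rdi->driver_f.get_pmtu_from_attr = mydrv_get_pmtu_from_attr;
 *        rdi->driver_f.flush_qp_waiters = mydrv_flush_qp_waiters;
 *        rdi->driver_f.stop_send_queue = mydrv_stop_send_queue;
 *        rdi->driver_f.quiesce_qp = mydrv_quiesce_qp;
 *        rdi->driver_f.notify_error_qp = mydrv_notify_error_qp;
 *        rdi->driver_f.mtu_from_qp = mydrv_mtu_from_qp;
 *        rdi->driver_f.mtu_to_path_mtu = mydrv_mtu_to_path_mtu;
 *        rdi->driver_f.get_guid_be = mydrv_get_guid_be;
 *        rdi->driver_f.query_port_state = mydrv_query_port_state;
 *        rdi->driver_f.shut_down_port = mydrv_shut_down_port;
 *        rdi->driver_f.cap_mask_chg = mydrv_cap_mask_chg;
 *    }
 */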

struct rvt_dev_info {
    struct ib_device ibdev; /* Keep this first. Nothing above here */

    /*
     * Prior to calling for registration the driver will be responsible for
     * allocating space for this structure.
     *
     * The driver will also be responsible for filling in certain members of
     * dparms.props. The driver needs to fill in dparms exactly as it would
     * want values reported to a ULP. This will be returned to the caller
     * in rdmavt's device. The driver should also therefore refrain from
     * modifying this directly after registration with rdmavt.
     */

    /* Driver specific properties */
    struct rvt_driver_params dparms;

    /* post send table */
    const struct rvt_operation_params *post_parms;

    /* opcode translation table */
    const enum ib_wc_opcode *wc_opcode;

    /* Driver specific helper functions */
    struct rvt_driver_provided driver_f;

    struct rvt_mregion __rcu *dma_mr;
    struct rvt_lkey_table lkey_table;

    /* Internal use */
    int n_pds_allocated;
    spinlock_t n_pds_lock; /* Protect pd allocated count */

    int n_ahs_allocated;
    spinlock_t n_ahs_lock; /* Protect ah allocated count */

    u32 n_srqs_allocated;
    spinlock_t n_srqs_lock; /* Protect srqs allocated count */

    int flags;
    struct rvt_ibport **ports;

    /* QP */
    struct rvt_qp_ibdev *qp_dev;
    u32 n_qps_allocated;    /* number of QPs allocated for device */
    u32 n_rc_qps;       /* number of RC QPs allocated for device */
    u32 busy_jiffies;   /* timeout scaling based on RC QP count */
    spinlock_t n_qps_lock;  /* protect qps, rc qps and busy jiffy counts */

    /* memory maps */
    struct list_head pending_mmaps;
    spinlock_t mmap_offset_lock; /* protect mmap_offset */
    u32 mmap_offset;
    spinlock_t pending_lock; /* protect pending mmap list */

    /* CQ */
    u32 n_cqs_allocated;    /* number of CQs allocated for device */
    spinlock_t n_cqs_lock; /* protect count of in use cqs */

    /* Multicast */
    u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
    spinlock_t n_mcast_grps_lock;

    /* Memory Working Set Size */
    struct rvt_wss *wss;
};
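
/*
 * Illustrative sketch (not part of this header): since ibdev must remain
 * the first member, a driver can wrap rvt_dev_info the same way and
 * recover its own device from an ib_device with nested container_of()
 * calls.  The mydrv_ibdev type is hypothetical; ib_to_rvt() is defined
 * below.  Space for the whole wrapper comes from
 * rvt_alloc_device(sizeof(struct mydrv_ibdev), nports).
 *
 *    struct mydrv_ibdev {
 *        struct rvt_dev_info rdi;
 *        struct mydrv_pportdata *pports;
 *    };
 *
 *    static inline struct mydrv_ibdev *ibdev_to_mydrv(struct ib_device *ibdev)
 *    {
 *        struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
 *
 *        return container_of(rdi, struct mydrv_ibdev, rdi);
 *    }
 */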

/**
 * rvt_set_ibdev_name - Craft an IB device name from client info
 * @rdi: pointer to the client rvt_dev_info structure
 * @fmt: format string for the device name
 * @name: client specific name
 * @unit: client specific unit number.
 */
static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
                      const char *fmt, const char *name,
                      const int unit)
{
    /*
     * FIXME: rvt and its users want to touch the ibdev before
     * registration and have things like the name work. We don't have the
     * infrastructure in the core to support this directly today, so hack
     * it to work by setting the name manually here.
     */
    dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
    strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
}
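
/*
 * Illustrative call (hypothetical driver name and unit number): a driver
 * whose devices are named "mydrv_0", "mydrv_1", ... would set the name
 * with:
 *
 *    rvt_set_ibdev_name(rdi, "%s_%d", "mydrv", unit);
 */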

/**
 * rvt_get_ibdev_name - return the IB name
 * @rdi: rdmavt device
 *
 * Return the registered name of the device.
 */
static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
{
    return dev_name(&rdi->ibdev.dev);
}

static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
{
    return container_of(ibpd, struct rvt_pd, ibpd);
}

static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
{
    return container_of(ibah, struct rvt_ah, ibah);
}

static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
{
    return container_of(ibdev, struct rvt_dev_info, ibdev);
}

static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
{
    /*
     * All ports have the same number of pkeys.
     */
    return rdi->dparms.npkeys;
}

/*
 * Return the max atomic suitable for determining
 * the size of the ack ring buffer in a QP.
 */
static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
{
    return rdi->dparms.max_rdma_atomic +
        rdi->dparms.extra_rdma_atomic + 1;
}

static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
{
    return rdi->dparms.max_rdma_atomic +
        rdi->dparms.extra_rdma_atomic;
}
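
/*
 * Worked example (illustrative values): with dparms.max_rdma_atomic = 16
 * and dparms.extra_rdma_atomic = 1, rvt_size_atomic() returns 17 and
 * rvt_max_atomic() returns 18, leaving the ack ring buffer one entry
 * larger than the number of RDMA atomic/read responses it may have to
 * hold at once.
 */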

/*
 * Return the indexed PKEY from the port PKEY table.
 */
static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
                   int port_index,
                   unsigned index)
{
    if (index >= rvt_get_npkeys(rdi))
        return 0;
    else
        return rdi->ports[port_index]->pkey_table[index];
}
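
/*
 * Illustrative sketch (not part of this header): scanning a port's pkey
 * table for a match, for example when validating the P_Key in a received
 * MAD.  Note that port_index is the zero-based index into rdi->ports,
 * not the one-based IB port number.  The helper name is hypothetical.
 *
 *    static int mydrv_find_pkey_index(struct rvt_dev_info *rdi,
 *                     int port_index, u16 pkey)
 *    {
 *        unsigned i;
 *
 *        for (i = 0; i < rvt_get_npkeys(rdi); i++)
 *            if ((rvt_get_pkey(rdi, port_index, i) & 0x7fff) ==
 *                (pkey & 0x7fff))
 *                return i;
 *        return -1;
 *    }
 */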

struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
void rvt_dealloc_device(struct rvt_dev_info *rdi);
int rvt_register_device(struct rvt_dev_info *rvd);
void rvt_unregister_device(struct rvt_dev_info *rvd);
int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
          int port_index, u16 *pkey_table);
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
            int access);
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
        u32 len, u64 vaddr, u32 rkey, int acc);
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
        struct rvt_sge *isge, struct rvt_sge *last_sge,
        struct ib_sge *sge, int acc);
struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
                 u16 lid);

#endif          /* DEF_RDMA_VT_H */
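
/*
 * Illustrative sketch of the overall registration flow (all mydrv_* names
 * are hypothetical; the rvt_* calls are the ones declared above).  A
 * driver allocates the shared device with room for its own wrapper, fills
 * in dparms and driver_f, initializes each port, and only then registers:
 *
 *    static int mydrv_register(int nports, int node, int unit)
 *    {
 *        struct rvt_dev_info *rdi;
 *        int i, ret;
 *
 *        rdi = rvt_alloc_device(sizeof(struct mydrv_ibdev), nports);
 *        if (!rdi)
 *            return -ENOMEM;
 *
 *        mydrv_fill_dparms(rdi, node);
 *        mydrv_set_driver_f(rdi);
 *        rvt_set_ibdev_name(rdi, "%s_%d", "mydrv", unit);
 *
 *        for (i = 0; i < nports; i++) {
 *            ret = rvt_init_port(rdi, mydrv_ibport(rdi, i), i,
 *                        mydrv_pkey_table(rdi, i));
 *            if (ret)
 *                goto err;
 *        }
 *
 *        ret = rvt_register_device(rdi);
 *        if (ret)
 *            goto err;
 *        return 0;
 *    err:
 *        rvt_dealloc_device(rdi);
 *        return ret;
 *    }
 *
 * Teardown is the mirror image: rvt_unregister_device() followed by
 * rvt_dealloc_device().
 */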