#include "ib_mr.h"

static inline void
rds_transition_frwr_state(struct rds_ib_mr *ibmr,
			  enum rds_ib_fr_state old_state,
			  enum rds_ib_fr_state new_state)
{
	if (cmpxchg(&ibmr->u.frmr.fr_state,
		    old_state, new_state) == old_state &&
	    old_state == FRMR_IS_INUSE) {
		/* enforce order of the fr_state update before
		 * decrementing i_fastreg_inuse_count
		 */
		smp_mb__before_atomic();
		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
		if (waitqueue_active(&rds_ib_ring_empty_wait))
			wake_up(&rds_ib_ring_empty_wait);
	}
}
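
/* Editor's note: a sketch of the fr_state transitions, derived from the
 * call sites in this file (not from separate documentation):
 *
 *   FRMR_IS_FREE  -> FRMR_IS_INUSE  in rds_ib_post_reg_frmr(), via cmpxchg,
 *                                   just before posting IB_WR_REG_MR
 *   FRMR_IS_INUSE -> FRMR_IS_FREE   in rds_ib_mr_cqe_handler(), once an
 *                                   IB_WR_LOCAL_INV completion arrives
 *   FRMR_IS_INUSE -> FRMR_IS_STALE  on a failed ib_post_send() or on an
 *                                   errored/flushed work completion
 *
 * Only the INUSE -> {FREE, STALE} transitions drop i_fastreg_inuse_count,
 * which is what rds_transition_frwr_state() above enforces.
 */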

static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
					   int npages)
{
	struct rds_ib_mr_pool *pool;
	struct rds_ib_mr *ibmr = NULL;
	struct rds_ib_frmr *frmr;
	int err = 0;

	if (npages <= RDS_MR_8K_MSG_SIZE)
		pool = rds_ibdev->mr_8k_pool;
	else
		pool = rds_ibdev->mr_1m_pool;

	ibmr = rds_ib_try_reuse_ibmr(pool);
	if (ibmr)
		return ibmr;

	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
			    rdsibdev_to_node(rds_ibdev));
	if (!ibmr) {
		err = -ENOMEM;
		goto out_no_cigar;
	}

	frmr = &ibmr->u.frmr;
	frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
			       pool->max_pages);
	if (IS_ERR(frmr->mr)) {
		pr_warn("RDS/IB: %s failed to allocate MR\n", __func__);
		err = PTR_ERR(frmr->mr);
		goto out_no_cigar;
	}

	ibmr->pool = pool;
	if (pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);

	if (atomic_read(&pool->item_count) > pool->max_items_soft)
		pool->max_items_soft = pool->max_items;

	frmr->fr_state = FRMR_IS_FREE;
	init_waitqueue_head(&frmr->fr_inv_done);
	init_waitqueue_head(&frmr->fr_reg_done);
	return ibmr;

out_no_cigar:
	kfree(ibmr);
	atomic_dec(&pool->item_count);
	return ERR_PTR(err);
}

static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
{
	struct rds_ib_mr_pool *pool = ibmr->pool;

	if (drop)
		llist_add(&ibmr->llnode, &pool->drop_list);
	else
		llist_add(&ibmr->llnode, &pool->free_list);
	atomic_add(ibmr->sg_len, &pool->free_pinned);
	atomic_inc(&pool->dirty_count);

	/* If we've pinned too many pages, request a flush */
	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
	    atomic_read(&pool->dirty_count) >= pool->max_items / 5)
		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
}

static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
{
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	struct ib_reg_wr reg_wr;
	int ret, off = 0;

	/* Reserve a work-request credit; spin until one is available */
	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		cpu_relax();
	}

	ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
				&off, PAGE_SIZE);
	if (unlikely(ret != ibmr->sg_dma_len))
		return ret < 0 ? ret : -EINVAL;

	if (cmpxchg(&frmr->fr_state,
		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
		return -EBUSY;

	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);

	/* Perform a WR for the fast_reg_mr. Each individual page
	 * in the sg list is added to the fast reg page list and placed
	 * inside the fast_reg_mr WR.  The key used is a rolling 8bit
	 * counter, which should guarantee uniqueness.
	 */
	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
	frmr->fr_reg = true;

	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.wr.num_sge = 0;
	reg_wr.mr = frmr->mr;
	reg_wr.key = frmr->mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE |
			IB_ACCESS_REMOTE_READ |
			IB_ACCESS_REMOTE_WRITE;
	reg_wr.wr.send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
	if (unlikely(ret)) {
		/* Failure here can be because of -ENOMEM as well */
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);

		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		if (printk_ratelimit())
			pr_warn("RDS/IB: %s returned error(%d)\n",
				__func__, ret);
		goto out;
	}

	/* Wait for the registration to complete in order to prevent an
	 * invalid access error resulting from the memory region being
	 * accessed while its registration is still pending.
	 */
	wait_event(frmr->fr_reg_done, !frmr->fr_reg);

out:
	return ret;
}
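
/* Editor's note on the page accounting in rds_ib_map_frmr() below: the loop
 * enforces that only the first sg segment may start at a page-unaligned DMA
 * address and only the last may end at one; an unaligned boundary on any
 * interior segment fails with -EINVAL.  The resulting dma_npages is a
 * conservative upper bound checked against pool->max_pages, e.g. (with
 * 4 KiB pages) a lone 0x1000-byte segment at DMA address 0x1800 really
 * spans two pages but is counted as three: one for the unaligned start,
 * one for the unaligned end, plus len >> PAGE_SHIFT = 1.
 */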

static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
			   struct rds_ib_mr_pool *pool,
			   struct rds_ib_mr *ibmr,
			   struct scatterlist *sg, unsigned int sg_len)
{
	struct ib_device *dev = rds_ibdev->dev;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	int i;
	u32 len;
	int ret = 0;

	/* We want to teardown old ibmr values here and fill it up with
	 * new sg values
	 */
	rds_ib_teardown_mr(ibmr);

	ibmr->sg = sg;
	ibmr->sg_len = sg_len;
	ibmr->sg_dma_len = 0;
	frmr->sg_byte_len = 0;
	WARN_ON(ibmr->sg_dma_len);
	ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
					 DMA_BIDIRECTIONAL);
	if (unlikely(!ibmr->sg_dma_len)) {
		pr_warn("RDS/IB: %s failed!\n", __func__);
		return -EBUSY;
	}

	frmr->sg_byte_len = 0;
	frmr->dma_npages = 0;
	len = 0;

	ret = -EINVAL;
	for (i = 0; i < ibmr->sg_dma_len; ++i) {
		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);

		frmr->sg_byte_len += dma_len;
		/* Only the first segment may start unaligned */
		if (dma_addr & ~PAGE_MASK) {
			if (i > 0)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		/* Only the last segment may end unaligned */
		if ((dma_addr + dma_len) & ~PAGE_MASK) {
			if (i < ibmr->sg_dma_len - 1)
				goto out_unmap;
			else
				++frmr->dma_npages;
		}

		len += dma_len;
	}
	frmr->dma_npages += len >> PAGE_SHIFT;

	if (frmr->dma_npages > ibmr->pool->max_pages) {
		ret = -EMSGSIZE;
		goto out_unmap;
	}

	ret = rds_ib_post_reg_frmr(ibmr);
	if (ret)
		goto out_unmap;

	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
	else
		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);

	return ret;

out_unmap:
	ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
			DMA_BIDIRECTIONAL);
	ibmr->sg_dma_len = 0;
	return ret;
}

static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
{
	struct ib_send_wr *s_wr;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
	struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
	int ret = -EINVAL;

	if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
		goto out;

	if (frmr->fr_state != FRMR_IS_INUSE)
		goto out;

	/* Reserve a work-request credit; spin until one is available */
	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		cpu_relax();
	}

	frmr->fr_inv = true;
	s_wr = &frmr->fr_wr;

	memset(s_wr, 0, sizeof(*s_wr));
	s_wr->wr_id = (unsigned long)(void *)ibmr;
	s_wr->opcode = IB_WR_LOCAL_INV;
	s_wr->ex.invalidate_rkey = frmr->mr->rkey;
	s_wr->send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
	if (unlikely(ret)) {
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
		frmr->fr_inv = false;
		/* enforce order of the fr_inv update before
		 * incrementing i_fastreg_wrs
		 */
		smp_mb__before_atomic();
		atomic_inc(&ibmr->ic->i_fastreg_wrs);
		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
		goto out;
	}

	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
	 * 1) avoid a silly bouncing between "free_list" and "drop_list"
	 *    triggered by rds_ib_reg_frmr() as it releases frmr regions
	 *    whose state is not FRMR_IS_FREE right away.
	 * 2) prevent an invalid access error in a race between a pending
	 *    IB_WR_LOCAL_INV operation and a teardown ("dma_unmap_sg",
	 *    "put_page") and de-registration ("ib_dereg_mr") of the
	 *    corresponding memory region.
	 */
	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);

out:
	return ret;
}
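
/* Editor's note: i_fastreg_wrs acts as a credit counter for outstanding
 * fast-registration work requests.  rds_ib_post_reg_frmr() and
 * rds_ib_post_inv() each take a credit (spinning while none is free)
 * before posting; a credit is returned either on a failed ib_post_send()
 * or, for successfully posted WRs, by rds_ib_mr_cqe_handler() below when
 * the signaled completion arrives.
 */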

void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
	struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;

	if (wc->status != IB_WC_SUCCESS) {
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
		if (rds_conn_up(ic->conn))
			rds_ib_conn_error(ic->conn,
					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
					  &ic->conn->c_laddr,
					  &ic->conn->c_faddr,
					  wc->status,
					  ib_wc_status_msg(wc->status),
					  wc->vendor_err);
	}

	if (frmr->fr_inv) {
		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
		frmr->fr_inv = false;
		wake_up(&frmr->fr_inv_done);
	}

	if (frmr->fr_reg) {
		frmr->fr_reg = false;
		wake_up(&frmr->fr_reg_done);
	}

	/* enforce order of the fr_reg/fr_inv updates before
	 * incrementing i_fastreg_wrs
	 */
	smp_mb__before_atomic();
	atomic_inc(&ic->i_fastreg_wrs);
}

void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
		       unsigned long *unpinned, unsigned int goal)
{
	struct rds_ib_mr *ibmr, *next;
	struct rds_ib_frmr *frmr;
	int ret = 0, ret2;
	unsigned int freed = *nfreed;

	/* Issue a LOCAL_INV for every MR that still has a DMA mapping */
	list_for_each_entry(ibmr, list, unmap_list) {
		if (ibmr->sg_dma_len) {
			ret2 = rds_ib_post_inv(ibmr);
			if (ret2 && !ret)
				ret = ret2;
		}
	}

	if (ret)
		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);

	/* Now we can destroy the DMA mapping and unpin any pages */
	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
		*unpinned += ibmr->sg_len;
		frmr = &ibmr->u.frmr;
		__rds_ib_teardown_mr(ibmr);
		if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
			/* Don't de-allocate if the MR is not free yet */
			if (frmr->fr_state == FRMR_IS_INUSE)
				continue;

			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
			else
				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
			list_del(&ibmr->unmap_list);
			if (frmr->mr)
				ib_dereg_mr(frmr->mr);
			kfree(ibmr);
			freed++;
		}
	}
	*nfreed = freed;
}

struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
				  struct rds_ib_connection *ic,
				  struct scatterlist *sg,
				  unsigned long nents, u32 *key)
{
	struct rds_ib_mr *ibmr = NULL;
	struct rds_ib_frmr *frmr;
	int ret;

	if (!ic) {
		/* TODO: Add FRWR support for RDS_GET_MR using proxy qp */
		return ERR_PTR(-EOPNOTSUPP);
	}

	/* Keep allocating, dropping any MR that is not yet reusable,
	 * until we get one in the FRMR_IS_FREE state.
	 */
	do {
		if (ibmr)
			rds_ib_free_frmr(ibmr, true);
		ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
		if (IS_ERR(ibmr))
			return ibmr;
		frmr = &ibmr->u.frmr;
	} while (frmr->fr_state != FRMR_IS_FREE);

	ibmr->ic = ic;
	ibmr->device = rds_ibdev;
	ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
	if (ret == 0) {
		*key = frmr->mr->rkey;
	} else {
		rds_ib_free_frmr(ibmr, false);
		ibmr = ERR_PTR(ret);
	}

	return ibmr;
}

void rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
{
	struct rds_ib_mr_pool *pool = ibmr->pool;
	struct rds_ib_frmr *frmr = &ibmr->u.frmr;

	if (frmr->fr_state == FRMR_IS_STALE)
		llist_add(&ibmr->llnode, &pool->drop_list);
	else
		llist_add(&ibmr->llnode, &pool->free_list);
}