#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/types.h>

#include "erdma.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"

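/* Doorbell convention used throughout this file (inferred from the code,
 * not restated from a hardware spec): each queue keeps a 64-bit doorbell
 * shadow in host memory (db_record) that is written before the MMIO
 * doorbell register. The device is told the shadow's DMA address at init
 * time via the *_DB_HOST_ADDR_REG writes below, presumably so it can
 * re-fetch the latest doorbell value from host memory.
 */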
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}

static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.db_record = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}

static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}

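/* Ring buffer layout assumed by the init functions below: each ring is
 * allocated with WARPPED_BUFSIZE(), which (judging by its use) pads the
 * allocation so the 64-bit doorbell record can live in the same
 * DMA-coherent buffer, immediately after the ring entries:
 *
 *	qbuf:  [ entries ................... ][ db_record ]
 *	       0                       buf_size
 */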
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;
	u32 buf_size;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	buf_size = sq->depth << SQEBB_SHIFT;

	sq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->db_record = (u64 *)(sq->qbuf + buf_size);

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
			  sq->qbuf_dma_addr + buf_size);

	return 0;
}

static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;
	u32 buf_size;

	cq->depth = cmdq->sq.depth;
	buf_size = cq->depth << CQE_SHIFT;

	cq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->db_record = (u64 *)(cq->qbuf + buf_size);

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
			  cq->qbuf_dma_addr + buf_size);

	return 0;
}

static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;
	u32 buf_size;

	eq->depth = cmdq->max_outstandings;
	buf_size = eq->depth << EQE_SHIFT;

	eq->qbuf =
		dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
				   &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db_addr =
		(u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
	eq->db_record = (u64 *)(eq->qbuf + buf_size);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
			  eq->qbuf_dma_addr + buf_size);

	return 0;
}

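/* Bring-up sequence: allocate the wait contexts, then the SQ, CQ and EQ
 * rings, then kick device initialization through the DEV_CTRL register
 * and poll DEV_ST until the device reports init done (or time out and
 * unwind the allocations).
 */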
int erdma_cmdq_init(struct erdma_dev *dev)
{
	int err, i;
	struct erdma_cmdq *cmdq = &dev->cmdq;
	u32 sts, ctrl;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	/* Start in polling mode; erdma_finish_cmdq_init() switches the
	 * cmdq to event mode once interrupts are available.
	 */
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	/* Queues are ready; kick off device initialization. */
	ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
	erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

	/* Poll until the device reports init done, or give up. */
	for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
		sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
					     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
		if (sts)
			break;

		msleep(ERDMA_REG_ACCESS_WAIT_MS);
	}

	if (i == ERDMA_WAIT_DEV_DONE_CNT) {
		dev_err(&dev->pdev->dev, "wait init done failed.\n");
		err = -ETIMEDOUT;
		goto err_destroy_eq;
	}

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_eq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

	return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* Device init has succeeded; switch the cmdq to event mode and
	 * arm the completion queue.
	 */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}

void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->eq.depth << EQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->sq.depth << SQEBB_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_free_coherent(&dev->pdev->dev,
			  (cmdq->cq.depth << CQE_SHIFT) +
				  ERDMA_EXTRA_BUFFER_SIZE,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

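/* Command CQE layout as consumed by the poll path below (inferred from
 * the accesses, not restated from a hardware spec):
 *
 *	dword 0:     owner bit and syndrome (completion status)
 *	dword 1:     index of the matching SQE in the command SQ
 *	dwords 2..5: 16 bytes of response data (comp_data[0..3])
 */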
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      __be32_to_cpu(READ_ONCE(*cqe)));

	/* The owner bit encodes the ring wrap phase: a CQE is new only
	 * when its owner bit differs from the phase implied by the
	 * consumer index.
	 */
	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	__le64 *wqe;
	u64 hdr = *req;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	/* Rewrite the first qword after the payload copy so the header
	 * also carries the updated producer index, the wait-context
	 * cookie, and the WQEBB count.
	 */
	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}

static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;
	int i;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	/* Read the CQE body only after the owner bit check. */
	dma_rmb();
	hdr0 = __be32_to_cpu(*cqe);
	sqe_idx = __be32_to_cpu(*(cqe + 1));

	/* Recover the wait context from the cookie stored in the SQE. */
	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;

	for (i = 0; i < 4; i++)
		comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}

static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* At most max_outstandings commands can be in flight, so there
	 * can be at most that many completions to reap in one pass.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

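/* Event-mode completion path: the command EQ interrupt invokes this
 * handler, which consumes EQEs purely as a "work available" signal and
 * then reaps the actual CQEs, waking sleepers via complete().
 */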
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	/* EQE contents are not examined; EQEs are only counted to detect
	 * that new completions have arrived.
	 */
	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}

static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		msleep(20);
	}

	return 0;
}

static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		/* Mark the context timed out under cq.lock so this cannot
		 * race with a concurrent completion in the IRQ path.
		 */
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}

void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

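/* Typical caller flow, as an illustrative sketch only; the request type
 * and the submodule/opcode names below are placeholders standing in for
 * definitions elsewhere in the driver, not symbols defined in this file:
 *
 *	struct example_cmdq_req req;
 *
 *	erdma_cmdq_build_reqhdr(&req.hdr, EXAMPLE_SUBMOD, EXAMPLE_OPCODE);
 *	// ...fill the rest of req...
 *	ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req),
 *				  &resp0, &resp1);
 */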
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	/* Each outstanding command consumes one credit. */
	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		/* Should not happen while credits are respected; treat it
		 * as a fatal context error.
		 */
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		/* On timeout the wait context is not returned to the
		 * pool, presumably because the device may still write its
		 * completion later; the cmdq is marked broken instead.
		 */
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}