// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
/*          Kai Shen <kaishen@linux.alibaba.com> */
/* Copyright (c) 2020-2022, Alibaba Group. */

#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/types.h>

#include "erdma.h"
#include "erdma_hw.h"
#include "erdma_verbs.h"

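/*
 * Ring the command queue's CQ doorbell with the ARM bit set so the device
 * raises a completion event for the next CQE; the doorbell value is also
 * mirrored into the host-resident doorbell record.
 */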
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
    struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
    u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
                  FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
                  FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
                  FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

    *cmdq->cq.db_record = db_data;
    writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

    atomic64_inc(&cmdq->cq.armed_num);
}

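/* Ring the command SQ doorbell with the current producer index. */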
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
    struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
    u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

    *cmdq->sq.db_record = db_data;
    writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}

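/*
 * get_comp_wait()/put_comp_wait() hand out and reclaim per-command wait
 * contexts from a small bitmap-managed pool; each context carries the
 * completion status and response data for one outstanding command.
 */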
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
    int comp_idx;

    spin_lock(&cmdq->lock);
    comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
                                   cmdq->max_outstandings);
    if (comp_idx == cmdq->max_outstandings) {
        spin_unlock(&cmdq->lock);
        return ERR_PTR(-ENOMEM);
    }

    __set_bit(comp_idx, cmdq->comp_wait_bitmap);
    spin_unlock(&cmdq->lock);

    return &cmdq->wait_pool[comp_idx];
}

static void put_comp_wait(struct erdma_cmdq *cmdq,
                          struct erdma_comp_wait *comp_wait)
{
    int used;

    cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
    spin_lock(&cmdq->lock);
    used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
    spin_unlock(&cmdq->lock);

    WARN_ON(!used);
}

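/* Allocate the wait-context pool and its allocation bitmap. */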
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
                                    struct erdma_cmdq *cmdq)
{
    int i;

    cmdq->wait_pool =
        devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
                     sizeof(struct erdma_comp_wait), GFP_KERNEL);
    if (!cmdq->wait_pool)
        return -ENOMEM;

    spin_lock_init(&cmdq->lock);
    cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
        &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
    if (!cmdq->comp_wait_bitmap)
        return -ENOMEM;

    for (i = 0; i < cmdq->max_outstandings; i++) {
        init_completion(&cmdq->wait_pool[i].wait_event);
        cmdq->wait_pool[i].ctx_id = i;
    }

    return 0;
}

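/*
 * Allocate the command SQ buffer (with the doorbell record placed right
 * after the queue) and tell the device its DMA address and depth.
 */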
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
    struct erdma_cmdq *cmdq = &dev->cmdq;
    struct erdma_cmdq_sq *sq = &cmdq->sq;
    u32 buf_size;

    sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
    sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

    buf_size = sq->depth << SQEBB_SHIFT;

    sq->qbuf =
        dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
                           &sq->qbuf_dma_addr, GFP_KERNEL);
    if (!sq->qbuf)
        return -ENOMEM;

    sq->db_record = (u64 *)(sq->qbuf + buf_size);

    spin_lock_init(&sq->lock);

    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
                      upper_32_bits(sq->qbuf_dma_addr));
    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
                      lower_32_bits(sq->qbuf_dma_addr));
    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
    erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
                      sq->qbuf_dma_addr + buf_size);

    return 0;
}

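/* Same layout for the command CQ: queue buffer followed by its doorbell record. */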
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
    struct erdma_cmdq *cmdq = &dev->cmdq;
    struct erdma_cmdq_cq *cq = &cmdq->cq;
    u32 buf_size;

    cq->depth = cmdq->sq.depth;
    buf_size = cq->depth << CQE_SHIFT;

    cq->qbuf =
        dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
                           &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
    if (!cq->qbuf)
        return -ENOMEM;

    spin_lock_init(&cq->lock);

    cq->db_record = (u64 *)(cq->qbuf + buf_size);

    atomic64_set(&cq->armed_num, 0);

    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
                      upper_32_bits(cq->qbuf_dma_addr));
    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
                      lower_32_bits(cq->qbuf_dma_addr));
    erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
                      cq->qbuf_dma_addr + buf_size);

    return 0;
}

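/* The command EQ delivers completion notifications once the cmdq runs in event mode. */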
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
    struct erdma_cmdq *cmdq = &dev->cmdq;
    struct erdma_eq *eq = &cmdq->eq;
    u32 buf_size;

    eq->depth = cmdq->max_outstandings;
    buf_size = eq->depth << EQE_SHIFT;

    eq->qbuf =
        dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
                           &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
    if (!eq->qbuf)
        return -ENOMEM;

    spin_lock_init(&eq->lock);
    atomic64_set(&eq->event_num, 0);

    eq->db_addr =
        (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG);
    eq->db_record = (u64 *)(eq->qbuf + buf_size);

    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
                      upper_32_bits(eq->qbuf_dma_addr));
    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
                      lower_32_bits(eq->qbuf_dma_addr));
    erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
    erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
                      eq->qbuf_dma_addr + buf_size);

    return 0;
}

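/*
 * Bring up the command queue in polling mode: set up the wait contexts,
 * SQ, CQ and EQ, then kick device initialization and wait (bounded) for
 * the init-done status bit before marking the cmdq usable.
 */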
int erdma_cmdq_init(struct erdma_dev *dev)
{
    int err, i;
    struct erdma_cmdq *cmdq = &dev->cmdq;
    u32 sts, ctrl;

    cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
    cmdq->use_event = false;

    sema_init(&cmdq->credits, cmdq->max_outstandings);

    err = erdma_cmdq_wait_res_init(dev, cmdq);
    if (err)
        return err;

    err = erdma_cmdq_sq_init(dev);
    if (err)
        return err;

    err = erdma_cmdq_cq_init(dev);
    if (err)
        goto err_destroy_sq;

    err = erdma_cmdq_eq_init(dev);
    if (err)
        goto err_destroy_cq;

    ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1);
    erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);

    for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
        sts = erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
                                     ERDMA_REG_DEV_ST_INIT_DONE_MASK);
        if (sts)
            break;

        msleep(ERDMA_REG_ACCESS_WAIT_MS);
    }

    if (i == ERDMA_WAIT_DEV_DONE_CNT) {
        dev_err(&dev->pdev->dev, "wait init done failed.\n");
        err = -ETIMEDOUT;
        goto err_destroy_eq;
    }

    set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

    return 0;

err_destroy_eq:
    dma_free_coherent(&dev->pdev->dev,
                      (cmdq->eq.depth << EQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE,
                      cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);

err_destroy_cq:
    dma_free_coherent(&dev->pdev->dev,
                      (cmdq->cq.depth << CQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE,
                      cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);

err_destroy_sq:
    dma_free_coherent(&dev->pdev->dev,
                      (cmdq->sq.depth << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE,
                      cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);

    return err;
}

void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
    /* After device init succeeds, switch the cmdq to event mode. */
    dev->cmdq.use_event = true;
    arm_cmdq_cq(&dev->cmdq);
}

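/* Mark the cmdq unusable and free the EQ/SQ/CQ buffers (doorbell-record space included). */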
void erdma_cmdq_destroy(struct erdma_dev *dev)
{
    struct erdma_cmdq *cmdq = &dev->cmdq;

    clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

    dma_free_coherent(&dev->pdev->dev,
                      (cmdq->eq.depth << EQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE,
                      cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
    dma_free_coherent(&dev->pdev->dev,
                      (cmdq->sq.depth << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE,
                      cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
    dma_free_coherent(&dev->pdev->dev,
                      (cmdq->cq.depth << CQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE,
                      cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
}

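/*
 * Return the CQE at the current consumer index if its owner bit shows it
 * was written by the device in the current pass over the ring, or NULL
 * otherwise.
 */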
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
    __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
                                  cmdq->cq.depth, CQE_SHIFT);
    u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
                          __be32_to_cpu(READ_ONCE(*cqe)));

    return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}

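/*
 * Copy the request into the SQ, then rewrite its header with the advanced
 * producer index, the wait-context cookie and the WQEBB count before
 * ringing the doorbell.
 */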
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
                          struct erdma_comp_wait *comp_wait)
{
    __le64 *wqe;
    u64 hdr = *req;

    comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
    reinit_completion(&comp_wait->wait_event);
    comp_wait->sq_pi = cmdq->sq.pi;

    wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
                          SQEBB_SHIFT);
    memcpy(wqe, req, req_len);

    cmdq->sq.pi += cmdq->sq.wqebb_cnt;
    hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
           FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, comp_wait->ctx_id) |
           FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
    *wqe = cpu_to_le64(hdr);

    kick_cmdq_db(cmdq);
}

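/*
 * Consume one CQE: locate the originating SQE from the index reported in
 * the CQE, recover the wait context via the cookie stored in the SQE
 * header, record the syndrome and the four response words, and wake the
 * waiter in event mode.
 */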
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
    struct erdma_comp_wait *comp_wait;
    u32 hdr0, sqe_idx;
    __be32 *cqe;
    u16 ctx_id;
    u64 *sqe;
    int i;

    cqe = get_next_valid_cmdq_cqe(cmdq);
    if (!cqe)
        return -EAGAIN;

    cmdq->cq.ci++;

    dma_rmb();
    hdr0 = __be32_to_cpu(*cqe);
    sqe_idx = __be32_to_cpu(*(cqe + 1));

    sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
                          SQEBB_SHIFT);
    ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
    comp_wait = &cmdq->wait_pool[ctx_id];
    if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
        return -EIO;

    comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
    comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
    cmdq->sq.ci += cmdq->sq.wqebb_cnt;

    for (i = 0; i < 4; i++)
        comp_wait->comp_data[i] = __be32_to_cpu(*(cqe + 2 + i));

    if (cmdq->use_event)
        complete(&comp_wait->wait_event);

    return 0;
}

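/* Drain all currently valid CQEs under the CQ lock and re-arm the CQ in event mode. */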
static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
    unsigned long flags;
    u16 comp_num;

    spin_lock_irqsave(&cmdq->cq.lock, flags);

    /* We must have less than # of max_outstandings
     * completions at one time.
     */
    for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
        if (erdma_poll_single_cmd_completion(cmdq))
            break;

    if (comp_num && cmdq->use_event)
        arm_cmdq_cq(cmdq);

    spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}

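/*
 * EQ handler for the command queue (event mode): consume the pending EQEs,
 * bump the command sequence number, reap completions and notify the EQ.
 */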
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
    int got_event = 0;

    if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
        !cmdq->use_event)
        return;

    while (get_next_valid_eqe(&cmdq->eq)) {
        cmdq->eq.ci++;
        got_event++;
    }

    if (got_event) {
        cmdq->cq.cmdsn++;
        erdma_polling_cmd_completions(cmdq);
    }

    notify_eq(&cmdq->eq);
}

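/*
 * Wait for a single command to finish: busy-poll the CQ in polling mode,
 * or sleep on the per-command completion in event mode; both paths time
 * out with -ETIME.
 */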
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
                                     struct erdma_cmdq *cmdq, u32 timeout)
{
    unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

    while (1) {
        erdma_polling_cmd_completions(cmdq);
        if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
            break;

        if (time_is_before_jiffies(comp_timeout))
            return -ETIME;

        msleep(20);
    }

    return 0;
}

static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
                                     struct erdma_cmdq *cmdq, u32 timeout)
{
    unsigned long flags = 0;

    wait_for_completion_timeout(&comp_ctx->wait_event,
                                msecs_to_jiffies(timeout));

    if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
        spin_lock_irqsave(&cmdq->cq.lock, flags);
        comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
        spin_unlock_irqrestore(&cmdq->cq.lock, flags);
        return -ETIME;
    }

    return 0;
}

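/* Compose the common command header from the sub-module and opcode fields. */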
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
    *hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
           FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}

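/*
 * erdma_post_cmd_wait() is the single entry point for issuing a command:
 * it reserves a credit and a wait context, pushes the SQE, then either
 * sleeps on the completion (event mode) or busy-polls the CQ (polling
 * mode, used before interrupts are up).  A caller typically builds a
 * request on the stack, fills its header with erdma_cmdq_build_reqhdr()
 * and posts it, roughly like the illustrative sketch below (the concrete
 * request layouts and opcode constants live in erdma_hw.h, not here):
 *
 *     struct erdma_cmdq_destroy_cq_req req;
 *
 *     erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
 *                             CMDQ_OPCODE_DESTROY_CQ);
 *     req.cqn = cq->cqn;
 *     ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req),
 *                               NULL, NULL);
 */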
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
                        u64 *resp0, u64 *resp1)
{
    struct erdma_comp_wait *comp_wait;
    int ret;

    if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
        return -ENODEV;

    down(&cmdq->credits);

    comp_wait = get_comp_wait(cmdq);
    if (IS_ERR(comp_wait)) {
        clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
        set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
        up(&cmdq->credits);
        return PTR_ERR(comp_wait);
    }

    spin_lock(&cmdq->sq.lock);
    push_cmdq_sqe(cmdq, req, req_size, comp_wait);
    spin_unlock(&cmdq->sq.lock);

    if (cmdq->use_event)
        ret = erdma_wait_cmd_completion(comp_wait, cmdq,
                                        ERDMA_CMDQ_TIMEOUT_MS);
    else
        ret = erdma_poll_cmd_completion(comp_wait, cmdq,
                                        ERDMA_CMDQ_TIMEOUT_MS);

    if (ret) {
        set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
        clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
        goto out;
    }

    if (comp_wait->comp_status)
        ret = -EIO;

    if (resp0 && resp1) {
        *resp0 = *((u64 *)&comp_wait->comp_data[0]);
        *resp1 = *((u64 *)&comp_wait->comp_data[2]);
    }
    put_comp_wait(cmdq, comp_wait);

out:
    up(&cmdq->credits);

    return ret;
}