Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
0003 #include <linux/init.h>
0004 #include <linux/kernel.h>
0005 #include <linux/module.h>
0006 #include <linux/pci.h>
0007 #include <uapi/linux/idxd.h>
0008 #include "idxd.h"
0009 #include "registers.h"
0010 
0011 static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
0012 {
0013     struct idxd_desc *desc;
0014     struct idxd_device *idxd = wq->idxd;
0015 
0016     desc = wq->descs[idx];
0017     memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
0018     memset(desc->completion, 0, idxd->data->compl_size);
0019     desc->cpu = cpu;
0020 
0021     if (device_pasid_enabled(idxd))
0022         desc->hw->pasid = idxd->pasid;
0023 
0024     return desc;
0025 }
0026 
0027 struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
0028 {
0029     int cpu, idx;
0030     struct idxd_device *idxd = wq->idxd;
0031     DEFINE_SBQ_WAIT(wait);
0032     struct sbq_wait_state *ws;
0033     struct sbitmap_queue *sbq;
0034 
0035     if (idxd->state != IDXD_DEV_ENABLED)
0036         return ERR_PTR(-EIO);
0037 
0038     sbq = &wq->sbq;
0039     idx = sbitmap_queue_get(sbq, &cpu);
0040     if (idx < 0) {
0041         if (optype == IDXD_OP_NONBLOCK)
0042             return ERR_PTR(-EAGAIN);
0043     } else {
0044         return __get_desc(wq, idx, cpu);
0045     }
0046 
0047     ws = &sbq->ws[0];
0048     for (;;) {
0049         sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
0050         if (signal_pending_state(TASK_INTERRUPTIBLE, current))
0051             break;
0052         idx = sbitmap_queue_get(sbq, &cpu);
0053         if (idx >= 0)
0054             break;
0055         schedule();
0056     }
0057 
0058     sbitmap_finish_wait(sbq, ws, &wait);
0059     if (idx < 0)
0060         return ERR_PTR(-EAGAIN);
0061 
0062     return __get_desc(wq, idx, cpu);
0063 }
0064 
0065 void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
0066 {
0067     int cpu = desc->cpu;
0068 
0069     desc->cpu = -1;
0070     sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
0071 }
0072 
0073 static struct idxd_desc *list_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
0074                      struct idxd_desc *desc)
0075 {
0076     struct idxd_desc *d, *n;
0077 
0078     lockdep_assert_held(&ie->list_lock);
0079     list_for_each_entry_safe(d, n, &ie->work_list, list) {
0080         if (d == desc) {
0081             list_del(&d->list);
0082             return d;
0083         }
0084     }
0085 
0086     /*
0087      * At this point, the desc needs to be aborted is held by the completion
0088      * handler where it has taken it off the pending list but has not added to the
0089      * work list. It will be cleaned up by the interrupt handler when it sees the
0090      * IDXD_COMP_DESC_ABORT for completion status.
0091      */
0092     return NULL;
0093 }
0094 
0095 static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
0096                  struct idxd_desc *desc)
0097 {
0098     struct idxd_desc *d, *t, *found = NULL;
0099     struct llist_node *head;
0100     LIST_HEAD(flist);
0101 
0102     desc->completion->status = IDXD_COMP_DESC_ABORT;
0103     /*
0104      * Grab the list lock so it will block the irq thread handler. This allows the
0105      * abort code to locate the descriptor need to be aborted.
0106      */
0107     spin_lock(&ie->list_lock);
0108     head = llist_del_all(&ie->pending_llist);
0109     if (head) {
0110         llist_for_each_entry_safe(d, t, head, llnode) {
0111             if (d == desc) {
0112                 found = desc;
0113                 continue;
0114             }
0115 
0116             if (d->completion->status)
0117                 list_add_tail(&d->list, &flist);
0118             else
0119                 list_add_tail(&d->list, &ie->work_list);
0120         }
0121     }
0122 
0123     if (!found)
0124         found = list_abort_desc(wq, ie, desc);
0125     spin_unlock(&ie->list_lock);
0126 
0127     if (found)
0128         idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
0129 
0130     /*
0131      * completing the descriptor will return desc to allocator and
0132      * the desc can be acquired by a different process and the
0133      * desc->list can be modified.  Delete desc from list so the
0134      * list trasversing does not get corrupted by the other process.
0135      */
0136     list_for_each_entry_safe(d, t, &flist, list) {
0137         list_del_init(&d->list);
0138         idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
0139     }
0140 }
0141 
0142 /*
0143  * ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
0144  * has better control of number of descriptors being submitted to a shared wq by limiting
0145  * the number of driver allocated descriptors to the wq size. However, when the swq is
0146  * exported to a guest kernel, it may be shared with multiple guest kernels. This means
0147  * the likelihood of getting busy returned on the swq when submitting goes significantly up.
0148  * Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
0149  * up. The sysfs knob can be tuned by the system administrator.
0150  */
0151 int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
0152 {
0153     unsigned int retries = wq->enqcmds_retries;
0154     int rc;
0155 
0156     do {
0157         rc = enqcmds(portal, desc);
0158         if (rc == 0)
0159             break;
0160         cpu_relax();
0161     } while (retries--);
0162 
0163     return rc;
0164 }
0165 
0166 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
0167 {
0168     struct idxd_device *idxd = wq->idxd;
0169     struct idxd_irq_entry *ie = NULL;
0170     u32 desc_flags = desc->hw->flags;
0171     void __iomem *portal;
0172     int rc;
0173 
0174     if (idxd->state != IDXD_DEV_ENABLED)
0175         return -EIO;
0176 
0177     if (!percpu_ref_tryget_live(&wq->wq_active)) {
0178         wait_for_completion(&wq->wq_resurrect);
0179         if (!percpu_ref_tryget_live(&wq->wq_active))
0180             return -ENXIO;
0181     }
0182 
0183     portal = idxd_wq_portal_addr(wq);
0184 
0185     /*
0186      * The wmb() flushes writes to coherent DMA data before
0187      * possibly triggering a DMA read. The wmb() is necessary
0188      * even on UP because the recipient is a device.
0189      */
0190     wmb();
0191 
0192     /*
0193      * Pending the descriptor to the lockless list for the irq_entry
0194      * that we designated the descriptor to.
0195      */
0196     if (desc_flags & IDXD_OP_FLAG_RCI) {
0197         ie = &wq->ie;
0198         desc->hw->int_handle = ie->int_handle;
0199         llist_add(&desc->llnode, &ie->pending_llist);
0200     }
0201 
0202     if (wq_dedicated(wq)) {
0203         iosubmit_cmds512(portal, desc->hw, 1);
0204     } else {
0205         rc = idxd_enqcmds(wq, portal, desc->hw);
0206         if (rc < 0) {
0207             percpu_ref_put(&wq->wq_active);
0208             /* abort operation frees the descriptor */
0209             if (ie)
0210                 llist_abort_desc(wq, ie, desc);
0211             return rc;
0212         }
0213     }
0214 
0215     percpu_ref_put(&wq->wq_active);
0216     return 0;
0217 }