Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
0003 #include <linux/io-64-nonatomic-lo-hi.h>
0004 #include <linux/moduleparam.h>
0005 #include <linux/module.h>
0006 #include <linux/delay.h>
0007 #include <linux/sizes.h>
0008 #include <linux/mutex.h>
0009 #include <linux/list.h>
0010 #include <linux/pci.h>
0011 #include <linux/pci-doe.h>
0012 #include <linux/io.h>
0013 #include "cxlmem.h"
0014 #include "cxlpci.h"
0015 #include "cxl.h"
0016 
0017 /**
0018  * DOC: cxl pci
0019  *
0020  * This implements the PCI exclusive functionality for a CXL device as it is
0021  * defined by the Compute Express Link specification. CXL devices may surface
0022  * certain functionality even if it isn't CXL enabled. While this driver is
0023  * focused around the PCI specific aspects of a CXL device, it binds to the
0024  * specific CXL memory device class code, and therefore the implementation of
0025  * cxl_pci is focused around CXL memory devices.
0026  *
0027  * The driver has several responsibilities, mainly:
0028  *  - Create the memX device and register on the CXL bus.
0029  *  - Enumerate the device's register interfaces and map them.
0030  *  - Register an nvdimm bridge device with cxl_core.
0031  *  - Register a CXL mailbox with cxl_core.
0032  */
0033 
/*
 * Non-zero while the device owns the mailbox, i.e. the doorbell bit in the
 * mailbox control register is still set (CXL 2.0 8.2.8.4).
 */
#define cxl_doorbell_busy(cxlds)                                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)
0037 
/* CXL 2.0 - 8.2.8.4 */
/*
 * NOTE(review): despite the _MS suffix this value is (2 * HZ) *jiffies*,
 * i.e. a 2 second budget suitable for direct use with time_after().
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
0040 
/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long, it's longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 */
static unsigned short mbox_ready_timeout = 60;
/* 0644: tunable at module load and via sysfs at runtime */
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
0053 
0054 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
0055 {
0056     const unsigned long start = jiffies;
0057     unsigned long end = start;
0058 
0059     while (cxl_doorbell_busy(cxlds)) {
0060         end = jiffies;
0061 
0062         if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
0063             /* Check again in case preempted before timeout test */
0064             if (!cxl_doorbell_busy(cxlds))
0065                 break;
0066             return -ETIMEDOUT;
0067         }
0068         cpu_relax();
0069     }
0070 
0071     dev_dbg(cxlds->dev, "Doorbell wait took %dms",
0072         jiffies_to_msecs(end) - jiffies_to_msecs(start));
0073     return 0;
0074 }
0075 
/*
 * Rate-limited error reporting that decodes the FATAL / FW_HALT bits of a
 * memory device status value. Arguments are fully parenthesized so a caller
 * may pass a compound expression (e.g. "a | b") for @status without being
 * bitten by operator precedence against '&'.
 */
#define cxl_err(dev, status, msg)                                        \
	dev_err_ratelimited(dev, msg ", device state %s%s\n",                  \
			    (status) & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
			    (status) & CXLMDEV_FW_HALT ? " firmware-halt" : "")

/* As cxl_err(), additionally reporting the failing mailbox command opcode */
#define cxl_cmd_err(dev, cmd, status, msg)                               \
	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",    \
			    (cmd)->opcode,                                     \
			    (status) & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
			    (status) & CXLMDEV_FW_HALT ? " firmware-halt" : "")
0086 
/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxlds: The device state to communicate with.
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxlds->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		/*
		 * NOTE(review): mbox_mutex serializes this driver's commands,
		 * so busy here presumably means an out-of-band agent (e.g.
		 * firmware) owns the mailbox -- see the one-time drain in
		 * cxl_pci_setup_mailbox().
		 */
		u64 md_status =
			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
			    "mailbox queue busy");
		return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		/* A non-zero input size without a buffer is a driver bug */
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command\n");
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/* Device return code is reported back to the caller, not mapped to errno */
	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
		return 0; /* completed but caller must check return_code */
	}

	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);

		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}
0209 
0210 static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
0211 {
0212     int rc;
0213 
0214     mutex_lock_io(&cxlds->mbox_mutex);
0215     rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
0216     mutex_unlock(&cxlds->mbox_mutex);
0217 
0218     return rc;
0219 }
0220 
/*
 * cxl_pci_setup_mailbox() - Bring up the primary mailbox
 *
 * Polls CXLMDEV_MBOX_IF_READY for up to mbox_ready_timeout seconds, drains
 * any doorbell left busy by a previous driver instance, installs the
 * mbox_send operation and records the device's advertised payload size
 * (clamped to the [256 byte, 1M] range the checks below enforce).
 *
 * Return: 0 on success, -ETIMEDOUT if the mailbox never became ready or
 * idle, -ENXIO if the advertised payload size is below the spec minimum.
 */
static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
{
	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
	unsigned long timeout;
	u64 md_status;

	/* Ready poll; msleep_interruptible() lets a pending signal abort early */
	timeout = jiffies + mbox_ready_timeout * HZ;
	do {
		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
		if (md_status & CXLMDEV_MBOX_IF_READY)
			break;
		if (msleep_interruptible(100))
			break;
	} while (!time_after(jiffies, timeout));

	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
		cxl_err(cxlds->dev, md_status,
			"timeout awaiting mailbox ready");
		return -ETIMEDOUT;
	}

	/*
	 * A command may be in flight from a previous driver instance,
	 * think kexec, do one doorbell wait so that
	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
	 * source for future doorbell busy events.
	 */
	if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
		cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
		return -ETIMEDOUT;
	}

	cxlds->mbox_send = cxl_pci_mbox_send;
	cxlds->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
	cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
	if (cxlds->payload_size < 256) {
		dev_err(cxlds->dev, "Mailbox is too small (%zub)",
			cxlds->payload_size);
		return -ENXIO;
	}

	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
		cxlds->payload_size);

	return 0;
}
0276 
0277 static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
0278 {
0279     void __iomem *addr;
0280     int bar = map->barno;
0281     struct device *dev = &pdev->dev;
0282     resource_size_t offset = map->block_offset;
0283 
0284     /* Basic sanity check that BAR is big enough */
0285     if (pci_resource_len(pdev, bar) < offset) {
0286         dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
0287             &pdev->resource[bar], &offset);
0288         return -ENXIO;
0289     }
0290 
0291     addr = pci_iomap(pdev, bar, 0);
0292     if (!addr) {
0293         dev_err(dev, "failed to map registers\n");
0294         return -ENOMEM;
0295     }
0296 
0297     dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
0298         bar, &offset);
0299 
0300     map->base = addr + map->block_offset;
0301     return 0;
0302 }
0303 
0304 static void cxl_unmap_regblock(struct pci_dev *pdev,
0305                    struct cxl_register_map *map)
0306 {
0307     pci_iounmap(pdev, map->base - map->block_offset);
0308     map->base = NULL;
0309 }
0310 
0311 static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
0312 {
0313     struct cxl_component_reg_map *comp_map;
0314     struct cxl_device_reg_map *dev_map;
0315     struct device *dev = &pdev->dev;
0316     void __iomem *base = map->base;
0317 
0318     switch (map->reg_type) {
0319     case CXL_REGLOC_RBI_COMPONENT:
0320         comp_map = &map->component_map;
0321         cxl_probe_component_regs(dev, base, comp_map);
0322         if (!comp_map->hdm_decoder.valid) {
0323             dev_err(dev, "HDM decoder registers not found\n");
0324             return -ENXIO;
0325         }
0326 
0327         dev_dbg(dev, "Set up component registers\n");
0328         break;
0329     case CXL_REGLOC_RBI_MEMDEV:
0330         dev_map = &map->device_map;
0331         cxl_probe_device_regs(dev, base, dev_map);
0332         if (!dev_map->status.valid || !dev_map->mbox.valid ||
0333             !dev_map->memdev.valid) {
0334             dev_err(dev, "registers not found: %s%s%s\n",
0335                 !dev_map->status.valid ? "status " : "",
0336                 !dev_map->mbox.valid ? "mbox " : "",
0337                 !dev_map->memdev.valid ? "memdev " : "");
0338             return -ENXIO;
0339         }
0340 
0341         dev_dbg(dev, "Probing device registers...\n");
0342         break;
0343     default:
0344         break;
0345     }
0346 
0347     return 0;
0348 }
0349 
0350 static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
0351 {
0352     struct device *dev = cxlds->dev;
0353     struct pci_dev *pdev = to_pci_dev(dev);
0354 
0355     switch (map->reg_type) {
0356     case CXL_REGLOC_RBI_COMPONENT:
0357         cxl_map_component_regs(pdev, &cxlds->regs.component, map);
0358         dev_dbg(dev, "Mapping component registers...\n");
0359         break;
0360     case CXL_REGLOC_RBI_MEMDEV:
0361         cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
0362         dev_dbg(dev, "Probing device registers...\n");
0363         break;
0364     default:
0365         break;
0366     }
0367 
0368     return 0;
0369 }
0370 
0371 static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
0372               struct cxl_register_map *map)
0373 {
0374     int rc;
0375 
0376     rc = cxl_find_regblock(pdev, type, map);
0377     if (rc)
0378         return rc;
0379 
0380     rc = cxl_map_regblock(pdev, map);
0381     if (rc)
0382         return rc;
0383 
0384     rc = cxl_probe_regs(pdev, map);
0385     cxl_unmap_regblock(pdev, map);
0386 
0387     return rc;
0388 }
0389 
/* devm action: tear down the XArray tracking this device's DOE mailboxes. */
static void cxl_pci_destroy_doe(void *mbs)
{
	struct xarray *xa = mbs;

	xa_destroy(xa);
}
0394 
/*
 * devm_cxl_pci_create_doe() - Enumerate DOE capabilities and create mailboxes
 *
 * Walks every DOE capability offset on the PCI device and stashes a mailbox
 * object for each in cxlds->doe_mbs, keyed by capability offset. Failures
 * are logged and skipped rather than propagated; the XArray itself is torn
 * down by a devm action.
 */
static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
{
	struct device *dev = cxlds->dev;
	struct pci_dev *pdev = to_pci_dev(dev);
	u16 off = 0;

	xa_init(&cxlds->doe_mbs);
	if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
		dev_err(dev, "Failed to create XArray for DOE's\n");
		return;
	}

	/*
	 * Mailbox creation is best effort.  Higher layers must determine if
	 * the lack of a mailbox for their protocol is a device failure or not.
	 */
	pci_doe_for_each_off(pdev, off) {
		struct pci_doe_mb *doe_mb;

		doe_mb = pcim_doe_create_mb(pdev, off);
		if (IS_ERR(doe_mb)) {
			dev_err(dev, "Failed to create MB object for MB @ %x\n",
				off);
			continue;
		}

		/*
		 * NOTE(review): on insert failure doe_mb is not freed here;
		 * the pcim_ prefix suggests it is device-managed, so cleanup
		 * presumably happens at unbind -- confirm.
		 */
		if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
			dev_err(dev, "xa_insert failed to insert MB @ %x\n",
				off);
			continue;
		}

		dev_dbg(dev, "Created DOE mailbox @%x\n", off);
	}
}
0430 
/*
 * cxl_pci_probe() - Bind to a CXL.mem Type-3 device
 *
 * Ordering matters: memdev registers must be mapped before the mailbox is
 * set up, and the mailbox must work before commands are enumerated, the
 * device is identified, and the memdev is registered on the CXL bus.
 * Component registers, DOE mailboxes, and the nvdimm bridge are optional;
 * their absence is logged but does not fail the probe.
 */
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	struct cxl_dev_state *cxlds;
	int rc;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;

	/* Device-managed state; freed automatically on unbind */
	cxlds = cxl_dev_state_create(&pdev->dev);
	if (IS_ERR(cxlds))
		return PTR_ERR(cxlds);

	cxlds->serial = pci_get_dsn(pdev);
	cxlds->cxl_dvsec = pci_find_dvsec_capability(
		pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
	if (!cxlds->cxl_dvsec)
		dev_warn(&pdev->dev,
			 "Device DVSEC not present, skip CXL.mem init\n");

	/* Memory-device registers (status, mbox, memdev) are mandatory */
	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_regs(cxlds, &map);
	if (rc)
		return rc;

	/*
	 * If the component registers can't be found, the cxl_pci driver may
	 * still be useful for management functions so don't return an error.
	 */
	cxlds->component_reg_phys = CXL_RESOURCE_NONE;
	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);

	/*
	 * NOTE(review): on the failure path above, @map may still hold the
	 * MEMDEV probe results; this call relies on cxl_regmap_to_base()
	 * handling that (or a partially updated map) safely -- confirm.
	 */
	cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);

	devm_cxl_pci_create_doe(cxlds);

	rc = cxl_pci_setup_mailbox(cxlds);
	if (rc)
		return rc;

	rc = cxl_enumerate_cmds(cxlds);
	if (rc)
		return rc;

	rc = cxl_dev_state_identify(cxlds);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(cxlds);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(cxlds);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	/* rc is 0 here; only the optional nvdimm registration can change it */
	if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

	return rc;
}
0506 
/* Match by class code rather than vendor/device ID: any CXL.mem device binds */
static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
0513 
static struct pci_driver cxl_pci_driver = {
	.name           = KBUILD_MODNAME,
	.id_table       = cxl_mem_pci_tbl,
	.probe          = cxl_pci_probe,
	.driver = {
		/* Probe may wait up to mbox_ready_timeout (60s default) --
		 * prefer async probe so boot is not serialized behind it. */
		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
MODULE_IMPORT_NS(CXL);