0001 // SPDX-License-Identifier: GPL-2.0+
0002 // Copyright 2017 IBM Corp.
0003 #include <asm/pnv-ocxl.h>
0004 #include <asm/opal.h>
0005 #include <misc/ocxl-config.h>
0006 #include "pci.h"
0007 
0008 #define PNV_OCXL_TL_P9_RECV_CAP     0x000000000000000Full
0009 #define PNV_OCXL_ACTAG_MAX      64
0010 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
0011 #define PNV_OCXL_PASID_BITS     15
0012 #define PNV_OCXL_PASID_MAX      ((1 << PNV_OCXL_PASID_BITS) - 1)
0013 
0014 #define AFU_PRESENT (1 << 31)
0015 #define AFU_INDEX_MASK 0x3F000000
0016 #define AFU_INDEX_SHIFT 24
0017 #define ACTAG_MASK 0xFFF
0018 
0019 
/* Contiguous range of actags assigned to one PCI function on a link */
struct actag_range {
	u16 start;	/* first actag of the range */
	u16 count;	/* number of actags in the range */
};
0024 
/*
 * Per-link actag bookkeeping. All the PCI functions of a device share
 * the same npu_link, keyed by domain/bus/device number (one entry per
 * opencapi link).
 */
struct npu_link {
	struct list_head list;			/* node in links_list */
	int domain;
	int bus;
	int dev;
	u16 fn_desired_actags[8];		/* desired actag count, per function */
	struct actag_range fn_actags[8];	/* assigned actag range, per function */
	bool assignment_done;			/* actag distribution computed for the link */
};
/* All known links; both protected by links_list_lock */
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);
0036 
0037 
0038 /*
0039  * opencapi actags handling:
0040  *
0041  * When sending commands, the opencapi device references the memory
0042  * context it's targeting with an 'actag', which is really an alias
0043  * for a (BDF, pasid) combination. When it receives a command, the NPU
0044  * must do a lookup of the actag to identify the memory context. The
0045  * hardware supports a finite number of actags per link (64 for
0046  * POWER9).
0047  *
0048  * The device can carry multiple functions, and each function can have
0049  * multiple AFUs. Each AFU advertises in its config space the number
0050  * of desired actags. The host must configure in the config space of
0051  * the AFU how many actags the AFU is really allowed to use (which can
0052  * be less than what the AFU desires).
0053  *
0054  * When a PCI function is probed by the driver, it has no visibility
0055  * about the other PCI functions and how many actags they'd like,
0056  * which makes it impossible to distribute actags fairly among AFUs.
0057  *
0058  * Unfortunately, the only way to know how many actags a function
0059  * desires is by looking at the data for each AFU in the config space
0060  * and add them up. Similarly, the only way to know how many actags
0061  * all the functions of the physical device desire is by adding the
0062  * previously computed function counts. Then we can match that against
0063  * what the hardware supports.
0064  *
0065  * To get a comprehensive view, we use a 'pci fixup': at the end of
0066  * PCI enumeration, each function counts how many actags its AFUs
0067  * desire and we save it in a 'npu_link' structure, shared between all
0068  * the PCI functions of a same device. Therefore, when the first
0069  * function is probed by the driver, we can get an idea of the total
0070  * count of desired actags for the device, and assign the actags to
0071  * the AFUs, by pro-rating if needed.
0072  */
0073 
0074 static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
0075 {
0076     int vsec = pos;
0077     u16 vendor, id;
0078 
0079     while ((vsec = pci_find_next_ext_capability(dev, vsec,
0080                             OCXL_EXT_CAP_ID_DVSEC))) {
0081         pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
0082                 &vendor);
0083         pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
0084         if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
0085             return vsec;
0086     }
0087     return 0;
0088 }
0089 
0090 static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
0091 {
0092     int vsec = 0;
0093     u8 idx;
0094 
0095     while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
0096                        vsec))) {
0097         pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
0098                 &idx);
0099         if (idx == afu_idx)
0100             return vsec;
0101     }
0102     return 0;
0103 }
0104 
0105 static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
0106 {
0107     int pos;
0108     u32 val;
0109 
0110     pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
0111                     OCXL_DVSEC_FUNC_ID);
0112     if (!pos)
0113         return -ESRCH;
0114 
0115     pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
0116     if (val & AFU_PRESENT)
0117         *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
0118     else
0119         *afu_idx = -1;
0120     return 0;
0121 }
0122 
0123 static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
0124 {
0125     int pos;
0126     u16 actag_sup;
0127 
0128     pos = find_dvsec_afu_ctrl(dev, afu_idx);
0129     if (!pos)
0130         return -ESRCH;
0131 
0132     pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
0133             &actag_sup);
0134     *actag = actag_sup & ACTAG_MASK;
0135     return 0;
0136 }
0137 
0138 static struct npu_link *find_link(struct pci_dev *dev)
0139 {
0140     struct npu_link *link;
0141 
0142     list_for_each_entry(link, &links_list, list) {
0143         /* The functions of a device all share the same link */
0144         if (link->domain == pci_domain_nr(dev->bus) &&
0145             link->bus == dev->bus->number &&
0146             link->dev == PCI_SLOT(dev->devfn)) {
0147             return link;
0148         }
0149     }
0150 
0151     /* link doesn't exist yet. Allocate one */
0152     link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
0153     if (!link)
0154         return NULL;
0155     link->domain = pci_domain_nr(dev->bus);
0156     link->bus = dev->bus->number;
0157     link->dev = PCI_SLOT(dev->devfn);
0158     list_add(&link->list, &links_list);
0159     return link;
0160 }
0161 
/*
 * PCI fixup, run for every function at the end of enumeration: add up
 * the actags desired by the AFUs of this function and record the
 * total in the npu_link shared by all the functions of the device, so
 * that actags can later be distributed fairly (see comment above).
 */
static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct npu_link *link;
	int rc, afu_idx = -1, i, actag;

	if (!machine_is(powernv))
		return;

	/* only functions behind an opencapi NPU PHB are of interest */
	if (phb->type != PNV_PHB_NPU_OCAPI)
		return;

	mutex_lock(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_warn(&dev->dev, "couldn't update actag information\n");
		mutex_unlock(&links_list_lock);
		return;
	}

	/*
	 * Check how many actags are desired for the AFUs under that
	 * function and add it to the count for the link
	 */
	rc = get_max_afu_index(dev, &afu_idx);
	if (rc) {
		/* Most likely an invalid config space */
		dev_dbg(&dev->dev, "couldn't find AFU information\n");
		afu_idx = -1;	/* skips the summing loop below */
	}

	link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
	for (i = 0; i <= afu_idx; i++) {
		/*
		 * AFU index 'holes' are allowed. So don't fail if we
		 * can't read the actag info for an index
		 */
		rc = get_actag_count(dev, i, &actag);
		if (rc)
			continue;
		link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
	}
	dev_dbg(&dev->dev, "total actags for function: %d\n",
		link->fn_desired_actags[PCI_FUNC(dev->devfn)]);

	mutex_unlock(&links_list_lock);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
0212 
0213 static u16 assign_fn_actags(u16 desired, u16 total)
0214 {
0215     u16 count;
0216 
0217     if (total <= PNV_OCXL_ACTAG_MAX)
0218         count = desired;
0219     else
0220         count = PNV_OCXL_ACTAG_MAX * desired / total;
0221 
0222     return count;
0223 }
0224 
0225 static void assign_actags(struct npu_link *link)
0226 {
0227     u16 actag_count, range_start = 0, total_desired = 0;
0228     int i;
0229 
0230     for (i = 0; i < 8; i++)
0231         total_desired += link->fn_desired_actags[i];
0232 
0233     for (i = 0; i < 8; i++) {
0234         if (link->fn_desired_actags[i]) {
0235             actag_count = assign_fn_actags(
0236                 link->fn_desired_actags[i],
0237                 total_desired);
0238             link->fn_actags[i].start = range_start;
0239             link->fn_actags[i].count = actag_count;
0240             range_start += actag_count;
0241             WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
0242         }
0243         pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
0244             link->domain, link->bus, link->dev, i,
0245             link->fn_actags[i].start, link->fn_actags[i].count,
0246             link->fn_desired_actags[i]);
0247     }
0248     link->assignment_done = true;
0249 }
0250 
/*
 * Return the actag range assigned to the function: first actag
 * (*base), number of usable actags (*enabled), and the count its AFUs
 * originally desired (*supported). The distribution for the whole
 * link is computed lazily on the first call.
 */
int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
		u16 *supported)
{
	struct npu_link *link;

	mutex_lock(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_err(&dev->dev, "actag information not found\n");
		mutex_unlock(&links_list_lock);
		return -ENODEV;
	}
	/*
	 * On p9, we only have 64 actags per link, so they must be
	 * shared by all the functions of the same adapter. We counted
	 * the desired actag counts during PCI enumeration, so that we
	 * can allocate a pro-rated number of actags to each function.
	 */
	if (!link->assignment_done)
		assign_actags(link);

	*base      = link->fn_actags[PCI_FUNC(dev->devfn)].start;
	*enabled   = link->fn_actags[PCI_FUNC(dev->devfn)].count;
	*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];

	mutex_unlock(&links_list_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
0281 
0282 int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
0283 {
0284     struct npu_link *link;
0285     int i, rc = -EINVAL;
0286 
0287     /*
0288      * The number of PASIDs (process address space ID) which can
0289      * be used by a function depends on how many functions exist
0290      * on the device. The NPU needs to be configured to know how
0291      * many bits are available to PASIDs and how many are to be
0292      * used by the function BDF identifier.
0293      *
0294      * We only support one AFU-carrying function for now.
0295      */
0296     mutex_lock(&links_list_lock);
0297 
0298     link = find_link(dev);
0299     if (!link) {
0300         dev_err(&dev->dev, "actag information not found\n");
0301         mutex_unlock(&links_list_lock);
0302         return -ENODEV;
0303     }
0304 
0305     for (i = 0; i < 8; i++)
0306         if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
0307             *count = PNV_OCXL_PASID_MAX;
0308             rc = 0;
0309             break;
0310         }
0311 
0312     mutex_unlock(&links_list_lock);
0313     dev_dbg(&dev->dev, "%d PASIDs available for function\n",
0314         rc ? 0 : *count);
0315     return rc;
0316 }
0317 EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
0318 
/*
 * Record the 4-bit receiving rate of template 'templ' in the rate
 * buffer. Rates are packed two per byte: template
 * PNV_OCXL_TL_MAX_TEMPLATE lands in the high nibble of buf[0], and
 * lower-numbered templates sit towards the end of the buffer.
 */
static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
{
	int shift, idx;

	WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
	idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
	shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
	buf[idx] |= rate << shift;
}
0328 
0329 int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
0330             char *rate_buf, int rate_buf_size)
0331 {
0332     if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
0333         return -EINVAL;
0334     /*
0335      * The TL capabilities are a characteristic of the NPU, so
0336      * we go with hard-coded values.
0337      *
0338      * The receiving rate of each template is encoded on 4 bits.
0339      *
0340      * On P9:
0341      * - templates 0 -> 3 are supported
0342      * - templates 0, 1 and 3 have a 0 receiving rate
0343      * - template 2 has receiving rate of 1 (extra cycle)
0344      */
0345     memset(rate_buf, 0, rate_buf_size);
0346     set_templ_rate(2, 1, rate_buf);
0347     *cap = PNV_OCXL_TL_P9_RECV_CAP;
0348     return 0;
0349 }
0350 EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
0351 
0352 int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
0353             uint64_t rate_buf_phys, int rate_buf_size)
0354 {
0355     struct pci_controller *hose = pci_bus_to_host(dev->bus);
0356     struct pnv_phb *phb = hose->private_data;
0357     int rc;
0358 
0359     if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
0360         return -EINVAL;
0361 
0362     rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
0363             rate_buf_phys, rate_buf_size);
0364     if (rc) {
0365         dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
0366         return -EINVAL;
0367     }
0368     return 0;
0369 }
0370 EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
0371 
0372 int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
0373 {
0374     int rc;
0375 
0376     rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
0377     if (rc) {
0378         dev_err(&dev->dev,
0379             "Can't get translation interrupt for device\n");
0380         return rc;
0381     }
0382     return 0;
0383 }
0384 EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
0385 
/* Undo pnv_ocxl_map_xsl_regs(): unmap the four XSL fault registers */
void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
			void __iomem *tfc, void __iomem *pe_handle)
{
	iounmap(dsisr);
	iounmap(dar);
	iounmap(tfc);
	iounmap(pe_handle);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
0395 
/*
 * Map the XSL translation-fault registers (DSISR, DAR, TFC,
 * PE_HANDLE) whose mmio addresses opal publishes in the device tree.
 * On success, all four output pointers are set; on error, nothing is
 * left mapped and a negative errno is returned.
 */
int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
			void __iomem **dar, void __iomem **tfc,
			void __iomem **pe_handle)
{
	u64 reg;
	int i, j, rc = 0;
	void __iomem *regs[4];

	/*
	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
	 * PE_HANDLE registers in a device tree property, in that
	 * order
	 */
	for (i = 0; i < 4; i++) {
		rc = of_property_read_u64_index(dev->dev.of_node,
						"ibm,opal-xsl-mmio", i, &reg);
		if (rc)
			break;
		regs[i] = ioremap(reg, 8);	/* each register is 8 bytes */
		if (!regs[i]) {
			rc = -EINVAL;
			break;
		}
	}
	if (rc) {
		dev_err(&dev->dev, "Can't map translation mmio registers\n");
		/* unmap whatever was mapped before the failure */
		for (j = i - 1; j >= 0; j--)
			iounmap(regs[j]);
	} else {
		*dsisr = regs[0];
		*dar = regs[1];
		*tfc = regs[2];
		*pe_handle = regs[3];
	}
	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
0433 
/* Opaque handle returned by pnv_ocxl_spa_setup() via *platform_data */
struct spa_data {
	u64 phb_opal_id;	/* opal id of the PHB the device sits behind */
	u32 bdfn;		/* bus/device/function of the device */
};
0438 
0439 int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
0440         void **platform_data)
0441 {
0442     struct pci_controller *hose = pci_bus_to_host(dev->bus);
0443     struct pnv_phb *phb = hose->private_data;
0444     struct spa_data *data;
0445     u32 bdfn;
0446     int rc;
0447 
0448     data = kzalloc(sizeof(*data), GFP_KERNEL);
0449     if (!data)
0450         return -ENOMEM;
0451 
0452     bdfn = (dev->bus->number << 8) | dev->devfn;
0453     rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
0454                 PE_mask);
0455     if (rc) {
0456         dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
0457         kfree(data);
0458         return rc;
0459     }
0460     data->phb_opal_id = phb->opal_id;
0461     data->bdfn = bdfn;
0462     *platform_data = (void *) data;
0463     return 0;
0464 }
0465 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
0466 
/*
 * Release the Shared Process Area registered by pnv_ocxl_spa_setup():
 * re-runs the opal setup call with a null SPA address (presumably
 * interpreted by opal as a teardown) and frees the handle.
 */
void pnv_ocxl_spa_release(void *platform_data)
{
	struct spa_data *data = (struct spa_data *) platform_data;
	int rc;

	rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
	WARN_ON(rc);
	kfree(data);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
0477 
0478 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
0479 {
0480     struct spa_data *data = (struct spa_data *) platform_data;
0481     int rc;
0482 
0483     rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
0484     return rc;
0485 }
0486 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
0487 
/*
 * Map the device to a logical partition: program the MMIO ATSD LPARID
 * register through opal, then map the ATSD registers and return the
 * mapping in *arva for use by pnv_ocxl_tlb_invalidate().
 */
int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
		      uint64_t lpcr, void __iomem **arva)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	u64 mmio_atsd;
	int rc;

	/* ATSD physical address.
	 * ATSD LAUNCH register: write access initiates a shoot down to
	 * initiate the TLB Invalidate command.
	 */
	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
					0, &mmio_atsd);
	if (rc) {
		dev_info(&dev->dev, "No available ATSD found\n");
		return rc;
	}

	/* Assign a register set to a Logical Partition and MMIO ATSD
	 * LPARID register to the required value.
	 */
	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
			       lparid, lpcr);
	if (rc) {
		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
		return rc;
	}

	/* 24 bytes covers the AVA, LAUNCH and STAT registers */
	*arva = ioremap(mmio_atsd, 24);
	if (!(*arva)) {
		dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
		/*
		 * NOTE(review): the opal_npu_map_lpar() mapping is not
		 * undone on this failure path — confirm whether opal
		 * requires an explicit unmap here.
		 */
		rc = -ENOMEM;
	}

	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
0526 
/* Unmap the ATSD registers mapped by pnv_ocxl_map_lpar() */
void pnv_ocxl_unmap_lpar(void __iomem *arva)
{
	iounmap(arva);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
0532 
/*
 * Shoot down TLB entries through the MMIO ATSD registers mapped by
 * pnv_ocxl_map_lpar(): invalidate a single address for 'pid' when
 * 'addr' is non-zero, otherwise everything for the PID. Busy-polls
 * the status register for completion, giving up (with an error
 * message) after PNV_OCXL_ATSD_TIMEOUT seconds.
 */
void pnv_ocxl_tlb_invalidate(void __iomem *arva,
			     unsigned long pid,
			     unsigned long addr,
			     unsigned long page_size)
{
	unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
	u64 val = 0ull;
	int pend;
	u8 size;

	/* nothing to do if the ATSD registers were never mapped */
	if (!(arva))
		return;

	if (addr) {
		/* load Abbreviated Virtual Address register with
		 * the necessary value
		 */
		val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
		out_be64(arva + PNV_OCXL_ATSD_AVA, val);
	}

	/* Write access initiates a shoot down to initiate the
	 * TLB Invalidate command
	 */
	val = PNV_OCXL_ATSD_LNCH_R;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
	if (addr)
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
	else {
		val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
		val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
	}
	val |= PNV_OCXL_ATSD_LNCH_PRS;
	/* Actual Page Size to be invalidated
	 * 000 4KB
	 * 101 64KB
	 * 001 2MB
	 * 010 1GB
	 */
	size = 0b101;	/* any unrecognized page_size falls back to 64KB */
	if (page_size == 0x1000)
		size = 0b000;
	if (page_size == 0x200000)
		size = 0b001;
	if (page_size == 0x40000000)
		size = 0b010;
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
	val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
	out_be64(arva + PNV_OCXL_ATSD_LNCH, val);

	/* Poll the ATSD status register to determine when the
	 * TLB Invalidate has been completed.
	 */
	val = in_be64(arva + PNV_OCXL_ATSD_STAT);
	pend = val >> 63;	/* top bit: invalidate still pending */

	while (pend) {
		if (time_after_eq(jiffies, timeout)) {
			pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
			       __func__, val, pid);
			return;
		}
		cpu_relax();
		val = in_be64(arva + PNV_OCXL_ATSD_STAT);
		pend = val >> 63;
	}
}
EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
0600 EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);