/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <misc/cxl.h>
#include <asm/pnv-pci.h>
#include <linux/msi.h>
#include <linux/module.h>
#include <linux/mount.h>

#include "cxl.h"

/*
 * Since we want to track memory mappings to be able to force-unmap
 * when the AFU is no longer reachable, we need an inode. For devices
 * opened through the cxl user API, this is not a problem, but a
 * userland process can also get a cxl fd through the cxl_get_fd()
 * API, which is used by the cxlflash driver.
 *
 * Therefore we implement our own simple pseudo-filesystem and inode
 * allocator. We don't use the anonymous inode, as we need the
 * meta-data associated with it (address_space) and it is shared by
 * other drivers/processes, so it could lead to cxl unmapping VMAs
 * from random processes.
 */

#define CXL_PSEUDO_FS_MAGIC 0x1697697f

static int cxl_fs_cnt;
static struct vfsmount *cxl_vfs_mount;

static const struct dentry_operations cxl_fs_dops = {
    .d_dname    = simple_dname,
};

static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
                const char *dev_name, void *data)
{
    return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
            CXL_PSEUDO_FS_MAGIC);
}

static struct file_system_type cxl_fs_type = {
    .name       = "cxl",
    .owner      = THIS_MODULE,
    .mount      = cxl_fs_mount,
    .kill_sb    = kill_anon_super,
};


void cxl_release_mapping(struct cxl_context *ctx)
{
    if (ctx->kernelapi && ctx->mapping)
        simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
}

static struct file *cxl_getfile(const char *name,
                const struct file_operations *fops,
                void *priv, int flags)
{
    struct qstr this;
    struct path path;
    struct file *file;
    struct inode *inode = NULL;
    int rc;

    /* strongly inspired by anon_inode_getfile() */

    if (fops->owner && !try_module_get(fops->owner))
        return ERR_PTR(-ENOENT);

    rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
    if (rc < 0) {
        pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
        file = ERR_PTR(rc);
        goto err_module;
    }

    inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
    if (IS_ERR(inode)) {
        file = ERR_CAST(inode);
        goto err_fs;
    }

    file = ERR_PTR(-ENOMEM);
    this.name = name;
    this.len = strlen(name);
    this.hash = 0;
    path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
    if (!path.dentry)
        goto err_inode;

    path.mnt = mntget(cxl_vfs_mount);
    d_instantiate(path.dentry, inode);

    file = alloc_file(&path, OPEN_FMODE(flags), fops);
    if (IS_ERR(file))
        goto err_dput;
    file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
    file->private_data = priv;

    return file;

err_dput:
    path_put(&path);
err_inode:
    iput(inode);
err_fs:
    simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
err_module:
    module_put(fops->owner);
    return file;
}

struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
{
    struct cxl_afu *afu;
    struct cxl_context  *ctx;
    int rc;

    afu = cxl_pci_to_afu(dev);
    if (IS_ERR(afu))
        return ERR_CAST(afu);

    ctx = cxl_context_alloc();
    if (!ctx)
        return ERR_PTR(-ENOMEM);

    ctx->kernelapi = true;

    /* Make it a slave context.  We can promote it later? */
    rc = cxl_context_init(ctx, afu, false);
    if (rc)
        goto err_ctx;

    return ctx;

err_ctx:
    kfree(ctx);
    return ERR_PTR(rc);
}
EXPORT_SYMBOL_GPL(cxl_dev_context_init);

struct cxl_context *cxl_get_context(struct pci_dev *dev)
{
    return dev->dev.archdata.cxl_ctx;
}
EXPORT_SYMBOL_GPL(cxl_get_context);

int cxl_release_context(struct cxl_context *ctx)
{
    if (ctx->status >= STARTED)
        return -EBUSY;

    cxl_context_free(ctx);

    return 0;
}
EXPORT_SYMBOL_GPL(cxl_release_context);

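/*
 * cxl_find_afu_irq() translates an AFU interrupt number into the hardware
 * irq backing it. The context owns up to CXL_IRQ_RANGES ranges of hardware
 * irqs, and interrupt "num" is counted across those ranges in order. For
 * example (hypothetical numbers): with range[0] = {offset 16, len 1} and
 * range[1] = {offset 320, len 4}, num 0 maps to hwirq 16 and num 2 maps to
 * hwirq 321. Returns 0 if num is beyond the allocated ranges.
 */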
static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
{
    __u16 range;
    int r;

    for (r = 0; r < CXL_IRQ_RANGES; r++) {
        range = ctx->irqs.range[r];
        if (num < range) {
            return ctx->irqs.offset[r] + num;
        }
        num -= range;
    }
    return 0;
}

int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
{
    if (*ctx == NULL || *afu_irq == 0) {
        *afu_irq = 1;
        *ctx = cxl_get_context(pdev);
    } else {
        (*afu_irq)++;
        if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
            *ctx = list_next_entry(*ctx, extra_irq_contexts);
            *afu_irq = 1;
        }
    }
    return cxl_find_afu_irq(*ctx, *afu_irq);
}
/* Exported via cxl_base */

int cxl_set_priv(struct cxl_context *ctx, void *priv)
{
    if (!ctx)
        return -EINVAL;

    ctx->priv = priv;

    return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_priv);

void *cxl_get_priv(struct cxl_context *ctx)
{
    if (!ctx)
        return ERR_PTR(-EINVAL);

    return ctx->priv;
}
EXPORT_SYMBOL_GPL(cxl_get_priv);

int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
{
    int res;
    irq_hw_number_t hwirq;

    if (num == 0)
        num = ctx->afu->pp_irqs;
    res = afu_allocate_irqs(ctx, num);
    if (res)
        return res;

    if (!cpu_has_feature(CPU_FTR_HVMODE)) {
        /* In a guest, the PSL interrupt is not multiplexed. It was
         * allocated above, and we need to set its handler
         */
        hwirq = cxl_find_afu_irq(ctx, 0);
        if (hwirq)
            cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
    }

    if (ctx->status == STARTED) {
        if (cxl_ops->update_ivtes)
            cxl_ops->update_ivtes(ctx);
        else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
    }

    return res;
}
EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);

void cxl_free_afu_irqs(struct cxl_context *ctx)
{
    irq_hw_number_t hwirq;
    unsigned int virq;

    if (!cpu_has_feature(CPU_FTR_HVMODE)) {
        hwirq = cxl_find_afu_irq(ctx, 0);
        if (hwirq) {
            virq = irq_find_mapping(NULL, hwirq);
            if (virq)
                cxl_unmap_irq(virq, ctx);
        }
    }
    afu_irq_name_free(ctx);
    cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
}
EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);

int cxl_map_afu_irq(struct cxl_context *ctx, int num,
            irq_handler_t handler, void *cookie, char *name)
{
    irq_hw_number_t hwirq;

    /*
     * Find interrupt we are to register.
     */
    hwirq = cxl_find_afu_irq(ctx, num);
    if (!hwirq)
        return -ENOENT;

    return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
}
EXPORT_SYMBOL_GPL(cxl_map_afu_irq);

void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
{
    irq_hw_number_t hwirq;
    unsigned int virq;

    hwirq = cxl_find_afu_irq(ctx, num);
    if (!hwirq)
        return;

    virq = irq_find_mapping(NULL, hwirq);
    if (virq)
        cxl_unmap_irq(virq, cookie);
}
EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
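
/*
 * Example (illustrative sketch, not part of this file): a kernel AFU driver
 * typically allocates its per-context AFU interrupts before starting the
 * context and registers handlers for them afterwards. Passing 0 to
 * cxl_allocate_afu_irqs() requests the AFU default (pp_irqs). The handler,
 * cookie and name below are hypothetical.
 *
 *    rc = cxl_allocate_afu_irqs(ctx, 0);
 *    if (rc)
 *        return rc;
 *    virq = cxl_map_afu_irq(ctx, 1, my_afu_irq_handler, my_cookie, "my_afu");
 *    if (virq <= 0)
 *        goto err_free_irqs;
 *    ...
 *    cxl_unmap_afu_irq(ctx, 1, my_cookie);
 *    cxl_free_afu_irqs(ctx);
 */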

/*
 * Start a context
 * Code here similar to afu_ioctl_start_work().
 */
int cxl_start_context(struct cxl_context *ctx, u64 wed,
              struct task_struct *task)
{
    int rc = 0;
    bool kernel = true;

    pr_devel("%s: pe: %i\n", __func__, ctx->pe);

    mutex_lock(&ctx->status_mutex);
    if (ctx->status == STARTED)
        goto out; /* already started */

    /*
     * Increment the mapped context count for adapter. This also checks
     * if adapter_context_lock is taken.
     */
    rc = cxl_adapter_context_get(ctx->afu->adapter);
    if (rc)
        goto out;

    if (task) {
        ctx->pid = get_task_pid(task, PIDTYPE_PID);
        ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
        kernel = false;
        ctx->real_mode = false;
    }

    cxl_ctx_get();

    if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
        put_pid(ctx->glpid);
        put_pid(ctx->pid);
        ctx->glpid = ctx->pid = NULL;
        cxl_adapter_context_put(ctx->afu->adapter);
        cxl_ctx_put();
        goto out;
    }

    ctx->status = STARTED;
out:
    mutex_unlock(&ctx->status_mutex);
    return rc;
}
EXPORT_SYMBOL_GPL(cxl_start_context);

int cxl_process_element(struct cxl_context *ctx)
{
    return ctx->external_pe;
}
EXPORT_SYMBOL_GPL(cxl_process_element);

/* Stop a context.  Returns 0 on success, otherwise -Errno */
int cxl_stop_context(struct cxl_context *ctx)
{
    return __detach_context(ctx);
}
EXPORT_SYMBOL_GPL(cxl_stop_context);
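
/*
 * Example (illustrative sketch, not part of this file): typical lifecycle of
 * a kernel API context in an AFU driver such as cxlflash. "pdev" is the
 * AFU's pci_dev and "wed" is the work element descriptor understood by that
 * particular AFU; both are driver-specific. Passing a NULL task starts the
 * context as a kernel context.
 *
 *    ctx = cxl_dev_context_init(pdev);
 *    if (IS_ERR(ctx))
 *        return PTR_ERR(ctx);
 *
 *    rc = cxl_start_context(ctx, wed, NULL);
 *    if (rc) {
 *        cxl_release_context(ctx);
 *        return rc;
 *    }
 *    ...
 *    cxl_stop_context(ctx);
 *    cxl_release_context(ctx);
 */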

void cxl_set_master(struct cxl_context *ctx)
{
    ctx->master = true;
}
EXPORT_SYMBOL_GPL(cxl_set_master);

int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
{
    if (ctx->status == STARTED) {
        /*
         * We could potentially update the PE and issue an update LLCMD
         * to support this, but it doesn't seem to have a good use case
         * since it's trivial to just create a second kernel context
         * with different translation modes, so until someone convinces
         * me otherwise:
         */
        return -EBUSY;
    }

    ctx->real_mode = real_mode;
    return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_translation_mode);

/* wrappers around afu_* file ops which are EXPORTED */
int cxl_fd_open(struct inode *inode, struct file *file)
{
    return afu_open(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
int cxl_fd_release(struct inode *inode, struct file *file)
{
    return afu_release(inode, file);
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    return afu_ioctl(file, cmd, arg);
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
    return afu_mmap(file, vm);
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
{
    return afu_poll(file, poll);
}
EXPORT_SYMBOL_GPL(cxl_fd_poll);
ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
            loff_t *off)
{
    return afu_read(file, buf, count, off);
}
EXPORT_SYMBOL_GPL(cxl_fd_read);

#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME

/* Get a struct file and fd for a context and attach the ops */
struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
            int *fd)
{
    struct file *file;
    int rc, flags, fdtmp;
    char *name = NULL;

    /* only allow one per context */
    if (ctx->mapping)
        return ERR_PTR(-EEXIST);

    flags = O_RDWR | O_CLOEXEC;

    /* This code is similar to anon_inode_getfd() */
    rc = get_unused_fd_flags(flags);
    if (rc < 0)
        return ERR_PTR(rc);
    fdtmp = rc;

    /*
     * Patch the file ops.  Needs to be careful that this is reentrant safe.
     */
    if (fops) {
        PATCH_FOPS(open);
        PATCH_FOPS(poll);
        PATCH_FOPS(read);
        PATCH_FOPS(release);
        PATCH_FOPS(unlocked_ioctl);
        PATCH_FOPS(compat_ioctl);
        PATCH_FOPS(mmap);
    } else /* use default ops */
        fops = (struct file_operations *)&afu_fops;

    name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
    file = cxl_getfile(name, fops, ctx, flags);
    kfree(name);
    if (IS_ERR(file))
        goto err_fd;

    cxl_context_set_mapping(ctx, file->f_mapping);
    *fd = fdtmp;
    return file;

err_fd:
    put_unused_fd(fdtmp);
    return NULL;
}
EXPORT_SYMBOL_GPL(cxl_get_fd);
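
/*
 * Example (illustrative sketch, not part of this file): how a driver such as
 * cxlflash can hand a cxl file descriptor to userspace. The driver supplies
 * its own file_operations ("my_cxl_fops" is hypothetical); any ops left NULL
 * are patched to the default afu_fops above. cxl_get_fd() can return either
 * an ERR_PTR or NULL on failure, and the fd should only be installed once
 * nothing else can fail.
 *
 *    file = cxl_get_fd(ctx, &my_cxl_fops, &fd);
 *    if (IS_ERR_OR_NULL(file))
 *        goto err;
 *    ...
 *    fd_install(fd, file);
 *    return fd;
 */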

struct cxl_context *cxl_fops_get_context(struct file *file)
{
    return file->private_data;
}
EXPORT_SYMBOL_GPL(cxl_fops_get_context);

void cxl_set_driver_ops(struct cxl_context *ctx,
            struct cxl_afu_driver_ops *ops)
{
    WARN_ON(!ops->fetch_event || !ops->event_delivered);
    atomic_set(&ctx->afu_driver_events, 0);
    ctx->afu_driver_ops = ops;
}
EXPORT_SYMBOL_GPL(cxl_set_driver_ops);

void cxl_context_events_pending(struct cxl_context *ctx,
                unsigned int new_events)
{
    atomic_add(new_events, &ctx->afu_driver_events);
    wake_up_all(&ctx->wq);
}
EXPORT_SYMBOL_GPL(cxl_context_events_pending);
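
/*
 * Example (illustrative sketch, not part of this file): an AFU driver that
 * delivers its own events through the cxl fd registers a cxl_afu_driver_ops
 * (callback prototypes live in include/misc/cxl.h) before starting the
 * context, then signals new events, e.g. from its interrupt handler. The
 * callback names below are hypothetical.
 *
 *    static struct cxl_afu_driver_ops my_afu_driver_ops = {
 *        .fetch_event     = my_fetch_event,
 *        .event_delivered = my_event_delivered,
 *    };
 *    ...
 *    cxl_set_driver_ops(ctx, &my_afu_driver_ops);
 *    ...
 *    cxl_context_events_pending(ctx, 1);
 */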

int cxl_start_work(struct cxl_context *ctx,
           struct cxl_ioctl_start_work *work)
{
    int rc;

    /* code taken from afu_ioctl_start_work */
    if (!(work->flags & CXL_START_WORK_NUM_IRQS))
        work->num_interrupts = ctx->afu->pp_irqs;
    else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
         (work->num_interrupts > ctx->afu->irqs_max)) {
        return -EINVAL;
    }

    rc = afu_register_irqs(ctx, work->num_interrupts);
    if (rc)
        return rc;

    rc = cxl_start_context(ctx, work->work_element_descriptor, current);
    if (rc < 0) {
        afu_release_irqs(ctx, ctx);
        return rc;
    }

    return 0;
}
EXPORT_SYMBOL_GPL(cxl_start_work);

void __iomem *cxl_psa_map(struct cxl_context *ctx)
{
    if (ctx->status != STARTED)
        return NULL;

    pr_devel("%s: psn_phys:%llx size:%llx\n",
        __func__, ctx->psn_phys, ctx->psn_size);
    return ioremap(ctx->psn_phys, ctx->psn_size);
}
EXPORT_SYMBOL_GPL(cxl_psa_map);

void cxl_psa_unmap(void __iomem *addr)
{
    iounmap(addr);
}
EXPORT_SYMBOL_GPL(cxl_psa_unmap);
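
/*
 * Example (illustrative sketch, not part of this file): once a context has
 * been started, a driver may map its problem state area and talk to the AFU
 * via MMIO. Register offsets and byte order are AFU-specific; MY_AFU_CTRL is
 * hypothetical.
 *
 *    void __iomem *psa;
 *
 *    psa = cxl_psa_map(ctx);
 *    if (!psa)
 *        return -EIO;
 *    writeq(cmd, psa + MY_AFU_CTRL);
 *    ...
 *    cxl_psa_unmap(psa);
 */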

int cxl_afu_reset(struct cxl_context *ctx)
{
    struct cxl_afu *afu = ctx->afu;
    int rc;

    rc = cxl_ops->afu_reset(afu);
    if (rc)
        return rc;

    return cxl_ops->afu_check_and_enable(afu);
}
EXPORT_SYMBOL_GPL(cxl_afu_reset);

void cxl_perst_reloads_same_image(struct cxl_afu *afu,
                  bool perst_reloads_same_image)
{
    afu->adapter->perst_same_image = perst_reloads_same_image;
}
EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);

ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
{
    struct cxl_afu *afu = cxl_pci_to_afu(dev);
    if (IS_ERR(afu))
        return -ENODEV;

    return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
}
EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);

int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
{
    struct cxl_afu *afu = cxl_pci_to_afu(dev);
    if (IS_ERR(afu))
        return -ENODEV;

    if (irqs > afu->adapter->user_irqs)
        return -EINVAL;

    /* Limit user_irqs to prevent the user increasing this via sysfs */
    afu->adapter->user_irqs = irqs;
    afu->irqs_max = irqs;

    return 0;
}
EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);

int cxl_get_max_irqs_per_process(struct pci_dev *dev)
{
    struct cxl_afu *afu = cxl_pci_to_afu(dev);
    if (IS_ERR(afu))
        return -ENODEV;

    return afu->irqs_max;
}
EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);
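
/*
 * Example (illustrative sketch, not part of this file): a driver for hardware
 * that cannot use the full AFU interrupt range (such as the Mellanox CX4 case
 * described below) can cap the per-context allocation and later query that
 * cap when spreading MSIs over several contexts. The limit value is
 * hardware-specific.
 *
 *    rc = cxl_set_max_irqs_per_process(pdev, my_hw_irq_limit);
 *    if (rc)
 *        return rc;
 *    max = cxl_get_max_irqs_per_process(pdev);
 */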

/*
 * This is a special interrupt allocation routine called from the PHB's MSI
 * setup function. When capi interrupts are allocated in this manner they must
 * still be associated with a running context, but since the MSI APIs have no
 * way to specify this we use the default context associated with the device.
 *
 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
 * interrupt number, so in order to overcome this their driver informs us of
 * the restriction by setting the maximum interrupts per context, and we
 * allocate additional contexts as necessary so that we can keep the AFU
 * interrupt number within the supported range.
 */
int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
    struct cxl_context *ctx, *new_ctx, *default_ctx;
    int remaining;
    int rc;

    ctx = default_ctx = cxl_get_context(pdev);
    if (WARN_ON(!default_ctx))
        return -ENODEV;

    remaining = nvec;
    while (remaining > 0) {
        rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
        if (rc) {
            pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
            return rc;
        }
        remaining -= ctx->afu->irqs_max;

        if (ctx != default_ctx && default_ctx->status == STARTED) {
            WARN_ON(cxl_start_context(ctx,
                be64_to_cpu(default_ctx->elem->common.wed),
                NULL));
        }

        if (remaining > 0) {
            new_ctx = cxl_dev_context_init(pdev);
            if (IS_ERR(new_ctx)) {
                pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
                return -ENOSPC;
            }
            list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
            ctx = new_ctx;
        }
    }

    return 0;
}
/* Exported via cxl_base */

void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
{
    struct cxl_context *ctx, *pos, *tmp;

    ctx = cxl_get_context(pdev);
    if (WARN_ON(!ctx))
        return;

    cxl_free_afu_irqs(ctx);
    list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
        cxl_stop_context(pos);
        cxl_free_afu_irqs(pos);
        list_del(&pos->extra_irq_contexts);
        cxl_release_context(pos);
    }
}
/* Exported via cxl_base */