Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
0004  */
0005 
0006 /**
0007  * DOC: Nitro Enclaves (NE) PCI device driver.
0008  */
0009 
0010 #include <linux/delay.h>
0011 #include <linux/device.h>
0012 #include <linux/list.h>
0013 #include <linux/module.h>
0014 #include <linux/mutex.h>
0015 #include <linux/nitro_enclaves.h>
0016 #include <linux/pci.h>
0017 #include <linux/types.h>
0018 #include <linux/wait.h>
0019 
0020 #include "ne_misc_dev.h"
0021 #include "ne_pci_dev.h"
0022 
/**
 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout to wait for a reply from
 *                the NE PCI device.
 *
 * Used both as the wait-queue timeout for a command reply and as the upper
 * bound when polling for the device to reach the disabled state.
 */
#define NE_DEFAULT_TIMEOUT_MSECS    (120000) /* 120 sec */
0028 
/* PCI device IDs this driver binds to: the Amazon Nitro Enclaves device. */
static const struct pci_device_id ne_pci_ids[] = {
    { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
    { 0, } /* terminating entry */
};

MODULE_DEVICE_TABLE(pci, ne_pci_ids);
0035 
/**
 * ne_submit_request() - Submit command request to the PCI device based on the
 *           command type.
 * @pdev:       PCI device to send the command to.
 * @cmd_type:       Command type of the request sent to the PCI device.
 * @cmd_request:    Command request payload.
 * @cmd_request_size:   Size of the command request payload.
 *
 * Context: Process context. This function is called with the ne_pci_dev mutex held.
 */
static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
                  void *cmd_request, size_t cmd_request_size)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

    /* Copy the request payload into the device's MMIO send buffer first. */
    memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);

    /*
     * Write the command type last, so the payload is fully in place by the
     * time the device sees the command register update.
     */
    iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
}
0055 
/**
 * ne_retrieve_reply() - Retrieve reply from the PCI device.
 * @pdev:       PCI device to receive the reply from.
 * @cmd_reply:      Command reply payload.
 * @cmd_reply_size: Size of the command reply payload.
 *
 * Context: Process context. This function is called with the ne_pci_dev mutex held.
 */
static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
                  size_t cmd_reply_size)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

    /* Read the reply payload out of the device's MMIO receive buffer. */
    memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
}
0071 
0072 /**
0073  * ne_wait_for_reply() - Wait for a reply of a PCI device command.
0074  * @pdev:   PCI device for which a reply is waited.
0075  *
0076  * Context: Process context. This function is called with the ne_pci_dev mutex held.
0077  * Return:
0078  * * 0 on success.
0079  * * Negative return value on failure.
0080  */
0081 static int ne_wait_for_reply(struct pci_dev *pdev)
0082 {
0083     struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
0084     int rc = -EINVAL;
0085 
0086     /*
0087      * TODO: Update to _interruptible and handle interrupted wait event
0088      * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
0089      */
0090     rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
0091                 atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
0092                 msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
0093     if (!rc)
0094         return -ETIMEDOUT;
0095 
0096     return 0;
0097 }
0098 
/**
 * ne_do_request() - Submit a command request to the PCI device and wait for
 *           its reply.
 * @pdev:       PCI device to send the command to.
 * @cmd_type:       Command type of the request.
 * @cmd_request:    Command request payload.
 * @cmd_request_size:   Size of the command request payload; must not exceed
 *          NE_SEND_DATA_SIZE.
 * @cmd_reply:      Buffer that receives the command reply payload.
 * @cmd_reply_size: Size of the command reply payload; must not exceed
 *          NE_RECV_DATA_SIZE.
 *
 * Validates the arguments, then serializes the submit / wait / retrieve
 * sequence under the ne_pci_dev mutex so the device handles one command at
 * a time.
 *
 * Context: Process context; may sleep (mutex, wait queue).
 * Return:
 * * 0 on success.
 * * Negative return value on failure (-EINVAL for bad arguments, -ETIMEDOUT
 *   if no reply arrived, or the negative rc reported by the device itself).
 */
int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
          void *cmd_request, size_t cmd_request_size,
          struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
    int rc = -EINVAL;

    if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
        dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);

        return -EINVAL;
    }

    if (!cmd_request) {
        dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
                    cmd_type);

        return -EINVAL;
    }

    if (cmd_request_size > NE_SEND_DATA_SIZE) {
        dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
                    cmd_request_size, cmd_type);

        return -EINVAL;
    }

    if (!cmd_reply) {
        dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
                    cmd_type);

        return -EINVAL;
    }

    if (cmd_reply_size > NE_RECV_DATA_SIZE) {
        dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
                    cmd_reply_size, cmd_type);

        return -EINVAL;
    }

    /*
     * Use this mutex so that the PCI device handles one command request at
     * a time.
     */
    mutex_lock(&ne_pci_dev->pci_dev_mutex);

    /* Clear any stale reply-available flag before submitting. */
    atomic_set(&ne_pci_dev->cmd_reply_avail, 0);

    ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);

    rc = ne_wait_for_reply(pdev);
    if (rc < 0) {
        dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
                    cmd_type, rc);

        goto unlock_mutex;
    }

    ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);

    /* Reply consumed; reset the flag for the next request. */
    atomic_set(&ne_pci_dev->cmd_reply_avail, 0);

    /* The device reports its own status in the reply payload. */
    if (cmd_reply->rc < 0) {
        rc = cmd_reply->rc;

        dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
                    cmd_type, rc);

        goto unlock_mutex;
    }

    rc = 0;

unlock_mutex:
    mutex_unlock(&ne_pci_dev->pci_dev_mutex);

    return rc;
}
0178 
0179 /**
0180  * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
0181  *          request sent to the PCI device for enclave lifetime
0182  *          management.
0183  * @irq:    Received interrupt for a reply sent by the PCI device.
0184  * @args:   PCI device private data structure.
0185  *
0186  * Context: Interrupt context.
0187  * Return:
0188  * * IRQ_HANDLED on handled interrupt.
0189  */
0190 static irqreturn_t ne_reply_handler(int irq, void *args)
0191 {
0192     struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
0193 
0194     atomic_set(&ne_pci_dev->cmd_reply_avail, 1);
0195 
0196     /* TODO: Update to _interruptible. */
0197     wake_up(&ne_pci_dev->cmd_reply_wait_q);
0198 
0199     return IRQ_HANDLED;
0200 }
0201 
0202 /**
0203  * ne_event_work_handler() - Work queue handler for notifying enclaves on a
0204  *               state change received by the event interrupt
0205  *               handler.
0206  * @work:   Item containing the NE PCI device for which an out-of-band event
0207  *      was issued.
0208  *
0209  * An out-of-band event is being issued by the Nitro Hypervisor when at least
0210  * one enclave is changing state without client interaction.
0211  *
0212  * Context: Work queue context.
0213  */
0214 static void ne_event_work_handler(struct work_struct *work)
0215 {
0216     struct ne_pci_dev_cmd_reply cmd_reply = {};
0217     struct ne_enclave *ne_enclave = NULL;
0218     struct ne_pci_dev *ne_pci_dev =
0219         container_of(work, struct ne_pci_dev, notify_work);
0220     struct pci_dev *pdev = ne_pci_dev->pdev;
0221     int rc = -EINVAL;
0222     struct slot_info_req slot_info_req = {};
0223 
0224     mutex_lock(&ne_pci_dev->enclaves_list_mutex);
0225 
0226     /*
0227      * Iterate over all enclaves registered for the Nitro Enclaves
0228      * PCI device and determine for which enclave(s) the out-of-band event
0229      * is corresponding to.
0230      */
0231     list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
0232         mutex_lock(&ne_enclave->enclave_info_mutex);
0233 
0234         /*
0235          * Enclaves that were never started cannot receive out-of-band
0236          * events.
0237          */
0238         if (ne_enclave->state != NE_STATE_RUNNING)
0239             goto unlock;
0240 
0241         slot_info_req.slot_uid = ne_enclave->slot_uid;
0242 
0243         rc = ne_do_request(pdev, SLOT_INFO,
0244                    &slot_info_req, sizeof(slot_info_req),
0245                    &cmd_reply, sizeof(cmd_reply));
0246         if (rc < 0)
0247             dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);
0248 
0249         /* Notify enclave process that the enclave state changed. */
0250         if (ne_enclave->state != cmd_reply.state) {
0251             ne_enclave->state = cmd_reply.state;
0252 
0253             ne_enclave->has_event = true;
0254 
0255             wake_up_interruptible(&ne_enclave->eventq);
0256         }
0257 
0258 unlock:
0259          mutex_unlock(&ne_enclave->enclave_info_mutex);
0260     }
0261 
0262     mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
0263 }
0264 
0265 /**
0266  * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
0267  *          This interrupt does not supply any data in the MMIO
0268  *          region. It notifies a change in the state of any of
0269  *          the launched enclaves.
0270  * @irq:    Received interrupt for an out-of-band event.
0271  * @args:   PCI device private data structure.
0272  *
0273  * Context: Interrupt context.
0274  * Return:
0275  * * IRQ_HANDLED on handled interrupt.
0276  */
0277 static irqreturn_t ne_event_handler(int irq, void *args)
0278 {
0279     struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
0280 
0281     queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
0282 
0283     return IRQ_HANDLED;
0284 }
0285 
/**
 * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
 * @pdev:   PCI device to setup the MSI-X for.
 *
 * Allocates all MSI-X vectors the device advertises, then wires up the
 * reply IRQ, the event work queue, and the event IRQ, unwinding each
 * resource on failure.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_setup_msix(struct pci_dev *pdev)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
    int nr_vecs = 0;
    int rc = -EINVAL;

    nr_vecs = pci_msix_vec_count(pdev);
    if (nr_vecs < 0) {
        rc = nr_vecs;

        dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);

        return rc;
    }

    /* min == max: require exactly the advertised number of vectors. */
    rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);

        return rc;
    }

    /*
     * This IRQ gets triggered every time the PCI device responds to a
     * command request. The reply is then retrieved, reading from the MMIO
     * space of the PCI device.
     */
    rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
             0, "enclave_cmd", ne_pci_dev);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);

        goto free_irq_vectors;
    }

    /* The work queue must exist before the event IRQ that queues onto it. */
    ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
    if (!ne_pci_dev->event_wq) {
        rc = -ENOMEM;

        dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);

        goto free_reply_irq_vec;
    }

    INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);

    /*
     * This IRQ gets triggered every time any enclave's state changes. Its
     * handler then scans for the changes and propagates them to the user
     * space.
     */
    rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
             0, "enclave_evt", ne_pci_dev);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);

        goto destroy_wq;
    }

    return 0;

/* Unwind in reverse order of acquisition. */
destroy_wq:
    destroy_workqueue(ne_pci_dev->event_wq);
free_reply_irq_vec:
    free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
free_irq_vectors:
    pci_free_irq_vectors(pdev);

    return rc;
}
0365 
/**
 * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
 * @pdev:   PCI device to teardown the MSI-X for.
 *
 * Releases the resources acquired by ne_setup_msix() in reverse order.
 *
 * Context: Process context.
 */
static void ne_teardown_msix(struct pci_dev *pdev)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

    /* Stop new event work being queued before draining what's pending. */
    free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);

    flush_work(&ne_pci_dev->notify_work);
    destroy_workqueue(ne_pci_dev->event_wq);

    free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);

    pci_free_irq_vectors(pdev);
}
0385 
0386 /**
0387  * ne_pci_dev_enable() - Select the PCI device version and enable it.
0388  * @pdev:   PCI device to select version for and then enable.
0389  *
0390  * Context: Process context.
0391  * Return:
0392  * * 0 on success.
0393  * * Negative return value on failure.
0394  */
0395 static int ne_pci_dev_enable(struct pci_dev *pdev)
0396 {
0397     u8 dev_enable_reply = 0;
0398     u16 dev_version_reply = 0;
0399     struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
0400 
0401     iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
0402 
0403     dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
0404     if (dev_version_reply != NE_VERSION_MAX) {
0405         dev_err(&pdev->dev, "Error in pci dev version cmd\n");
0406 
0407         return -EIO;
0408     }
0409 
0410     iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
0411 
0412     dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
0413     if (dev_enable_reply != NE_ENABLE_ON) {
0414         dev_err(&pdev->dev, "Error in pci dev enable cmd\n");
0415 
0416         return -EIO;
0417     }
0418 
0419     return 0;
0420 }
0421 
0422 /**
0423  * ne_pci_dev_disable() - Disable the PCI device.
0424  * @pdev:   PCI device to disable.
0425  *
0426  * Context: Process context.
0427  */
0428 static void ne_pci_dev_disable(struct pci_dev *pdev)
0429 {
0430     u8 dev_disable_reply = 0;
0431     struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
0432     const unsigned int sleep_time = 10; /* 10 ms */
0433     unsigned int sleep_time_count = 0;
0434 
0435     iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
0436 
0437     /*
0438      * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
0439      * state is not immediately set to disabled and going through a
0440      * transitory state of disabling.
0441      */
0442     while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
0443         dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
0444         if (dev_disable_reply == NE_ENABLE_OFF)
0445             return;
0446 
0447         msleep_interruptible(sleep_time);
0448         sleep_time_count += sleep_time;
0449     }
0450 
0451     dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
0452     if (dev_disable_reply != NE_ENABLE_OFF)
0453         dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
0454 }
0455 
/**
 * ne_pci_probe() - Probe function for the NE PCI device.
 * @pdev:   PCI device to match with the NE PCI driver.
 * @id :    PCI device id table associated with the NE PCI driver.
 *
 * Enables the PCI device, maps its MMIO BAR, sets up MSI-X, enables the NE
 * device interface and registers the misc device that exposes the ioctl
 * API, unwinding each step on failure.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
    struct ne_pci_dev *ne_pci_dev = NULL;
    int rc = -EINVAL;

    ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
    if (!ne_pci_dev)
        return -ENOMEM;

    rc = pci_enable_device(pdev);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);

        goto free_ne_pci_dev;
    }

    pci_set_master(pdev);

    rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);

        goto disable_pci_dev;
    }

    /* Map the whole NE BAR (len 0 = full BAR). */
    ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
    if (!ne_pci_dev->iomem_base) {
        rc = -ENOMEM;

        dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);

        goto release_pci_regions;
    }

    /* Drvdata must be set before the IRQ handlers can look it up. */
    pci_set_drvdata(pdev, ne_pci_dev);

    rc = ne_setup_msix(pdev);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);

        goto iounmap_pci_bar;
    }

    /* Start from a known-disabled state before enabling. */
    ne_pci_dev_disable(pdev);

    rc = ne_pci_dev_enable(pdev);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);

        goto teardown_msix;
    }

    atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
    init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
    INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
    mutex_init(&ne_pci_dev->enclaves_list_mutex);
    mutex_init(&ne_pci_dev->pci_dev_mutex);
    ne_pci_dev->pdev = pdev;

    ne_devs.ne_pci_dev = ne_pci_dev;

    /* Register last: the misc dev exposes the user-space API. */
    rc = misc_register(ne_devs.ne_misc_dev);
    if (rc < 0) {
        dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);

        goto disable_ne_pci_dev;
    }

    return 0;

/* Unwind in reverse order of acquisition. */
disable_ne_pci_dev:
    ne_devs.ne_pci_dev = NULL;
    ne_pci_dev_disable(pdev);
teardown_msix:
    ne_teardown_msix(pdev);
iounmap_pci_bar:
    pci_set_drvdata(pdev, NULL);
    pci_iounmap(pdev, ne_pci_dev->iomem_base);
release_pci_regions:
    pci_release_regions(pdev);
disable_pci_dev:
    pci_disable_device(pdev);
free_ne_pci_dev:
    kfree(ne_pci_dev);

    return rc;
}
0553 
/**
 * ne_pci_remove() - Remove function for the NE PCI device.
 * @pdev:   PCI device associated with the NE PCI driver.
 *
 * Undoes ne_pci_probe() in reverse order: deregister the user-space API
 * first, then disable the device, tear down IRQs, and release PCI
 * resources.
 *
 * Context: Process context.
 */
static void ne_pci_remove(struct pci_dev *pdev)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

    /* Cut off user space before dismantling the device state. */
    misc_deregister(ne_devs.ne_misc_dev);

    ne_devs.ne_pci_dev = NULL;

    ne_pci_dev_disable(pdev);

    ne_teardown_msix(pdev);

    pci_set_drvdata(pdev, NULL);

    pci_iounmap(pdev, ne_pci_dev->iomem_base);

    pci_release_regions(pdev);

    pci_disable_device(pdev);

    kfree(ne_pci_dev);
}
0582 
/**
 * ne_pci_shutdown() - Shutdown function for the NE PCI device.
 * @pdev:   PCI device associated with the NE PCI driver.
 *
 * Performs the same teardown as ne_pci_remove(), guarded against a missing
 * drvdata.
 *
 * Context: Process context.
 */
static void ne_pci_shutdown(struct pci_dev *pdev)
{
    struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

    /*
     * NOTE(review): unlike remove, shutdown bails out if drvdata is not
     * set — presumably to tolerate being invoked when probe never
    * completed; confirm against the PCI core's shutdown semantics.
     */
    if (!ne_pci_dev)
        return;

    misc_deregister(ne_devs.ne_misc_dev);

    ne_devs.ne_pci_dev = NULL;

    ne_pci_dev_disable(pdev);

    ne_teardown_msix(pdev);

    pci_set_drvdata(pdev, NULL);

    pci_iounmap(pdev, ne_pci_dev->iomem_base);

    pci_release_regions(pdev);

    pci_disable_device(pdev);

    kfree(ne_pci_dev);
}
0614 
/*
 * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
 * needed.
 */
/* NE PCI device driver. */
struct pci_driver ne_pci_driver = {
    .name       = "nitro_enclaves",
    .id_table   = ne_pci_ids,
    .probe      = ne_pci_probe,
    .remove     = ne_pci_remove,
    .shutdown   = ne_pci_shutdown,
};