Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  *  Copyright IBM Corp. 2012
0004  *
0005  *  Author(s):
0006  *    Jan Glauber <jang@linux.vnet.ibm.com>
0007  */
0008 
0009 #define KMSG_COMPONENT "zpci"
0010 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
0011 
0012 #include <linux/kernel.h>
0013 #include <linux/pci.h>
0014 #include <asm/pci_debug.h>
0015 #include <asm/pci_dma.h>
0016 #include <asm/sclp.h>
0017 
0018 #include "pci_bus.h"
0019 
0020 /* Content Code Description for PCI Function Error */
0021 struct zpci_ccdf_err {
0022     u32 reserved1;
0023     u32 fh;             /* function handle */
0024     u32 fid;            /* function id */
0025     u32 ett     :  4;       /* expected table type */
0026     u32 mvn     : 12;       /* MSI vector number */
0027     u32 dmaas   :  8;       /* DMA address space */
0028     u32     :  6;
0029     u32 q       :  1;       /* event qualifier */
0030     u32 rw      :  1;       /* read/write */
0031     u64 faddr;          /* failing address */
0032     u32 reserved3;
0033     u16 reserved4;
0034     u16 pec;            /* PCI event code */
0035 } __packed;
0036 
0037 /* Content Code Description for PCI Function Availability */
0038 struct zpci_ccdf_avail {
0039     u32 reserved1;
0040     u32 fh;             /* function handle */
0041     u32 fid;            /* function id */
0042     u32 reserved2;
0043     u32 reserved3;
0044     u32 reserved4;
0045     u32 reserved5;
0046     u16 reserved6;
0047     u16 pec;            /* PCI event code */
0048 } __packed;
0049 
0050 static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
0051 {
0052     switch (ers_res) {
0053     case PCI_ERS_RESULT_CAN_RECOVER:
0054     case PCI_ERS_RESULT_RECOVERED:
0055     case PCI_ERS_RESULT_NEED_RESET:
0056         return false;
0057     default:
0058         return true;
0059     }
0060 }
0061 
0062 static bool is_passed_through(struct zpci_dev *zdev)
0063 {
0064     return zdev->s390_domain;
0065 }
0066 
0067 static bool is_driver_supported(struct pci_driver *driver)
0068 {
0069     if (!driver || !driver->err_handler)
0070         return false;
0071     if (!driver->err_handler->error_detected)
0072         return false;
0073     if (!driver->err_handler->slot_reset)
0074         return false;
0075     if (!driver->err_handler->resume)
0076         return false;
0077     return true;
0078 }
0079 
0080 static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
0081                              struct pci_driver *driver)
0082 {
0083     pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
0084 
0085     ers_res = driver->err_handler->error_detected(pdev,  pdev->error_state);
0086     if (ers_result_indicates_abort(ers_res))
0087         pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
0088     else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
0089         pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
0090 
0091     return ers_res;
0092 }
0093 
0094 static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
0095                             struct pci_driver *driver)
0096 {
0097     pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
0098     struct zpci_dev *zdev = to_zpci(pdev);
0099     int rc;
0100 
0101     pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
0102     rc = zpci_reset_load_store_blocked(zdev);
0103     if (rc) {
0104         pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
0105         /* Let's try a full reset instead */
0106         return PCI_ERS_RESULT_NEED_RESET;
0107     }
0108 
0109     if (driver->err_handler->mmio_enabled) {
0110         ers_res = driver->err_handler->mmio_enabled(pdev);
0111         if (ers_result_indicates_abort(ers_res)) {
0112             pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
0113                 pci_name(pdev));
0114             return ers_res;
0115         } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
0116             pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
0117             return ers_res;
0118         }
0119     }
0120 
0121     pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
0122     rc = zpci_clear_error_state(zdev);
0123     if (!rc) {
0124         pdev->error_state = pci_channel_io_normal;
0125     } else {
0126         pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
0127         /* Let's try a full reset instead */
0128         return PCI_ERS_RESULT_NEED_RESET;
0129     }
0130 
0131     return ers_res;
0132 }
0133 
0134 static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
0135                         struct pci_driver *driver)
0136 {
0137     pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
0138 
0139     pr_info("%s: Initiating reset\n", pci_name(pdev));
0140     if (zpci_hot_reset_device(to_zpci(pdev))) {
0141         pr_err("%s: The reset request failed\n", pci_name(pdev));
0142         return ers_res;
0143     }
0144     pdev->error_state = pci_channel_io_normal;
0145     ers_res = driver->err_handler->slot_reset(pdev);
0146     if (ers_result_indicates_abort(ers_res)) {
0147         pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
0148         return ers_res;
0149     }
0150 
0151     return ers_res;
0152 }
0153 
0154 /* zpci_event_attempt_error_recovery - Try to recover the given PCI function
0155  * @pdev: PCI function to recover currently in the error state
0156  *
0157  * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
0158  * With the simplification that recovery always happens per function
0159  * and the platform determines which functions are affected for
0160  * multi-function devices.
0161  */
0162 static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
0163 {
0164     pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
0165     struct pci_driver *driver;
0166 
0167     /*
0168      * Ensure that the PCI function is not removed concurrently, no driver
0169      * is unbound or probed and that userspace can't access its
0170      * configuration space while we perform recovery.
0171      */
0172     pci_dev_lock(pdev);
0173     if (pdev->error_state == pci_channel_io_perm_failure) {
0174         ers_res = PCI_ERS_RESULT_DISCONNECT;
0175         goto out_unlock;
0176     }
0177     pdev->error_state = pci_channel_io_frozen;
0178 
0179     if (is_passed_through(to_zpci(pdev))) {
0180         pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
0181             pci_name(pdev));
0182         goto out_unlock;
0183     }
0184 
0185     driver = to_pci_driver(pdev->dev.driver);
0186     if (!is_driver_supported(driver)) {
0187         if (!driver)
0188             pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
0189                 pci_name(pdev));
0190         else
0191             pr_info("%s: The %s driver bound to the device does not support error recovery\n",
0192                 pci_name(pdev),
0193                 driver->name);
0194         goto out_unlock;
0195     }
0196 
0197     ers_res = zpci_event_notify_error_detected(pdev, driver);
0198     if (ers_result_indicates_abort(ers_res))
0199         goto out_unlock;
0200 
0201     if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
0202         ers_res = zpci_event_do_error_state_clear(pdev, driver);
0203         if (ers_result_indicates_abort(ers_res))
0204             goto out_unlock;
0205     }
0206 
0207     if (ers_res == PCI_ERS_RESULT_NEED_RESET)
0208         ers_res = zpci_event_do_reset(pdev, driver);
0209 
0210     if (ers_res != PCI_ERS_RESULT_RECOVERED) {
0211         pr_err("%s: Automatic recovery failed; operator intervention is required\n",
0212                pci_name(pdev));
0213         goto out_unlock;
0214     }
0215 
0216     pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
0217     if (driver->err_handler->resume)
0218         driver->err_handler->resume(pdev);
0219 out_unlock:
0220     pci_dev_unlock(pdev);
0221 
0222     return ers_res;
0223 }
0224 
0225 /* zpci_event_io_failure - Report PCI channel failure state to driver
0226  * @pdev: PCI function for which to report
0227  * @es: PCI channel failure state to report
0228  */
0229 static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
0230 {
0231     struct pci_driver *driver;
0232 
0233     pci_dev_lock(pdev);
0234     pdev->error_state = es;
0235     /**
0236      * While vfio-pci's error_detected callback notifies user-space QEMU
0237      * reacts to this by freezing the guest. In an s390 environment PCI
0238      * errors are rarely fatal so this is overkill. Instead in the future
0239      * we will inject the error event and let the guest recover the device
0240      * itself.
0241      */
0242     if (is_passed_through(to_zpci(pdev)))
0243         goto out;
0244     driver = to_pci_driver(pdev->dev.driver);
0245     if (driver && driver->err_handler && driver->err_handler->error_detected)
0246         driver->err_handler->error_detected(pdev, pdev->error_state);
0247 out:
0248     pci_dev_unlock(pdev);
0249 }
0250 
0251 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
0252 {
0253     struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
0254     struct pci_dev *pdev = NULL;
0255     pci_ers_result_t ers_res;
0256 
0257     zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
0258          ccdf->fid, ccdf->fh, ccdf->pec);
0259     zpci_err("error CCDF:\n");
0260     zpci_err_hex(ccdf, sizeof(*ccdf));
0261 
0262     if (zdev) {
0263         zpci_update_fh(zdev, ccdf->fh);
0264         if (zdev->zbus->bus)
0265             pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
0266     }
0267 
0268     pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
0269            pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
0270 
0271     if (!pdev)
0272         goto no_pdev;
0273 
0274     switch (ccdf->pec) {
0275     case 0x003a: /* Service Action or Error Recovery Successful */
0276         ers_res = zpci_event_attempt_error_recovery(pdev);
0277         if (ers_res != PCI_ERS_RESULT_RECOVERED)
0278             zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
0279         break;
0280     default:
0281         /*
0282          * Mark as frozen not permanently failed because the device
0283          * could be subsequently recovered by the platform.
0284          */
0285         zpci_event_io_failure(pdev, pci_channel_io_frozen);
0286         break;
0287     }
0288     pci_dev_put(pdev);
0289 no_pdev:
0290     zpci_zdev_put(zdev);
0291 }
0292 
/* zpci_event_error - Entry point for PCI function error events
 * @data: event data, handed on as the struct zpci_ccdf_err of the event
 *
 * Events are ignored while zpci_is_enabled() reports false.
 */
void zpci_event_error(void *data)
{
    if (!zpci_is_enabled())
        return;

    __zpci_event_error(data);
}
0298 
0299 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
0300 {
0301     zpci_update_fh(zdev, fh);
0302     /* Give the driver a hint that the function is
0303      * already unusable.
0304      */
0305     zpci_bus_remove_device(zdev, true);
0306     /* Even though the device is already gone we still
0307      * need to free zPCI resources as part of the disable.
0308      */
0309     if (zdev->dma_table)
0310         zpci_dma_exit_device(zdev);
0311     if (zdev_enabled(zdev))
0312         zpci_disable_device(zdev);
0313     zdev->state = ZPCI_FN_STATE_STANDBY;
0314 }
0315 
0316 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
0317 {
0318     struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
0319     bool existing_zdev = !!zdev;
0320     enum zpci_state state;
0321 
0322     zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
0323          ccdf->fid, ccdf->fh, ccdf->pec);
0324     switch (ccdf->pec) {
0325     case 0x0301: /* Reserved|Standby -> Configured */
0326         if (!zdev) {
0327             zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
0328             if (IS_ERR(zdev))
0329                 break;
0330         } else {
0331             /* the configuration request may be stale */
0332             if (zdev->state != ZPCI_FN_STATE_STANDBY)
0333                 break;
0334             zdev->state = ZPCI_FN_STATE_CONFIGURED;
0335         }
0336         zpci_scan_configured_device(zdev, ccdf->fh);
0337         break;
0338     case 0x0302: /* Reserved -> Standby */
0339         if (!zdev)
0340             zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
0341         else
0342             zpci_update_fh(zdev, ccdf->fh);
0343         break;
0344     case 0x0303: /* Deconfiguration requested */
0345         if (zdev) {
0346             /* The event may have been queued before we confirgured
0347              * the device.
0348              */
0349             if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
0350                 break;
0351             zpci_update_fh(zdev, ccdf->fh);
0352             zpci_deconfigure_device(zdev);
0353         }
0354         break;
0355     case 0x0304: /* Configured -> Standby|Reserved */
0356         if (zdev) {
0357             /* The event may have been queued before we confirgured
0358              * the device.:
0359              */
0360             if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
0361                 zpci_event_hard_deconfigured(zdev, ccdf->fh);
0362             /* The 0x0304 event may immediately reserve the device */
0363             if (!clp_get_state(zdev->fid, &state) &&
0364                 state == ZPCI_FN_STATE_RESERVED) {
0365                 zpci_device_reserved(zdev);
0366             }
0367         }
0368         break;
0369     case 0x0306: /* 0x308 or 0x302 for multiple devices */
0370         zpci_remove_reserved_devices();
0371         clp_scan_pci_devices();
0372         break;
0373     case 0x0308: /* Standby -> Reserved */
0374         if (!zdev)
0375             break;
0376         zpci_device_reserved(zdev);
0377         break;
0378     default:
0379         break;
0380     }
0381     if (existing_zdev)
0382         zpci_zdev_put(zdev);
0383 }
0384 
/* zpci_event_availability - Entry point for PCI availability events
 * @data: event data, handed on as the struct zpci_ccdf_avail of the event
 *
 * Events are ignored while zpci_is_enabled() reports false.
 */
void zpci_event_availability(void *data)
{
    if (!zpci_is_enabled())
        return;

    __zpci_event_availability(data);
}