Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /*
0003  * This file implements the error recovery as a core part of PCIe error
0004  * reporting. When a PCIe error is delivered, an error message will be
0005  * collected and printed to console, then, an error recovery procedure
0006  * will be executed by following the PCI error recovery rules.
0007  *
0008  * Copyright (C) 2006 Intel Corp.
0009  *  Tom Long Nguyen (tom.l.nguyen@intel.com)
0010  *  Zhang Yanmin (yanmin.zhang@intel.com)
0011  */
0012 
0013 #define dev_fmt(fmt) "AER: " fmt
0014 
0015 #include <linux/pci.h>
0016 #include <linux/module.h>
0017 #include <linux/kernel.h>
0018 #include <linux/errno.h>
0019 #include <linux/aer.h>
0020 #include "portdrv.h"
0021 #include "../pci.h"
0022 
0023 static pci_ers_result_t merge_result(enum pci_ers_result orig,
0024                   enum pci_ers_result new)
0025 {
0026     if (new == PCI_ERS_RESULT_NO_AER_DRIVER)
0027         return PCI_ERS_RESULT_NO_AER_DRIVER;
0028 
0029     if (new == PCI_ERS_RESULT_NONE)
0030         return orig;
0031 
0032     switch (orig) {
0033     case PCI_ERS_RESULT_CAN_RECOVER:
0034     case PCI_ERS_RESULT_RECOVERED:
0035         orig = new;
0036         break;
0037     case PCI_ERS_RESULT_DISCONNECT:
0038         if (new == PCI_ERS_RESULT_NEED_RESET)
0039             orig = PCI_ERS_RESULT_NEED_RESET;
0040         break;
0041     default:
0042         break;
0043     }
0044 
0045     return orig;
0046 }
0047 
0048 static int report_error_detected(struct pci_dev *dev,
0049                  pci_channel_state_t state,
0050                  enum pci_ers_result *result)
0051 {
0052     struct pci_driver *pdrv;
0053     pci_ers_result_t vote;
0054     const struct pci_error_handlers *err_handler;
0055 
0056     device_lock(&dev->dev);
0057     pdrv = dev->driver;
0058     if (pci_dev_is_disconnected(dev)) {
0059         vote = PCI_ERS_RESULT_DISCONNECT;
0060     } else if (!pci_dev_set_io_state(dev, state)) {
0061         pci_info(dev, "can't recover (state transition %u -> %u invalid)\n",
0062             dev->error_state, state);
0063         vote = PCI_ERS_RESULT_NONE;
0064     } else if (!pdrv || !pdrv->err_handler ||
0065            !pdrv->err_handler->error_detected) {
0066         /*
0067          * If any device in the subtree does not have an error_detected
0068          * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent
0069          * error callbacks of "any" device in the subtree, and will
0070          * exit in the disconnected error state.
0071          */
0072         if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
0073             vote = PCI_ERS_RESULT_NO_AER_DRIVER;
0074             pci_info(dev, "can't recover (no error_detected callback)\n");
0075         } else {
0076             vote = PCI_ERS_RESULT_NONE;
0077         }
0078     } else {
0079         err_handler = pdrv->err_handler;
0080         vote = err_handler->error_detected(dev, state);
0081     }
0082     pci_uevent_ers(dev, vote);
0083     *result = merge_result(*result, vote);
0084     device_unlock(&dev->dev);
0085     return 0;
0086 }
0087 
0088 static int report_frozen_detected(struct pci_dev *dev, void *data)
0089 {
0090     return report_error_detected(dev, pci_channel_io_frozen, data);
0091 }
0092 
0093 static int report_normal_detected(struct pci_dev *dev, void *data)
0094 {
0095     return report_error_detected(dev, pci_channel_io_normal, data);
0096 }
0097 
0098 static int report_mmio_enabled(struct pci_dev *dev, void *data)
0099 {
0100     struct pci_driver *pdrv;
0101     pci_ers_result_t vote, *result = data;
0102     const struct pci_error_handlers *err_handler;
0103 
0104     device_lock(&dev->dev);
0105     pdrv = dev->driver;
0106     if (!pdrv ||
0107         !pdrv->err_handler ||
0108         !pdrv->err_handler->mmio_enabled)
0109         goto out;
0110 
0111     err_handler = pdrv->err_handler;
0112     vote = err_handler->mmio_enabled(dev);
0113     *result = merge_result(*result, vote);
0114 out:
0115     device_unlock(&dev->dev);
0116     return 0;
0117 }
0118 
0119 static int report_slot_reset(struct pci_dev *dev, void *data)
0120 {
0121     struct pci_driver *pdrv;
0122     pci_ers_result_t vote, *result = data;
0123     const struct pci_error_handlers *err_handler;
0124 
0125     device_lock(&dev->dev);
0126     pdrv = dev->driver;
0127     if (!pdrv ||
0128         !pdrv->err_handler ||
0129         !pdrv->err_handler->slot_reset)
0130         goto out;
0131 
0132     err_handler = pdrv->err_handler;
0133     vote = err_handler->slot_reset(dev);
0134     *result = merge_result(*result, vote);
0135 out:
0136     device_unlock(&dev->dev);
0137     return 0;
0138 }
0139 
0140 static int report_resume(struct pci_dev *dev, void *data)
0141 {
0142     struct pci_driver *pdrv;
0143     const struct pci_error_handlers *err_handler;
0144 
0145     device_lock(&dev->dev);
0146     pdrv = dev->driver;
0147     if (!pci_dev_set_io_state(dev, pci_channel_io_normal) ||
0148         !pdrv ||
0149         !pdrv->err_handler ||
0150         !pdrv->err_handler->resume)
0151         goto out;
0152 
0153     err_handler = pdrv->err_handler;
0154     err_handler->resume(dev);
0155 out:
0156     pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
0157     device_unlock(&dev->dev);
0158     return 0;
0159 }
0160 
0161 /**
0162  * pci_walk_bridge - walk bridges potentially AER affected
0163  * @bridge: bridge which may be a Port, an RCEC, or an RCiEP
0164  * @cb:     callback to be called for each device found
0165  * @userdata:   arbitrary pointer to be passed to callback
0166  *
0167  * If the device provided is a bridge, walk the subordinate bus, including
0168  * any bridged devices on buses under this bus.  Call the provided callback
0169  * on each device found.
0170  *
0171  * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP,
0172  * call the callback on the device itself.
0173  */
0174 static void pci_walk_bridge(struct pci_dev *bridge,
0175                 int (*cb)(struct pci_dev *, void *),
0176                 void *userdata)
0177 {
0178     if (bridge->subordinate)
0179         pci_walk_bus(bridge->subordinate, cb, userdata);
0180     else
0181         cb(bridge, userdata);
0182 }
0183 
0184 pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
0185         pci_channel_state_t state,
0186         pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev))
0187 {
0188     int type = pci_pcie_type(dev);
0189     struct pci_dev *bridge;
0190     pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
0191     struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
0192 
0193     /*
0194      * If the error was detected by a Root Port, Downstream Port, RCEC,
0195      * or RCiEP, recovery runs on the device itself.  For Ports, that
0196      * also includes any subordinate devices.
0197      *
0198      * If it was detected by another device (Endpoint, etc), recovery
0199      * runs on the device and anything else under the same Port, i.e.,
0200      * everything under "bridge".
0201      */
0202     if (type == PCI_EXP_TYPE_ROOT_PORT ||
0203         type == PCI_EXP_TYPE_DOWNSTREAM ||
0204         type == PCI_EXP_TYPE_RC_EC ||
0205         type == PCI_EXP_TYPE_RC_END)
0206         bridge = dev;
0207     else
0208         bridge = pci_upstream_bridge(dev);
0209 
0210     pci_dbg(bridge, "broadcast error_detected message\n");
0211     if (state == pci_channel_io_frozen) {
0212         pci_walk_bridge(bridge, report_frozen_detected, &status);
0213         if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) {
0214             pci_warn(bridge, "subordinate device reset failed\n");
0215             goto failed;
0216         }
0217     } else {
0218         pci_walk_bridge(bridge, report_normal_detected, &status);
0219     }
0220 
0221     if (status == PCI_ERS_RESULT_CAN_RECOVER) {
0222         status = PCI_ERS_RESULT_RECOVERED;
0223         pci_dbg(bridge, "broadcast mmio_enabled message\n");
0224         pci_walk_bridge(bridge, report_mmio_enabled, &status);
0225     }
0226 
0227     if (status == PCI_ERS_RESULT_NEED_RESET) {
0228         /*
0229          * TODO: Should call platform-specific
0230          * functions to reset slot before calling
0231          * drivers' slot_reset callbacks?
0232          */
0233         status = PCI_ERS_RESULT_RECOVERED;
0234         pci_dbg(bridge, "broadcast slot_reset message\n");
0235         pci_walk_bridge(bridge, report_slot_reset, &status);
0236     }
0237 
0238     if (status != PCI_ERS_RESULT_RECOVERED)
0239         goto failed;
0240 
0241     pci_dbg(bridge, "broadcast resume message\n");
0242     pci_walk_bridge(bridge, report_resume, &status);
0243 
0244     /*
0245      * If we have native control of AER, clear error status in the device
0246      * that detected the error.  If the platform retained control of AER,
0247      * it is responsible for clearing this status.  In that case, the
0248      * signaling device may not even be visible to the OS.
0249      */
0250     if (host->native_aer || pcie_ports_native) {
0251         pcie_clear_device_status(dev);
0252         pci_aer_clear_nonfatal_status(dev);
0253     }
0254     pci_info(bridge, "device recovery successful\n");
0255     return status;
0256 
0257 failed:
0258     pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
0259 
0260     /* TODO: Should kernel panic here? */
0261     pci_info(bridge, "device recovery failed\n");
0262 
0263     return status;
0264 }