/* arch/powerpc/platforms/powernv/eeh-powernv.c — source listing */

0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  * PowerNV Platform dependent EEH operations
0004  *
0005  * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
0006  */
0007 
0008 #include <linux/atomic.h>
0009 #include <linux/debugfs.h>
0010 #include <linux/delay.h>
0011 #include <linux/export.h>
0012 #include <linux/init.h>
0013 #include <linux/interrupt.h>
0014 #include <linux/irqdomain.h>
0015 #include <linux/list.h>
0016 #include <linux/msi.h>
0017 #include <linux/of.h>
0018 #include <linux/pci.h>
0019 #include <linux/proc_fs.h>
0020 #include <linux/rbtree.h>
0021 #include <linux/sched.h>
0022 #include <linux/seq_file.h>
0023 #include <linux/spinlock.h>
0024 
0025 #include <asm/eeh.h>
0026 #include <asm/eeh_event.h>
0027 #include <asm/firmware.h>
0028 #include <asm/io.h>
0029 #include <asm/iommu.h>
0030 #include <asm/machdep.h>
0031 #include <asm/msi_bitmap.h>
0032 #include <asm/opal.h>
0033 #include <asm/ppc-pci.h>
0034 #include <asm/pnv-pci.h>
0035 
0036 #include "powernv.h"
0037 #include "pci.h"
0038 #include "../../../../drivers/pci/pci.h"
0039 
0040 static int eeh_event_irq = -EINVAL;
0041 
0042 static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
0043 {
0044     dev_dbg(&pdev->dev, "EEH: Setting up device\n");
0045     eeh_probe_device(pdev);
0046 }
0047 
0048 static irqreturn_t pnv_eeh_event(int irq, void *data)
0049 {
0050     /*
0051      * We simply send a special EEH event if EEH has been
0052      * enabled. We don't care about EEH events until we've
0053      * finished processing the outstanding ones. Event processing
0054      * gets unmasked in next_error() if EEH is enabled.
0055      */
0056     disable_irq_nosync(irq);
0057 
0058     if (eeh_enabled())
0059         eeh_send_failure_event(NULL);
0060 
0061     return IRQ_HANDLED;
0062 }
0063 
0064 #ifdef CONFIG_DEBUG_FS
0065 static ssize_t pnv_eeh_ei_write(struct file *filp,
0066                 const char __user *user_buf,
0067                 size_t count, loff_t *ppos)
0068 {
0069     struct pci_controller *hose = filp->private_data;
0070     struct eeh_pe *pe;
0071     int pe_no, type, func;
0072     unsigned long addr, mask;
0073     char buf[50];
0074     int ret;
0075 
0076     if (!eeh_ops || !eeh_ops->err_inject)
0077         return -ENXIO;
0078 
0079     /* Copy over argument buffer */
0080     ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
0081     if (!ret)
0082         return -EFAULT;
0083 
0084     /* Retrieve parameters */
0085     ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
0086              &pe_no, &type, &func, &addr, &mask);
0087     if (ret != 5)
0088         return -EINVAL;
0089 
0090     /* Retrieve PE */
0091     pe = eeh_pe_get(hose, pe_no);
0092     if (!pe)
0093         return -ENODEV;
0094 
0095     /* Do error injection */
0096     ret = eeh_ops->err_inject(pe, type, func, addr, mask);
0097     return ret < 0 ? ret : count;
0098 }
0099 
/* debugfs "err_injct" file: write-only trigger for EEH error injection. */
static const struct file_operations pnv_eeh_ei_fops = {
	.open   = simple_open,
	.llseek = no_llseek,
	.write  = pnv_eeh_ei_write,
};
0105 
0106 static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
0107 {
0108     struct pci_controller *hose = data;
0109     struct pnv_phb *phb = hose->private_data;
0110 
0111     out_be64(phb->regs + offset, val);
0112     return 0;
0113 }
0114 
0115 static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
0116 {
0117     struct pci_controller *hose = data;
0118     struct pnv_phb *phb = hose->private_data;
0119 
0120     *val = in_be64(phb->regs + offset);
0121     return 0;
0122 }
0123 
/*
 * Generate a debugfs attribute pair for one PHB error-injection
 * register: get/set thunks around pnv_eeh_dbgfs_get/set at a fixed
 * register offset, wrapped in a DEFINE_SIMPLE_ATTRIBUTE fops named
 * pnv_eeh_dbgfs_ops_<name>.
 */
#define PNV_EEH_DBGFS_ENTRY(name, reg)				\
static int pnv_eeh_dbgfs_set_##name(void *data, u64 val)	\
{								\
	return pnv_eeh_dbgfs_set(data, reg, val);		\
}								\
								\
static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val)	\
{								\
	return pnv_eeh_dbgfs_get(data, reg, val);		\
}								\
								\
DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name,		\
			pnv_eeh_dbgfs_get_##name,		\
			pnv_eeh_dbgfs_set_##name,		\
			"0x%llx\n")

/* Register offsets of the outbound/inbound error-injection controls */
PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
0143 
0144 #endif /* CONFIG_DEBUG_FS */
0145 
0146 static void pnv_eeh_enable_phbs(void)
0147 {
0148     struct pci_controller *hose;
0149     struct pnv_phb *phb;
0150 
0151     list_for_each_entry(hose, &hose_list, list_node) {
0152         phb = hose->private_data;
0153         /*
0154          * If EEH is enabled, we're going to rely on that.
0155          * Otherwise, we restore to conventional mechanism
0156          * to clear frozen PE during PCI config access.
0157          */
0158         if (eeh_enabled())
0159             phb->flags |= PNV_PHB_FLAG_EEH;
0160         else
0161             phb->flags &= ~PNV_PHB_FLAG_EEH;
0162     }
0163 }
0164 
/**
 * pnv_eeh_post_init - EEH platform dependent post initialization
 *
 * EEH platform dependent post initialization on powernv. When
 * the function is called, the EEH PEs and devices should have
 * been built. If the I/O cache staff has been built, EEH is
 * ready to supply service.
 *
 * Return: 0 on success; a negative errno if the OPAL PCI-error
 * event interrupt cannot be obtained or requested.
 */
int pnv_eeh_post_init(void)
{
	struct pci_controller *hose;
	struct pnv_phb *phb;
	int ret = 0;

	eeh_show_enabled();

	/* Register OPAL event notifier */
	eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
	if (eeh_event_irq < 0) {
		pr_err("%s: Can't register OPAL event interrupt (%d)\n",
		       __func__, eeh_event_irq);
		return eeh_event_irq;
	}

	ret = request_irq(eeh_event_irq, pnv_eeh_event,
			  IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
	if (ret < 0) {
		/* Undo the mapping created by opal_event_request() */
		irq_dispose_mapping(eeh_event_irq);
		pr_err("%s: Can't request OPAL event interrupt (%d)\n",
		       __func__, eeh_event_irq);
		return ret;
	}

	/* The handler is only useful once EEH is actually enabled */
	if (!eeh_enabled())
		disable_irq(eeh_event_irq);

	pnv_eeh_enable_phbs();

	list_for_each_entry(hose, &hose_list, list_node) {
		phb = hose->private_data;

		/* Create debugfs entries */
#ifdef CONFIG_DEBUG_FS
		/* Skip PHBs already populated or lacking a debugfs dir */
		if (phb->has_dbgfs || !phb->dbgfs)
			continue;

		phb->has_dbgfs = 1;
		debugfs_create_file("err_injct", 0200,
				    phb->dbgfs, hose,
				    &pnv_eeh_ei_fops);

		debugfs_create_file("err_injct_outbound", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_dbgfs_ops_outb);
		debugfs_create_file("err_injct_inboundA", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_dbgfs_ops_inbA);
		debugfs_create_file("err_injct_inboundB", 0600,
				    phb->dbgfs, hose,
				    &pnv_eeh_dbgfs_ops_inbB);
#endif /* CONFIG_DEBUG_FS */
	}

	return ret;
}
0230 
0231 static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
0232 {
0233     int pos = PCI_CAPABILITY_LIST;
0234     int cnt = 48;   /* Maximal number of capabilities */
0235     u32 status, id;
0236 
0237     if (!pdn)
0238         return 0;
0239 
0240     /* Check if the device supports capabilities */
0241     pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
0242     if (!(status & PCI_STATUS_CAP_LIST))
0243         return 0;
0244 
0245     while (cnt--) {
0246         pnv_pci_cfg_read(pdn, pos, 1, &pos);
0247         if (pos < 0x40)
0248             break;
0249 
0250         pos &= ~3;
0251         pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
0252         if (id == 0xff)
0253             break;
0254 
0255         /* Found */
0256         if (id == cap)
0257             return pos;
0258 
0259         /* Next one */
0260         pos += PCI_CAP_LIST_NEXT;
0261     }
0262 
0263     return 0;
0264 }
0265 
/*
 * Walk the PCIe extended capability list of @pdn looking for
 * extended capability ID @cap. Returns its config-space offset, or
 * 0 when the device has no PCIe capability, the list is empty, the
 * list is malformed, or the capability is absent.
 */
static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 header;
	/* List starts at 0x100; ttl bounds traversal of a broken list */
	int pos = 256, ttl = (4096 - 256) / 8;

	if (!edev || !edev->pcie_cap)
		return 0;
	if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
		return 0;
	else if (!header)
		return 0;

	while (ttl-- > 0) {
		if (PCI_EXT_CAP_ID(header) == cap && pos)
			return pos;

		/* A next pointer below 0x100 terminates the list */
		pos = PCI_EXT_CAP_NEXT(header);
		if (pos < 256)
			break;

		if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
			break;
	}

	return 0;
}
0293 
/*
 * Find the EEH PE upstream of @pdev: a VF uses its PF's PE,
 * everything else uses the PE of its parent bridge. Returns NULL
 * for a device with no parent (e.g. directly on the root bus).
 */
static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
{
	struct pci_controller *hose = pdev->bus->sysdata;
	struct pnv_phb *phb = hose->private_data;
	struct pci_dev *parent = pdev->bus->self;

#ifdef CONFIG_PCI_IOV
	/* for VFs we use the PF's PE as the upstream PE */
	if (pdev->is_virtfn)
		parent = pdev->physfn;
#endif

	/* otherwise use the PE of our parent bridge */
	if (parent) {
		/*
		 * NOTE(review): the pnv_ioda_get_pe() result is used
		 * without a NULL check — presumably every parent bridge
		 * has an IODA PE by this point; confirm in the IODA code.
		 */
		struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);

		return eeh_pe_get(phb->hose, ioda_pe->pe_number);
	}

	return NULL;
}
0315 
0316 /**
0317  * pnv_eeh_probe - Do probe on PCI device
0318  * @pdev: pci_dev to probe
0319  *
0320  * Create, or find the existing, eeh_dev for this pci_dev.
0321  */
0322 static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
0323 {
0324     struct pci_dn *pdn = pci_get_pdn(pdev);
0325     struct pci_controller *hose = pdn->phb;
0326     struct pnv_phb *phb = hose->private_data;
0327     struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
0328     struct eeh_pe *upstream_pe;
0329     uint32_t pcie_flags;
0330     int ret;
0331     int config_addr = (pdn->busno << 8) | (pdn->devfn);
0332 
0333     /*
0334      * When probing the root bridge, which doesn't have any
0335      * subordinate PCI devices. We don't have OF node for
0336      * the root bridge. So it's not reasonable to continue
0337      * the probing.
0338      */
0339     if (!edev || edev->pe)
0340         return NULL;
0341 
0342     /* already configured? */
0343     if (edev->pdev) {
0344         pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n",
0345             __func__, hose->global_number, config_addr >> 8,
0346             PCI_SLOT(config_addr), PCI_FUNC(config_addr));
0347         return edev;
0348     }
0349 
0350     /* Skip for PCI-ISA bridge */
0351     if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
0352         return NULL;
0353 
0354     eeh_edev_dbg(edev, "Probing device\n");
0355 
0356     /* Initialize eeh device */
0357     edev->mode  &= 0xFFFFFF00;
0358     edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
0359     edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
0360     edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
0361     edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
0362     if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
0363         edev->mode |= EEH_DEV_BRIDGE;
0364         if (edev->pcie_cap) {
0365             pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
0366                      2, &pcie_flags);
0367             pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
0368             if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
0369                 edev->mode |= EEH_DEV_ROOT_PORT;
0370             else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
0371                 edev->mode |= EEH_DEV_DS_PORT;
0372         }
0373     }
0374 
0375     edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
0376 
0377     upstream_pe = pnv_eeh_get_upstream_pe(pdev);
0378 
0379     /* Create PE */
0380     ret = eeh_pe_tree_insert(edev, upstream_pe);
0381     if (ret) {
0382         eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
0383         return NULL;
0384     }
0385 
0386     /*
0387      * If the PE contains any one of following adapters, the
0388      * PCI config space can't be accessed when dumping EEH log.
0389      * Otherwise, we will run into fenced PHB caused by shortage
0390      * of outbound credits in the adapter. The PCI config access
0391      * should be blocked until PE reset. MMIO access is dropped
0392      * by hardware certainly. In order to drop PCI config requests,
0393      * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
0394      * will be checked in the backend for PE state retrieval. If
0395      * the PE becomes frozen for the first time and the flag has
0396      * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
0397      * that PE to block its config space.
0398      *
0399      * Broadcom BCM5718 2-ports NICs (14e4:1656)
0400      * Broadcom Austin 4-ports NICs (14e4:1657)
0401      * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
0402      * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
0403      */
0404     if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
0405          pdn->device_id == 0x1656) ||
0406         (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
0407          pdn->device_id == 0x1657) ||
0408         (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
0409          pdn->device_id == 0x168a) ||
0410         (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
0411          pdn->device_id == 0x168e))
0412         edev->pe->state |= EEH_PE_CFG_RESTRICTED;
0413 
0414     /*
0415      * Cache the PE primary bus, which can't be fetched when
0416      * full hotplug is in progress. In that case, all child
0417      * PCI devices of the PE are expected to be removed prior
0418      * to PE reset.
0419      */
0420     if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
0421         edev->pe->bus = pci_find_bus(hose->global_number,
0422                          pdn->busno);
0423         if (edev->pe->bus)
0424             edev->pe->state |= EEH_PE_PRI_BUS;
0425     }
0426 
0427     /*
0428      * Enable EEH explicitly so that we will do EEH check
0429      * while accessing I/O stuff
0430      */
0431     if (!eeh_has_flag(EEH_ENABLED)) {
0432         enable_irq(eeh_event_irq);
0433         pnv_eeh_enable_phbs();
0434         eeh_add_flag(EEH_ENABLED);
0435     }
0436 
0437     /* Save memory bars */
0438     eeh_save_bars(edev);
0439 
0440     eeh_edev_dbg(edev, "EEH enabled on device\n");
0441 
0442     return edev;
0443 }
0444 
0445 /**
0446  * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
0447  * @pe: EEH PE
0448  * @option: operation to be issued
0449  *
0450  * The function is used to control the EEH functionality globally.
0451  * Currently, following options are support according to PAPR:
0452  * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
0453  */
0454 static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
0455 {
0456     struct pci_controller *hose = pe->phb;
0457     struct pnv_phb *phb = hose->private_data;
0458     bool freeze_pe = false;
0459     int opt;
0460     s64 rc;
0461 
0462     switch (option) {
0463     case EEH_OPT_DISABLE:
0464         return -EPERM;
0465     case EEH_OPT_ENABLE:
0466         return 0;
0467     case EEH_OPT_THAW_MMIO:
0468         opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
0469         break;
0470     case EEH_OPT_THAW_DMA:
0471         opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
0472         break;
0473     case EEH_OPT_FREEZE_PE:
0474         freeze_pe = true;
0475         opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
0476         break;
0477     default:
0478         pr_warn("%s: Invalid option %d\n", __func__, option);
0479         return -EINVAL;
0480     }
0481 
0482     /* Freeze master and slave PEs if PHB supports compound PEs */
0483     if (freeze_pe) {
0484         if (phb->freeze_pe) {
0485             phb->freeze_pe(phb, pe->addr);
0486             return 0;
0487         }
0488 
0489         rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr, opt);
0490         if (rc != OPAL_SUCCESS) {
0491             pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
0492                 __func__, rc, phb->hose->global_number,
0493                 pe->addr);
0494             return -EIO;
0495         }
0496 
0497         return 0;
0498     }
0499 
0500     /* Unfreeze master and slave PEs if PHB supports */
0501     if (phb->unfreeze_pe)
0502         return phb->unfreeze_pe(phb, pe->addr, opt);
0503 
0504     rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);
0505     if (rc != OPAL_SUCCESS) {
0506         pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
0507             __func__, rc, option, phb->hose->global_number,
0508             pe->addr);
0509         return -EIO;
0510     }
0511 
0512     return 0;
0513 }
0514 
0515 static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
0516 {
0517     struct pnv_phb *phb = pe->phb->private_data;
0518     s64 rc;
0519 
0520     rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
0521                      phb->diag_data_size);
0522     if (rc != OPAL_SUCCESS)
0523         pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
0524             __func__, rc, pe->phb->global_number);
0525 }
0526 
/*
 * Retrieve the state of a PHB-type PE. A healthy PHB reports all of
 * MMIO/DMA active and enabled; a newly-frozen PHB is marked isolated,
 * has its diag-data captured, and reports 0 (no capability bits set).
 */
static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate = 0;
	__be16 pcierr = 0;
	s64 rc;
	int result = 0;

	rc = opal_pci_eeh_freeze_status(phb->opal_id,
					pe->addr,
					&fstate,
					&pcierr,
					NULL);
	if (rc != OPAL_SUCCESS) {
		pr_warn("%s: Failure %lld getting PHB#%x state\n",
			__func__, rc, phb->hose->global_number);
		return EEH_STATE_NOT_SUPPORT;
	}

	/*
	 * Check PHB state. If the PHB is frozen for the
	 * first time, to dump the PHB diag-data.
	 */
	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
	} else if (!(pe->state & EEH_PE_ISOLATED)) {
		/* First observation of the error: isolate and snapshot */
		eeh_pe_mark_isolated(pe);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}
0565 
/*
 * Retrieve the state of an ordinary (non-PHB) PE. Maps the firmware
 * freeze state onto the EEH_STATE_* capability bits, and on the first
 * transition into a fully-frozen state isolates the PE and captures
 * diag-data.
 */
static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
	struct pnv_phb *phb = pe->phb->private_data;
	u8 fstate = 0;
	__be16 pcierr = 0;
	s64 rc;
	int result;

	/*
	 * We don't clobber hardware frozen state until PE
	 * reset is completed. In order to keep EEH core
	 * moving forward, we have to return operational
	 * state during PE reset.
	 */
	if (pe->state & EEH_PE_RESET) {
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		return result;
	}

	/*
	 * Fetch PE state from hardware. If the PHB
	 * supports compound PE, let it handle that.
	 */
	if (phb->get_pe_state) {
		fstate = phb->get_pe_state(phb, pe->addr);
	} else {
		rc = opal_pci_eeh_freeze_status(phb->opal_id,
						pe->addr,
						&fstate,
						&pcierr,
						NULL);
		if (rc != OPAL_SUCCESS) {
			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
				__func__, rc, phb->hose->global_number,
				pe->addr);
			return EEH_STATE_NOT_SUPPORT;
		}
	}

	/* Figure out state */
	switch (fstate) {
	case OPAL_EEH_STOPPED_NOT_FROZEN:
		result = (EEH_STATE_MMIO_ACTIVE  |
			  EEH_STATE_DMA_ACTIVE   |
			  EEH_STATE_MMIO_ENABLED |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_FREEZE:
		result = (EEH_STATE_DMA_ACTIVE |
			  EEH_STATE_DMA_ENABLED);
		break;
	case OPAL_EEH_STOPPED_DMA_FREEZE:
		result = (EEH_STATE_MMIO_ACTIVE |
			  EEH_STATE_MMIO_ENABLED);
		break;
	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
		result = 0;
		break;
	case OPAL_EEH_STOPPED_RESET:
		result = EEH_STATE_RESET_ACTIVE;
		break;
	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
		result = EEH_STATE_UNAVAILABLE;
		break;
	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
		result = EEH_STATE_NOT_SUPPORT;
		break;
	default:
		result = EEH_STATE_NOT_SUPPORT;
		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
			__func__, phb->hose->global_number,
			pe->addr, fstate);
	}

	/*
	 * If PHB supports compound PE, to freeze all
	 * slave PEs for consistency.
	 *
	 * If the PE is switching to frozen state for the
	 * first time, to dump the PHB diag-data.
	 */
	if (!(result & EEH_STATE_NOT_SUPPORT) &&
	    !(result & EEH_STATE_UNAVAILABLE) &&
	    !(result & EEH_STATE_MMIO_ACTIVE) &&
	    !(result & EEH_STATE_DMA_ACTIVE)  &&
	    !(pe->state & EEH_PE_ISOLATED)) {
		if (phb->freeze_pe)
			phb->freeze_pe(phb, pe->addr);

		eeh_pe_mark_isolated(pe);
		pnv_eeh_get_phb_diag(pe);

		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
	}

	return result;
}
0667 
0668 /**
0669  * pnv_eeh_get_state - Retrieve PE state
0670  * @pe: EEH PE
0671  * @delay: delay while PE state is temporarily unavailable
0672  *
0673  * Retrieve the state of the specified PE. For IODA-compitable
0674  * platform, it should be retrieved from IODA table. Therefore,
0675  * we prefer passing down to hardware implementation to handle
0676  * it.
0677  */
0678 static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
0679 {
0680     int ret;
0681 
0682     if (pe->type & EEH_PE_PHB)
0683         ret = pnv_eeh_get_phb_state(pe);
0684     else
0685         ret = pnv_eeh_get_pe_state(pe);
0686 
0687     if (!delay)
0688         return ret;
0689 
0690     /*
0691      * If the PE state is temporarily unavailable,
0692      * to inform the EEH core delay for default
0693      * period (1 second)
0694      */
0695     *delay = 0;
0696     if (ret & EEH_STATE_UNAVAILABLE)
0697         *delay = 1000;
0698 
0699     return ret;
0700 }
0701 
0702 static s64 pnv_eeh_poll(unsigned long id)
0703 {
0704     s64 rc = OPAL_HARDWARE;
0705 
0706     while (1) {
0707         rc = opal_pci_poll(id);
0708         if (rc <= 0)
0709             break;
0710 
0711         if (system_state < SYSTEM_RUNNING)
0712             udelay(1000 * rc);
0713         else
0714             msleep(rc);
0715     }
0716 
0717     return rc;
0718 }
0719 
/*
 * Issue a complete reset on the given PHB. EEH_RESET_FUNDAMENTAL and
 * EEH_RESET_HOT assert the reset; EEH_RESET_DEACTIVATE releases it
 * and then waits out the PCI bus settlement delay.
 *
 * Return: 0 on success, -EIO on any OPAL failure (including an
 * unrecognized @option, since rc then stays OPAL_HARDWARE).
 */
int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/* Issue PHB complete reset request */
	if (option == EEH_RESET_FUNDAMENTAL ||
	    option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PHB_COMPLETE,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/*
	 * Poll state of the PHB until the request is done
	 * successfully. The PHB reset is usually PHB complete
	 * reset followed by hot reset on root bus. So we also
	 * need the PCI bus settlement delay.
	 */
	if (rc > 0)
		rc = pnv_eeh_poll(phb->opal_id);
	if (option == EEH_RESET_DEACTIVATE) {
		/* Busy-wait while the scheduler is not yet available */
		if (system_state < SYSTEM_RUNNING)
			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
		else
			msleep(EEH_PE_RST_SETTLE_TIME);
	}
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}
0761 
/*
 * Reset the root bus below the given PHB: fundamental or hot reset
 * on assert, hot-scope deassert, followed by the settlement delay.
 *
 * Return: 0 on success, -EIO on any OPAL failure (including an
 * unrecognized @option, since rc then stays OPAL_HARDWARE).
 */
static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
{
	struct pnv_phb *phb = hose->private_data;
	s64 rc = OPAL_HARDWARE;

	pr_debug("%s: Reset PHB#%x, option=%d\n",
		 __func__, hose->global_number, option);

	/*
	 * During the reset deassert time, we needn't care
	 * the reset scope because the firmware does nothing
	 * for fundamental or hot reset during deassert phase.
	 */
	if (option == EEH_RESET_FUNDAMENTAL)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_FUNDAMENTAL,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_HOT)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_ASSERT_RESET);
	else if (option == EEH_RESET_DEACTIVATE)
		rc = opal_pci_reset(phb->opal_id,
				    OPAL_RESET_PCI_HOT,
				    OPAL_DEASSERT_RESET);
	if (rc < 0)
		goto out;

	/* Poll state of the PHB until the request is done */
	if (rc > 0)
		rc = pnv_eeh_poll(phb->opal_id);
	if (option == EEH_RESET_DEACTIVATE)
		msleep(EEH_PE_RST_SETTLE_TIME);
out:
	if (rc != OPAL_SUCCESS)
		return -EIO;

	return 0;
}
0801 
/*
 * Perform a secondary bus reset below bridge @dev by toggling the
 * bridge-control reset bit by hand. While the reset is asserted the
 * AER "surprise link down" error is masked so the forced link drop
 * is not reported as a fresh error.
 */
static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
{
	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	int aer = edev ? edev->aer_cap : 0;
	u32 ctrl;

	pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
		 __func__, pci_domain_nr(dev->bus),
		 dev->bus->number, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
	case EEH_RESET_HOT:
		/* Don't report linkDown event */
		if (aer) {
			eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl |= PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		/* Assert the secondary bus reset and hold it */
		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		/* Release the reset and wait for the bus to settle */
		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);

		msleep(EEH_PE_RST_SETTLE_TIME);

		/* Continue reporting linkDown event */
		if (aer) {
			eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,
					     4, &ctrl);
			ctrl &= ~PCI_ERR_UNC_SURPDN;
			eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,
					      4, ctrl);
		}

		break;
	}

	return 0;
}
0852 
/*
 * Reset the secondary bus below bridge @pdev. When the device-tree
 * node carries "ibm,reset-by-firmware", the reset is delegated to
 * OPAL via the slot id; otherwise fall back to the manual
 * bridge-control toggle in __pnv_eeh_bridge_reset().
 */
static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
{
	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct device_node *dn = pci_device_to_OF_node(pdev);
	uint64_t id = PCI_SLOT_ID(phb->opal_id,
				  (pdev->bus->number << 8) | pdev->devfn);
	uint8_t scope;
	int64_t rc;

	/* Hot reset to the bus if firmware cannot handle */
	if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL))
		return __pnv_eeh_bridge_reset(pdev, option);

	pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
		 __func__, pci_domain_nr(pdev->bus),
		 pdev->bus->number, option);

	switch (option) {
	case EEH_RESET_FUNDAMENTAL:
		scope = OPAL_RESET_PCI_FUNDAMENTAL;
		break;
	case EEH_RESET_HOT:
		scope = OPAL_RESET_PCI_HOT;
		break;
	case EEH_RESET_DEACTIVATE:
		/* Firmware handles deassert itself; nothing to do */
		return 0;
	default:
		dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n",
			__func__, option);
		return -EINVAL;
	}

	/* Error, or immediate completion, skips the poll loop */
	rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET);
	if (rc <= OPAL_SUCCESS)
		goto out;

	rc = pnv_eeh_poll(id);
out:
	return (rc == OPAL_SUCCESS) ? 0 : -EIO;
}
0894 
0895 void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
0896 {
0897     struct pci_controller *hose;
0898 
0899     if (pci_is_root_bus(dev->bus)) {
0900         hose = pci_bus_to_host(dev->bus);
0901         pnv_eeh_root_reset(hose, EEH_RESET_HOT);
0902         pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
0903     } else {
0904         pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
0905         pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
0906     }
0907 }
0908 
0909 static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
0910                      int pos, u16 mask)
0911 {
0912     struct eeh_dev *edev = pdn->edev;
0913     int i, status = 0;
0914 
0915     /* Wait for Transaction Pending bit to be cleared */
0916     for (i = 0; i < 4; i++) {
0917         eeh_ops->read_config(edev, pos, 2, &status);
0918         if (!(status & mask))
0919             return;
0920 
0921         msleep((1 << i) * 100);
0922     }
0923 
0924     pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
0925         __func__, type,
0926         pdn->phb->global_number, pdn->busno,
0927         PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
0928 }
0929 
/*
 * Issue (assert) or complete (deactivate) a PCIe Function Level
 * Reset on the device behind @pdn.
 *
 * Return: 0 on success, -ENOTTY when the device has no PCIe
 * capability or does not advertise FLR.
 */
static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 reg = 0;

	if (WARN_ON(!edev->pcie_cap))
		return -ENOTTY;

	/* FLR must be advertised in the device capabilities */
	eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
	if (!(reg & PCI_EXP_DEVCAP_FLR))
		return -ENOTTY;

	switch (option) {
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/* Drain outstanding transactions, then trigger the FLR */
		pnv_eeh_wait_for_pending(pdn, "",
					 edev->pcie_cap + PCI_EXP_DEVSTA,
					 PCI_EXP_DEVSTA_TRPND);
		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
				     4, &reg);
		reg |= PCI_EXP_DEVCTL_BCR_FLR;
		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
				      4, reg);
		msleep(EEH_PE_RST_HOLD_TIME);
		break;
	case EEH_RESET_DEACTIVATE:
		/* Clear the FLR trigger and let the function settle */
		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
				     4, &reg);
		reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
				      4, reg);
		msleep(EEH_PE_RST_SETTLE_TIME);
		break;
	}

	return 0;
}
0967 
0968 static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
0969 {
0970     struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
0971     u32 cap = 0;
0972 
0973     if (WARN_ON(!edev->af_cap))
0974         return -ENOTTY;
0975 
0976     eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap);
0977     if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
0978         return -ENOTTY;
0979 
0980     switch (option) {
0981     case EEH_RESET_HOT:
0982     case EEH_RESET_FUNDAMENTAL:
0983         /*
0984          * Wait for Transaction Pending bit to clear. A word-aligned
0985          * test is used, so we use the control offset rather than status
0986          * and shift the test bit to match.
0987          */
0988         pnv_eeh_wait_for_pending(pdn, "AF",
0989                      edev->af_cap + PCI_AF_CTRL,
0990                      PCI_AF_STATUS_TP << 8);
0991         eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL,
0992                       1, PCI_AF_CTRL_FLR);
0993         msleep(EEH_PE_RST_HOLD_TIME);
0994         break;
0995     case EEH_RESET_DEACTIVATE:
0996         eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0);
0997         msleep(EEH_PE_RST_SETTLE_TIME);
0998         break;
0999     }
1000 
1001     return 0;
1002 }
1003 
1004 static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
1005 {
1006     struct eeh_dev *edev;
1007     struct pci_dn *pdn;
1008     int ret;
1009 
1010     /* The VF PE should have only one child device */
1011     edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
1012     pdn = eeh_dev_to_pdn(edev);
1013     if (!pdn)
1014         return -ENXIO;
1015 
1016     ret = pnv_eeh_do_flr(pdn, option);
1017     if (!ret)
1018         return ret;
1019 
1020     return pnv_eeh_do_af_flr(pdn, option);
1021 }
1022 
/**
 * pnv_eeh_reset - Reset the specified PE
 * @pe: EEH PE
 * @option: reset option (EEH_RESET_HOT, EEH_RESET_FUNDAMENTAL,
 *          EEH_RESET_DEACTIVATE)
 *
 * Do reset on the indicated PE. For PCI bus sensitive PE,
 * we need to reset the parent p2p bridge. The PHB has to
 * be reinitialized if the p2p bridge is root bridge. For
 * PCI device sensitive PE, we will try to reset the device
 * through FLR. For now, we don't have OPAL APIs to do HARD
 * reset yet, so all reset would be SOFT (HOT) reset.
 *
 * Returns 0 on success, -EIO on OPAL or bus-lookup failure.
 */
static int pnv_eeh_reset(struct eeh_pe *pe, int option)
{
    struct pci_controller *hose = pe->phb;
    struct pnv_phb *phb;
    struct pci_bus *bus;
    int64_t rc;

    /*
     * For PHB reset, we always have complete reset. For those PEs whose
     * primary bus derived from root complex (root bus) or root port
     * (usually bus#1), we apply hot or fundamental reset on the root port.
     * For other PEs, we always have hot reset on the PE primary bus.
     *
     * Here, we have different design to pHyp, which always clear the
     * frozen state during PE reset. However, the good idea here from
     * benh is to keep frozen state before we get PE reset done completely
     * (until BAR restore). With the frozen state, HW drops illegal IO
     * or MMIO access, which can incur recursive frozen PE during PE
     * reset. The side effect is that EEH core has to clear the frozen
     * state explicitly after BAR restore.
     */
    if (pe->type & EEH_PE_PHB)
        return pnv_eeh_phb_reset(hose, option);

    /*
     * The frozen PE might be caused by PAPR error injection
     * registers, which are expected to be cleared after hitting
     * frozen PE as stated in the hardware spec. Unfortunately,
     * that's not true on P7IOC. So we have to clear it manually
     * to avoid recursive EEH errors during recovery.
     */
    phb = hose->private_data;
    if (phb->model == PNV_PHB_MODEL_P7IOC &&
        (option == EEH_RESET_HOT ||
         option == EEH_RESET_FUNDAMENTAL)) {
        rc = opal_pci_reset(phb->opal_id,
                    OPAL_RESET_PHB_ERROR,
                    OPAL_ASSERT_RESET);
        if (rc != OPAL_SUCCESS) {
            pr_warn("%s: Failure %lld clearing error injection registers\n",
                __func__, rc);
            return -EIO;
        }
    }

    /* VF PEs have no subordinate bus; reset the VF itself via FLR */
    if (pe->type & EEH_PE_VF)
        return pnv_eeh_reset_vf_pe(pe, option);

    bus = eeh_pe_bus_get(pe);
    if (!bus) {
        pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
            __func__, pe->phb->global_number, pe->addr);
        return -EIO;
    }

    if (pci_is_root_bus(bus))
        return pnv_eeh_root_reset(hose, option);

    /*
     * For hot resets try use the generic PCI error recovery reset
     * functions. These correctly handles the case where the secondary
     * bus is behind a hotplug slot and it will use the slot provided
     * reset methods to prevent spurious hotplug events during the reset.
     *
     * Fundamental resets need to be handled internally to EEH since the
     * PCI core doesn't really have a concept of a fundamental reset,
     * mainly because there's no standard way to generate one. Only a
     * few devices require an FRESET so it should be fine.
     */
    if (option != EEH_RESET_FUNDAMENTAL) {
        /*
         * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
         *     de-assert step. It's like the OPAL reset API was
         *     poorly designed or something...
         */
        if (option == EEH_RESET_DEACTIVATE)
            return 0;

        rc = pci_bus_error_reset(bus->self);
        if (!rc)
            return 0;
    }

    /* otherwise, use the generic bridge reset. this might call into FW */
    if (pci_is_root_bus(bus->parent))
        return pnv_eeh_root_reset(hose, option);
    return pnv_eeh_bridge_reset(bus->self, option);
}
1123 
/**
 * pnv_eeh_get_log - Retrieve error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log (unused on this platform)
 * @drv_log: driver log to be combined with retrieved error log (unused)
 * @len: length of driver log (unused)
 *
 * Retrieve the temporary or permanent error from the PE. The PHB
 * diag-data attached to @pe was captured earlier; here it is only
 * dumped to the kernel log, and skipped when EEH_EARLY_DUMP_LOG
 * already dumped it at detection time.
 */
static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
               char *drv_log, unsigned long len)
{
    /* Avoid a duplicate dump when the early-dump flag is set */
    if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
        pnv_pci_dump_phb_diag_data(pe->phb, pe->data);

    return 0;
}
1141 
/**
 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
 * @pe: EEH PE
 *
 * The function will be called to reconfigure the bridges included
 * in the specified PE so that the malfunctioning PE would be recovered
 * again. This is a no-op on powernv: nothing needs to be done here.
 */
static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
{
    return 0;
}
1154 
1155 /**
1156  * pnv_pe_err_inject - Inject specified error to the indicated PE
1157  * @pe: the indicated PE
1158  * @type: error type
1159  * @func: specific error type
1160  * @addr: address
1161  * @mask: address mask
1162  *
1163  * The routine is called to inject specified error, which is
1164  * determined by @type and @func, to the indicated PE for
1165  * testing purpose.
1166  */
1167 static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
1168                   unsigned long addr, unsigned long mask)
1169 {
1170     struct pci_controller *hose = pe->phb;
1171     struct pnv_phb *phb = hose->private_data;
1172     s64 rc;
1173 
1174     if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
1175         type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
1176         pr_warn("%s: Invalid error type %d\n",
1177             __func__, type);
1178         return -ERANGE;
1179     }
1180 
1181     if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
1182         func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
1183         pr_warn("%s: Invalid error function %d\n",
1184             __func__, func);
1185         return -ERANGE;
1186     }
1187 
1188     /* Firmware supports error injection ? */
1189     if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
1190         pr_warn("%s: Firmware doesn't support error injection\n",
1191             __func__);
1192         return -ENXIO;
1193     }
1194 
1195     /* Do error injection */
1196     rc = opal_pci_err_inject(phb->opal_id, pe->addr,
1197                  type, func, addr, mask);
1198     if (rc != OPAL_SUCCESS) {
1199         pr_warn("%s: Failure %lld injecting error "
1200             "%d-%d to PHB#%x-PE#%x\n",
1201             __func__, rc, type, func,
1202             hose->global_number, pe->addr);
1203         return -EIO;
1204     }
1205 
1206     return 0;
1207 }
1208 
1209 static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
1210 {
1211     struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
1212 
1213     if (!edev || !edev->pe)
1214         return false;
1215 
1216     /*
1217      * We will issue FLR or AF FLR to all VFs, which are contained
1218      * in VF PE. It relies on the EEH PCI config accessors. So we
1219      * can't block them during the window.
1220      */
1221     if (edev->physfn && (edev->pe->state & EEH_PE_RESET))
1222         return false;
1223 
1224     if (edev->pe->state & EEH_PE_CFG_BLOCKED)
1225         return true;
1226 
1227     return false;
1228 }
1229 
1230 static int pnv_eeh_read_config(struct eeh_dev *edev,
1231                    int where, int size, u32 *val)
1232 {
1233     struct pci_dn *pdn = eeh_dev_to_pdn(edev);
1234 
1235     if (!pdn)
1236         return PCIBIOS_DEVICE_NOT_FOUND;
1237 
1238     if (pnv_eeh_cfg_blocked(pdn)) {
1239         *val = 0xFFFFFFFF;
1240         return PCIBIOS_SET_FAILED;
1241     }
1242 
1243     return pnv_pci_cfg_read(pdn, where, size, val);
1244 }
1245 
1246 static int pnv_eeh_write_config(struct eeh_dev *edev,
1247                 int where, int size, u32 val)
1248 {
1249     struct pci_dn *pdn = eeh_dev_to_pdn(edev);
1250 
1251     if (!pdn)
1252         return PCIBIOS_DEVICE_NOT_FOUND;
1253 
1254     if (pnv_eeh_cfg_blocked(pdn))
1255         return PCIBIOS_SET_FAILED;
1256 
1257     return pnv_pci_cfg_write(pdn, where, size, val);
1258 }
1259 
/*
 * Dump the GEM and LEM register sets common to all P7IOC hub
 * diag-data record types. A register group that is entirely zero is
 * skipped to keep the log compact; testing the raw __be64 fields for
 * nonzero is endian-safe since zero is the same in any byte order.
 */
static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
{
    /* GEM */
    if (data->gemXfir || data->gemRfir ||
        data->gemRirqfir || data->gemMask || data->gemRwof)
        pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
            be64_to_cpu(data->gemXfir),
            be64_to_cpu(data->gemRfir),
            be64_to_cpu(data->gemRirqfir),
            be64_to_cpu(data->gemMask),
            be64_to_cpu(data->gemRwof));

    /* LEM */
    if (data->lemFir || data->lemErrMask ||
        data->lemAction0 || data->lemAction1 || data->lemWof)
        pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
            be64_to_cpu(data->lemFir),
            be64_to_cpu(data->lemErrMask),
            be64_to_cpu(data->lemAction0),
            be64_to_cpu(data->lemAction1),
            be64_to_cpu(data->lemWof));
}
1282 
1283 static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
1284 {
1285     struct pnv_phb *phb = hose->private_data;
1286     struct OpalIoP7IOCErrorData *data =
1287         (struct OpalIoP7IOCErrorData*)phb->diag_data;
1288     long rc;
1289 
1290     rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
1291     if (rc != OPAL_SUCCESS) {
1292         pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
1293             __func__, phb->hub_id, rc);
1294         return;
1295     }
1296 
1297     switch (be16_to_cpu(data->type)) {
1298     case OPAL_P7IOC_DIAG_TYPE_RGC:
1299         pr_info("P7IOC diag-data for RGC\n\n");
1300         pnv_eeh_dump_hub_diag_common(data);
1301         if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
1302             pr_info("  RGC: %016llx %016llx\n",
1303                 be64_to_cpu(data->rgc.rgcStatus),
1304                 be64_to_cpu(data->rgc.rgcLdcp));
1305         break;
1306     case OPAL_P7IOC_DIAG_TYPE_BI:
1307         pr_info("P7IOC diag-data for BI %s\n\n",
1308             data->bi.biDownbound ? "Downbound" : "Upbound");
1309         pnv_eeh_dump_hub_diag_common(data);
1310         if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
1311             data->bi.biLdcp2 || data->bi.biFenceStatus)
1312             pr_info("  BI:  %016llx %016llx %016llx %016llx\n",
1313                 be64_to_cpu(data->bi.biLdcp0),
1314                 be64_to_cpu(data->bi.biLdcp1),
1315                 be64_to_cpu(data->bi.biLdcp2),
1316                 be64_to_cpu(data->bi.biFenceStatus));
1317         break;
1318     case OPAL_P7IOC_DIAG_TYPE_CI:
1319         pr_info("P7IOC diag-data for CI Port %d\n\n",
1320             data->ci.ciPort);
1321         pnv_eeh_dump_hub_diag_common(data);
1322         if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
1323             pr_info("  CI:  %016llx %016llx\n",
1324                 be64_to_cpu(data->ci.ciPortStatus),
1325                 be64_to_cpu(data->ci.ciPortLdcp));
1326         break;
1327     case OPAL_P7IOC_DIAG_TYPE_MISC:
1328         pr_info("P7IOC diag-data for MISC\n\n");
1329         pnv_eeh_dump_hub_diag_common(data);
1330         break;
1331     case OPAL_P7IOC_DIAG_TYPE_I2C:
1332         pr_info("P7IOC diag-data for I2C\n\n");
1333         pnv_eeh_dump_hub_diag_common(data);
1334         break;
1335     default:
1336         pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
1337             __func__, phb->hub_id, data->type);
1338     }
1339 }
1340 
/*
 * pnv_eeh_get_pe - Map a frozen PE number to the eeh_pe to recover
 * @hose: PCI controller the PE number belongs to
 * @pe_no: PE number reported by OPAL
 * @pe: output; set to the PE the EEH core should act on
 *
 * Resolves slave PEs to their master (slave PEs are invisible to the
 * EEH core), freezes the found PE if it is not already isolated, and
 * then walks up the PE tree so that a frozen ancestor, if any, is
 * reported instead of the leaf. Returns -EEXIST when no eeh_pe is
 * registered for the number (callers only test for nonzero).
 */
static int pnv_eeh_get_pe(struct pci_controller *hose,
              u16 pe_no, struct eeh_pe **pe)
{
    struct pnv_phb *phb = hose->private_data;
    struct pnv_ioda_pe *pnv_pe;
    struct eeh_pe *dev_pe;

    /*
     * If PHB supports compound PE, to fetch
     * the master PE because slave PE is invisible
     * to EEH core.
     */
    pnv_pe = &phb->ioda.pe_array[pe_no];
    if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
        pnv_pe = pnv_pe->master;
        WARN_ON(!pnv_pe ||
            !(pnv_pe->flags & PNV_IODA_PE_MASTER));
        pe_no = pnv_pe->pe_number;
    }

    /* Find the PE according to PE# */
    dev_pe = eeh_pe_get(hose, pe_no);
    if (!dev_pe)
        return -EEXIST;

    /* Freeze the (compound) PE */
    *pe = dev_pe;
    if (!(dev_pe->state & EEH_PE_ISOLATED))
        phb->freeze_pe(phb, pe_no);

    /*
     * At this point, we're sure the (compound) PE should
     * have been frozen. However, we still need poke until
     * hitting the frozen PE on top level.
     */
    dev_pe = dev_pe->parent;
    while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
        int ret;
        ret = eeh_ops->get_state(dev_pe, NULL);
        /* Skip ancestors that are active (or whose state is unknown) */
        if (ret <= 0 || eeh_state_active(ret)) {
            dev_pe = dev_pe->parent;
            continue;
        }

        /* Frozen parent PE */
        *pe = dev_pe;
        if (!(dev_pe->state & EEH_PE_ISOLATED))
            phb->freeze_pe(phb, dev_pe->addr);

        /* Next one */
        dev_pe = dev_pe->parent;
    }

    return 0;
}
1396 
/**
 * pnv_eeh_next_error - Retrieve next EEH error to handle
 * @pe: Affected PE
 *
 * The function is expected to be called by EEH core while it gets
 * special EEH event (without binding PE). The function calls to
 * OPAL APIs for next error to handle. The informational error is
 * handled internally by platform. However, the dead IOC, dead PHB,
 * fenced PHB and frozen PE should be handled by EEH core eventually.
 *
 * Returns one of the EEH_NEXT_ERR_* codes; for anything other than
 * EEH_NEXT_ERR_NONE, *pe is set to the PE the core should recover.
 */
static int pnv_eeh_next_error(struct eeh_pe **pe)
{
    struct pci_controller *hose;
    struct pnv_phb *phb;
    struct eeh_pe *phb_pe, *parent_pe;
    __be64 frozen_pe_no;
    __be16 err_type, severity;
    long rc;
    int state, ret = EEH_NEXT_ERR_NONE;

    /*
     * While running here, it's safe to purge the event queue. The
     * event should still be masked.
     */
    eeh_remove_event(NULL, false);

    list_for_each_entry(hose, &hose_list, list_node) {
        /*
         * If the subordinate PCI buses of the PHB has been
         * removed or is exactly under error recovery, we
         * needn't take care of it any more.
         */
        phb = hose->private_data;
        phb_pe = eeh_phb_pe_get(hose);
        if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
            continue;

        /* Ask OPAL for the highest-priority pending error on this PHB */
        rc = opal_pci_next_error(phb->opal_id,
                     &frozen_pe_no, &err_type, &severity);
        if (rc != OPAL_SUCCESS) {
            pr_devel("%s: Invalid return value on "
                 "PHB#%x (0x%lx) from opal_pci_next_error",
                 __func__, hose->global_number, rc);
            continue;
        }

        /* If the PHB doesn't have error, stop processing */
        if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
            be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
            pr_devel("%s: No error found on PHB#%x\n",
                 __func__, hose->global_number);
            continue;
        }

        /*
         * Processing the error. We're expecting the error with
         * highest priority reported upon multiple errors on the
         * specific PHB.
         */
        pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
            __func__, be16_to_cpu(err_type),
            be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
            hose->global_number);
        switch (be16_to_cpu(err_type)) {
        case OPAL_EEH_IOC_ERROR:
            if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
                pr_err("EEH: dead IOC detected\n");
                ret = EEH_NEXT_ERR_DEAD_IOC;
            } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
                /* Informational only: dump hub diag-data and move on */
                pr_info("EEH: IOC informative error "
                    "detected\n");
                pnv_eeh_get_and_dump_hub_diag(hose);
                ret = EEH_NEXT_ERR_NONE;
            }

            break;
        case OPAL_EEH_PHB_ERROR:
            if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
                *pe = phb_pe;
                pr_err("EEH: dead PHB#%x detected, "
                       "location: %s\n",
                    hose->global_number,
                    eeh_pe_loc_get(phb_pe));
                ret = EEH_NEXT_ERR_DEAD_PHB;
            } else if (be16_to_cpu(severity) ==
                   OPAL_EEH_SEV_PHB_FENCED) {
                *pe = phb_pe;
                pr_err("EEH: Fenced PHB#%x detected, "
                       "location: %s\n",
                    hose->global_number,
                    eeh_pe_loc_get(phb_pe));
                ret = EEH_NEXT_ERR_FENCED_PHB;
            } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
                /* Informational only: dump PHB diag-data and move on */
                pr_info("EEH: PHB#%x informative error "
                    "detected, location: %s\n",
                    hose->global_number,
                    eeh_pe_loc_get(phb_pe));
                pnv_eeh_get_phb_diag(phb_pe);
                pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
                ret = EEH_NEXT_ERR_NONE;
            }

            break;
        case OPAL_EEH_PE_ERROR:
            /*
             * If we can't find the corresponding PE, we
             * just try to unfreeze.
             */
            if (pnv_eeh_get_pe(hose,
                be64_to_cpu(frozen_pe_no), pe)) {
                pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
                    hose->global_number, be64_to_cpu(frozen_pe_no));
                pr_info("EEH: PHB location: %s\n",
                    eeh_pe_loc_get(phb_pe));

                /* Dump PHB diag-data */
                rc = opal_pci_get_phb_diag_data2(phb->opal_id,
                    phb->diag_data, phb->diag_data_size);
                if (rc == OPAL_SUCCESS)
                    pnv_pci_dump_phb_diag_data(hose,
                            phb->diag_data);

                /* Try best to clear it */
                opal_pci_eeh_freeze_clear(phb->opal_id,
                    be64_to_cpu(frozen_pe_no),
                    OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
                ret = EEH_NEXT_ERR_NONE;
            } else if ((*pe)->state & EEH_PE_ISOLATED ||
                   eeh_pe_passed(*pe)) {
                /* Already being recovered, or passed through to a guest */
                ret = EEH_NEXT_ERR_NONE;
            } else {
                pr_err("EEH: Frozen PE#%x "
                       "on PHB#%x detected\n",
                       (*pe)->addr,
                    (*pe)->phb->global_number);
                pr_err("EEH: PE location: %s, "
                       "PHB location: %s\n",
                       eeh_pe_loc_get(*pe),
                       eeh_pe_loc_get(phb_pe));
                ret = EEH_NEXT_ERR_FROZEN_PE;
            }

            break;
        default:
            pr_warn("%s: Unexpected error type %d\n",
                __func__, be16_to_cpu(err_type));
        }

        /*
         * EEH core will try recover from fenced PHB or
         * frozen PE. In the time for frozen PE, EEH core
         * enable IO path for that before collecting logs,
         * but it ruins the site. So we have to dump the
         * log in advance here.
         */
        if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
            ret == EEH_NEXT_ERR_FENCED_PHB) &&
            !((*pe)->state & EEH_PE_ISOLATED)) {
            eeh_pe_mark_isolated(*pe);
            pnv_eeh_get_phb_diag(*pe);

            if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
                pnv_pci_dump_phb_diag_data((*pe)->phb,
                               (*pe)->data);
        }

        /*
         * We probably have the frozen parent PE out there and
         * we need have to handle frozen parent PE firstly.
         */
        if (ret == EEH_NEXT_ERR_FROZEN_PE) {
            parent_pe = (*pe)->parent;
            while (parent_pe) {
                /* Hit the ceiling ? */
                if (parent_pe->type & EEH_PE_PHB)
                    break;

                /* Frozen parent PE ? */
                state = eeh_ops->get_state(parent_pe, NULL);
                if (state > 0 && !eeh_state_active(state))
                    *pe = parent_pe;

                /* Next parent level */
                parent_pe = parent_pe->parent;
            }

            /* We possibly migrate to another PE */
            eeh_pe_mark_isolated(*pe);
        }

        /*
         * If we have no errors on the specific PHB or only
         * informative error there, we continue poking it.
         * Otherwise, we need actions to be taken by upper
         * layer.
         */
        if (ret > EEH_NEXT_ERR_INF)
            break;
    }

    /* Unmask the event */
    if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
        enable_irq(eeh_event_irq);

    return ret;
}
1603 
1604 static int pnv_eeh_restore_config(struct eeh_dev *edev)
1605 {
1606     struct pnv_phb *phb;
1607     s64 ret = 0;
1608 
1609     if (!edev)
1610         return -EEXIST;
1611 
1612     if (edev->physfn)
1613         return 0;
1614 
1615     phb = edev->controller->private_data;
1616     ret = opal_pci_reinit(phb->opal_id,
1617                   OPAL_REINIT_PCI_DEV, edev->bdfn);
1618 
1619     if (ret) {
1620         pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
1621             __func__, edev->bdfn, ret);
1622         return -EIO;
1623     }
1624 
1625     return ret;
1626 }
1627 
1628 static struct eeh_ops pnv_eeh_ops = {
1629     .name                   = "powernv",
1630     .probe          = pnv_eeh_probe,
1631     .set_option             = pnv_eeh_set_option,
1632     .get_state              = pnv_eeh_get_state,
1633     .reset                  = pnv_eeh_reset,
1634     .get_log                = pnv_eeh_get_log,
1635     .configure_bridge       = pnv_eeh_configure_bridge,
1636     .err_inject     = pnv_eeh_err_inject,
1637     .read_config            = pnv_eeh_read_config,
1638     .write_config           = pnv_eeh_write_config,
1639     .next_error     = pnv_eeh_next_error,
1640     .restore_config     = pnv_eeh_restore_config,
1641     .notify_resume      = NULL
1642 };
1643 
/**
 * eeh_powernv_init - Register platform dependent EEH operations
 *
 * EEH initialization on powernv platform. This function should be
 * called before any EEH related functions. Requires OPAL firmware;
 * returns -EINVAL without it, otherwise the result of eeh_init().
 */
static int __init eeh_powernv_init(void)
{
    int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
    struct pci_controller *hose;
    struct pnv_phb *phb;
    int ret = -EINVAL;

    if (!firmware_has_feature(FW_FEATURE_OPAL)) {
        pr_warn("%s: OPAL is required !\n", __func__);
        return -EINVAL;
    }

    /* Set probe mode */
    eeh_add_flag(EEH_PROBE_MODE_DEV);

    /*
     * P7IOC blocks PCI config access to frozen PE, but PHB3
     * doesn't do that. So we have to selectively enable I/O
     * prior to collecting error log.
     */
    list_for_each_entry(hose, &hose_list, list_node) {
        phb = hose->private_data;

        if (phb->model == PNV_PHB_MODEL_P7IOC)
            eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);

        if (phb->diag_data_size > max_diag_size)
            max_diag_size = phb->diag_data_size;

        /*
         * NOTE(review): the loop exits after the first PHB, so
         * only that PHB's model and diag-data size are inspected
         * — presumably all PHBs in a system are homogeneous;
         * confirm that assumption still holds.
         */
        break;
    }

    /*
     * eeh_init() allocates the eeh_pe and its aux data buf so the
     * size needs to be set before calling eeh_init().
     */
    eeh_set_pe_aux_size(max_diag_size);
    ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;

    ret = eeh_init(&pnv_eeh_ops);
    if (!ret)
        pr_info("EEH: PowerNV platform initialized\n");
    else
        pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);

    return ret;
}
1697 machine_arch_initcall(powernv, eeh_powernv_init);