// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
 */

#include <linux/delay.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>

/** Overview:
 *  EEH error states may be detected within exception handlers;
 *  however, the recovery processing needs to occur asynchronously
 *  in a normal kernel context and not an interrupt context.
 *  This pair of routines creates an event and queues it onto an
 *  event list, where the eehd kernel thread can drive recovery.
 */

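/*
 * Pending events live on eeh_eventlist, protected by eeh_eventlist_lock.
 * Producers such as __eeh_send_failure_event() append an event and then
 * complete eeh_eventlist_event; the eehd kernel thread started by
 * eeh_event_init() wakes up and dispatches it in eeh_event_handler().
 */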
static DEFINE_SPINLOCK(eeh_eventlist_lock);
static DECLARE_COMPLETION(eeh_eventlist_event);
static LIST_HEAD(eeh_eventlist);

/**
 * eeh_event_handler - Dispatch EEH events.
 * @dummy: unused
 *
 * The detection of a frozen slot can occur inside an interrupt,
 * where it can be hard to do anything about it.  The goal of this
 * routine is to pull these detection events out of the context
 * of the interrupt handler, and re-dispatch them for processing
 * at a later time in a normal context.
 */
static int eeh_event_handler(void *dummy)
{
    unsigned long flags;
    struct eeh_event *event;

    while (!kthread_should_stop()) {
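        /* Sleep until a new event is queued, or until we're interrupted */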
        if (wait_for_completion_interruptible(&eeh_eventlist_event))
            break;

        /* Fetch EEH event from the queue */
        spin_lock_irqsave(&eeh_eventlist_lock, flags);
        event = NULL;
        if (!list_empty(&eeh_eventlist)) {
            event = list_entry(eeh_eventlist.next,
                       struct eeh_event, list);
            list_del(&event->list);
        }
        spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
        if (!event)
            continue;

        /* An event may arrive without a PE bound to it */
        if (event->pe)
            eeh_handle_normal_event(event->pe);
        else
            eeh_handle_special_event();

        kfree(event);
    }

    return 0;
}

/**
 * eeh_event_init - Start kernel thread to handle EEH events
 *
 * This routine is called to start the kernel thread for processing
 * EEH events.
 */
int eeh_event_init(void)
{
    struct task_struct *t;
    int ret = 0;

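    /* Spawn the eehd kernel thread that consumes the event list */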
    t = kthread_run(eeh_event_handler, NULL, "eehd");
    if (IS_ERR(t)) {
        ret = PTR_ERR(t);
        pr_err("%s: Failed to start EEH daemon (%d)\n",
            __func__, ret);
        return ret;
    }

    return 0;
}

/**
 * __eeh_send_failure_event - Generate a PCI error event
 * @pe: EEH PE
 *
 * This routine can be called within an interrupt context;
 * the actual event will be delivered in a normal context
 * (by the eehd kernel thread).
 */
int __eeh_send_failure_event(struct eeh_pe *pe)
{
    unsigned long flags;
    struct eeh_event *event;

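    /* This may run in interrupt context, so the allocation must not sleep */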
    event = kzalloc(sizeof(*event), GFP_ATOMIC);
    if (!event) {
        pr_err("EEH: out of memory, event not handled\n");
        return -ENOMEM;
    }
    event->pe = pe;

    /*
     * Mark the PE as recovering before inserting it in the queue.
     * This prevents the PE from being free()ed by a hotplug driver
     * while the PE is sitting in the event queue.
     */
    if (pe) {
#ifdef CONFIG_STACKTRACE
        /*
         * Save the current stack trace so we can dump it from the
         * event handler thread.
         */
        pe->trace_entries = stack_trace_save(pe->stack_trace,
                     ARRAY_SIZE(pe->stack_trace), 0);
#endif /* CONFIG_STACKTRACE */

        eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
    }

    /* We may or may not be called in an interrupt context */
    spin_lock_irqsave(&eeh_eventlist_lock, flags);
    list_add(&event->list, &eeh_eventlist);
    spin_unlock_irqrestore(&eeh_eventlist_lock, flags);

    /* Kick the EEH daemon (eehd) so it picks up the new event */
    complete(&eeh_eventlist_event);

    return 0;
}

int eeh_send_failure_event(struct eeh_pe *pe)
{
    /*
     * If we've manually suppressed recovery events via debugfs
     * then just drop it on the floor.
     */
    if (eeh_debugfs_no_recover) {
        pr_err("EEH: Event dropped due to no_recover setting\n");
        return 0;
    }

    return __eeh_send_failure_event(pe);
}

/**
 * eeh_remove_event - Remove EEH event from the queue
 * @pe: PE the events are bound to
 * @force: Remove events unconditionally
 *
 * On the PowerNV platform, we might see subsequent events that are
 * really part of an earlier one. In that case the later events are
 * pure duplicates and unnecessary, so they should be removed.
 */
void eeh_remove_event(struct eeh_pe *pe, bool force)
{
    unsigned long flags;
    struct eeh_event *event, *tmp;

    /*
     * If a NULL PE is passed in, either the IOC is dead or the
     * caller is sure it can report all existing errors itself.
     *
     * Unless "force" is set, events whose associated PE has
     * already been isolated are left on the queue so that they
     * are not lost.
     */
    spin_lock_irqsave(&eeh_eventlist_lock, flags);
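    /*
     * Walk the queue with the _safe iterator since matching events are
     * deleted as we go. A NULL PE matches every event, a PHB PE matches
     * every event on that PHB, and any other PE matches only its own events.
     */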
    list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
        if (!force && event->pe &&
            (event->pe->state & EEH_PE_ISOLATED))
            continue;

        if (!pe) {
            list_del(&event->list);
            kfree(event);
        } else if (pe->type & EEH_PE_PHB) {
            if (event->pe && event->pe->phb == pe->phb) {
                list_del(&event->list);
                kfree(event);
            }
        } else if (event->pe == pe) {
            list_del(&event->list);
            kfree(event);
        }
    }
    spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
}