Back to home page

OSCL-LXR

 
 

    


0001 /*
0002  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
0003  *
0004  * This software is available to you under a choice of one of two
0005  * licenses.  You may choose to be licensed under the terms of the GNU
0006  * General Public License (GPL) Version 2, available from the file
0007  * COPYING in the main directory of this source tree, or the
0008  * OpenIB.org BSD license below:
0009  *
0010  *     Redistribution and use in source and binary forms, with or
0011  *     without modification, are permitted provided that the following
0012  *     conditions are met:
0013  *
0014  *      - Redistributions of source code must retain the above
0015  *        copyright notice, this list of conditions and the following
0016  *        disclaimer.
0017  *
0018  *      - Redistributions in binary form must reproduce the above
0019  *        copyright notice, this list of conditions and the following
0020  *        disclaimer in the documentation and/or other materials
0021  *        provided with the distribution.
0022  *
0023  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
0024  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
0025  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
0026  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
0027  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
0028  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
0029  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
0030  * SOFTWARE.
0031  */
0032 
0033 #include <linux/jiffies.h>
0034 #include <linux/module.h>
0035 #include <linux/timer.h>
0036 #include <linux/workqueue.h>
0037 
0038 #include "mthca_dev.h"
0039 
0040 enum {
0041     MTHCA_CATAS_POLL_INTERVAL   = 5 * HZ,
0042 
0043     MTHCA_CATAS_TYPE_INTERNAL   = 0,
0044     MTHCA_CATAS_TYPE_UPLINK     = 3,
0045     MTHCA_CATAS_TYPE_DDR        = 4,
0046     MTHCA_CATAS_TYPE_PARITY     = 5,
0047 };
0048 
0049 static DEFINE_SPINLOCK(catas_lock);
0050 
0051 static LIST_HEAD(catas_list);
0052 static struct workqueue_struct *catas_wq;
0053 static struct work_struct catas_work;
0054 
0055 static int catas_reset_disable;
0056 module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
0057 MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if nonzero");
0058 
0059 static void catas_reset(struct work_struct *work)
0060 {
0061     struct mthca_dev *dev, *tmpdev;
0062     LIST_HEAD(tlist);
0063     int ret;
0064 
0065     mutex_lock(&mthca_device_mutex);
0066 
0067     spin_lock_irq(&catas_lock);
0068     list_splice_init(&catas_list, &tlist);
0069     spin_unlock_irq(&catas_lock);
0070 
0071     list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
0072         struct pci_dev *pdev = dev->pdev;
0073         ret = __mthca_restart_one(dev->pdev);
0074         /* 'dev' now is not valid */
0075         if (ret)
0076             printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
0077                    pci_name(pdev), ret);
0078         else {
0079             struct mthca_dev *d = pci_get_drvdata(pdev);
0080             mthca_dbg(d, "Reset succeeded\n");
0081         }
0082     }
0083 
0084     mutex_unlock(&mthca_device_mutex);
0085 }
0086 
0087 static void handle_catas(struct mthca_dev *dev)
0088 {
0089     struct ib_event event;
0090     unsigned long flags;
0091     const char *type;
0092     int i;
0093 
0094     event.device = &dev->ib_dev;
0095     event.event  = IB_EVENT_DEVICE_FATAL;
0096     event.element.port_num = 0;
0097     dev->active = false;
0098 
0099     ib_dispatch_event(&event);
0100 
0101     switch (swab32(readl(dev->catas_err.map)) >> 24) {
0102     case MTHCA_CATAS_TYPE_INTERNAL:
0103         type = "internal error";
0104         break;
0105     case MTHCA_CATAS_TYPE_UPLINK:
0106         type = "uplink bus error";
0107         break;
0108     case MTHCA_CATAS_TYPE_DDR:
0109         type = "DDR data error";
0110         break;
0111     case MTHCA_CATAS_TYPE_PARITY:
0112         type = "internal parity error";
0113         break;
0114     default:
0115         type = "unknown error";
0116         break;
0117     }
0118 
0119     mthca_err(dev, "Catastrophic error detected: %s\n", type);
0120     for (i = 0; i < dev->catas_err.size; ++i)
0121         mthca_err(dev, "  buf[%02x]: %08x\n",
0122               i, swab32(readl(dev->catas_err.map + i)));
0123 
0124     if (catas_reset_disable)
0125         return;
0126 
0127     spin_lock_irqsave(&catas_lock, flags);
0128     list_add(&dev->catas_err.list, &catas_list);
0129     queue_work(catas_wq, &catas_work);
0130     spin_unlock_irqrestore(&catas_lock, flags);
0131 }
0132 
0133 static void poll_catas(struct timer_list *t)
0134 {
0135     struct mthca_dev *dev = from_timer(dev, t, catas_err.timer);
0136     int i;
0137 
0138     for (i = 0; i < dev->catas_err.size; ++i)
0139         if (readl(dev->catas_err.map + i)) {
0140             handle_catas(dev);
0141             return;
0142         }
0143 
0144     mod_timer(&dev->catas_err.timer,
0145           round_jiffies(jiffies + MTHCA_CATAS_POLL_INTERVAL));
0146 }
0147 
0148 void mthca_start_catas_poll(struct mthca_dev *dev)
0149 {
0150     phys_addr_t addr;
0151 
0152     timer_setup(&dev->catas_err.timer, poll_catas, 0);
0153     dev->catas_err.map  = NULL;
0154 
0155     addr = pci_resource_start(dev->pdev, 0) +
0156         ((pci_resource_len(dev->pdev, 0) - 1) &
0157          dev->catas_err.addr);
0158 
0159     dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
0160     if (!dev->catas_err.map) {
0161         mthca_warn(dev, "couldn't map catastrophic error region "
0162                "at 0x%llx/0x%x\n", (unsigned long long) addr,
0163                dev->catas_err.size * 4);
0164         return;
0165     }
0166 
0167     dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
0168     INIT_LIST_HEAD(&dev->catas_err.list);
0169     add_timer(&dev->catas_err.timer);
0170 }
0171 
0172 void mthca_stop_catas_poll(struct mthca_dev *dev)
0173 {
0174     del_timer_sync(&dev->catas_err.timer);
0175 
0176     if (dev->catas_err.map)
0177         iounmap(dev->catas_err.map);
0178 
0179     spin_lock_irq(&catas_lock);
0180     list_del(&dev->catas_err.list);
0181     spin_unlock_irq(&catas_lock);
0182 }
0183 
0184 int __init mthca_catas_init(void)
0185 {
0186     INIT_WORK(&catas_work, catas_reset);
0187 
0188     catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM);
0189     if (!catas_wq)
0190         return -ENOMEM;
0191 
0192     return 0;
0193 }
0194 
/*
 * Destroy the catas_wq workqueue that mthca_catas_init() allocated.
 */
void mthca_catas_cleanup(void)
{
    destroy_workqueue(catas_wq);
}