Back to home page

OSCL-LXR

 
 

    


0001 /******************************************************************************
0002  * mcelog.c
0003  * Driver for receiving and transferring machine check error information
0004  *
0005  * Copyright (c) 2012 Intel Corporation
0006  * Author: Liu, Jinsong <jinsong.liu@intel.com>
0007  * Author: Jiang, Yunhong <yunhong.jiang@intel.com>
0008  * Author: Ke, Liping <liping.ke@intel.com>
0009  *
0010  * This program is free software; you can redistribute it and/or
0011  * modify it under the terms of the GNU General Public License version 2
0012  * as published by the Free Software Foundation; or, when distributed
0013  * separately from the Linux kernel or incorporated into other
0014  * software packages, subject to the following license:
0015  *
0016  * Permission is hereby granted, free of charge, to any person obtaining a copy
0017  * of this source file (the "Software"), to deal in the Software without
0018  * restriction, including without limitation the rights to use, copy, modify,
0019  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
0020  * and to permit persons to whom the Software is furnished to do so, subject to
0021  * the following conditions:
0022  *
0023  * The above copyright notice and this permission notice shall be included in
0024  * all copies or substantial portions of the Software.
0025  *
0026  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0027  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0028  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0029  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
0030  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
0031  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
0032  * IN THE SOFTWARE.
0033  */
0034 
0035 #define pr_fmt(fmt) "xen_mcelog: " fmt
0036 
0037 #include <linux/init.h>
0038 #include <linux/types.h>
0039 #include <linux/kernel.h>
0040 #include <linux/slab.h>
0041 #include <linux/fs.h>
0042 #include <linux/device.h>
0043 #include <linux/miscdevice.h>
0044 #include <linux/uaccess.h>
0045 #include <linux/capability.h>
0046 #include <linux/poll.h>
0047 #include <linux/sched.h>
0048 
0049 #include <xen/interface/xen.h>
0050 #include <xen/events.h>
0051 #include <xen/interface/vcpu.h>
0052 #include <xen/xen.h>
0053 #include <asm/xen/hypercall.h>
0054 #include <asm/xen/hypervisor.h>
0055 
0056 static struct mc_info g_mi;
0057 static struct mcinfo_logical_cpu *g_physinfo;
0058 static uint32_t ncpus;
0059 
0060 static DEFINE_MUTEX(mcelog_lock);
0061 
0062 static struct xen_mce_log xen_mcelog = {
0063     .signature  = XEN_MCE_LOG_SIGNATURE,
0064     .len        = XEN_MCE_LOG_LEN,
0065     .recordlen  = sizeof(struct xen_mce),
0066 };
0067 
0068 static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
0069 static int xen_mce_chrdev_open_count;   /* #times opened */
0070 static int xen_mce_chrdev_open_exclu;   /* already open exclusive? */
0071 
0072 static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
0073 
0074 static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
0075 {
0076     spin_lock(&xen_mce_chrdev_state_lock);
0077 
0078     if (xen_mce_chrdev_open_exclu ||
0079         (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
0080         spin_unlock(&xen_mce_chrdev_state_lock);
0081 
0082         return -EBUSY;
0083     }
0084 
0085     if (file->f_flags & O_EXCL)
0086         xen_mce_chrdev_open_exclu = 1;
0087     xen_mce_chrdev_open_count++;
0088 
0089     spin_unlock(&xen_mce_chrdev_state_lock);
0090 
0091     return nonseekable_open(inode, file);
0092 }
0093 
0094 static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
0095 {
0096     spin_lock(&xen_mce_chrdev_state_lock);
0097 
0098     xen_mce_chrdev_open_count--;
0099     xen_mce_chrdev_open_exclu = 0;
0100 
0101     spin_unlock(&xen_mce_chrdev_state_lock);
0102 
0103     return 0;
0104 }
0105 
0106 static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
0107                 size_t usize, loff_t *off)
0108 {
0109     char __user *buf = ubuf;
0110     unsigned num;
0111     int i, err;
0112 
0113     mutex_lock(&mcelog_lock);
0114 
0115     num = xen_mcelog.next;
0116 
0117     /* Only supports full reads right now */
0118     err = -EINVAL;
0119     if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
0120         goto out;
0121 
0122     err = 0;
0123     for (i = 0; i < num; i++) {
0124         struct xen_mce *m = &xen_mcelog.entry[i];
0125 
0126         err |= copy_to_user(buf, m, sizeof(*m));
0127         buf += sizeof(*m);
0128     }
0129 
0130     memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
0131     xen_mcelog.next = 0;
0132 
0133     if (err)
0134         err = -EFAULT;
0135 
0136 out:
0137     mutex_unlock(&mcelog_lock);
0138 
0139     return err ? err : buf - ubuf;
0140 }
0141 
0142 static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait)
0143 {
0144     poll_wait(file, &xen_mce_chrdev_wait, wait);
0145 
0146     if (xen_mcelog.next)
0147         return EPOLLIN | EPOLLRDNORM;
0148 
0149     return 0;
0150 }
0151 
0152 static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
0153                 unsigned long arg)
0154 {
0155     int __user *p = (int __user *)arg;
0156 
0157     if (!capable(CAP_SYS_ADMIN))
0158         return -EPERM;
0159 
0160     switch (cmd) {
0161     case MCE_GET_RECORD_LEN:
0162         return put_user(sizeof(struct xen_mce), p);
0163     case MCE_GET_LOG_LEN:
0164         return put_user(XEN_MCE_LOG_LEN, p);
0165     case MCE_GETCLEAR_FLAGS: {
0166         unsigned flags;
0167 
0168         do {
0169             flags = xen_mcelog.flags;
0170         } while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);
0171 
0172         return put_user(flags, p);
0173     }
0174     default:
0175         return -ENOTTY;
0176     }
0177 }
0178 
0179 static const struct file_operations xen_mce_chrdev_ops = {
0180     .open           = xen_mce_chrdev_open,
0181     .release        = xen_mce_chrdev_release,
0182     .read           = xen_mce_chrdev_read,
0183     .poll           = xen_mce_chrdev_poll,
0184     .unlocked_ioctl     = xen_mce_chrdev_ioctl,
0185     .llseek         = no_llseek,
0186 };
0187 
0188 static struct miscdevice xen_mce_chrdev_device = {
0189     MISC_MCELOG_MINOR,
0190     "mcelog",
0191     &xen_mce_chrdev_ops,
0192 };
0193 
0194 /*
0195  * Caller should hold the mcelog_lock
0196  */
0197 static void xen_mce_log(struct xen_mce *mce)
0198 {
0199     unsigned entry;
0200 
0201     entry = xen_mcelog.next;
0202 
0203     /*
0204      * When the buffer fills up discard new entries.
0205      * Assume that the earlier errors are the more
0206      * interesting ones:
0207      */
0208     if (entry >= XEN_MCE_LOG_LEN) {
0209         set_bit(XEN_MCE_OVERFLOW,
0210             (unsigned long *)&xen_mcelog.flags);
0211         return;
0212     }
0213 
0214     memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));
0215 
0216     xen_mcelog.next++;
0217 }
0218 
0219 static int convert_log(struct mc_info *mi)
0220 {
0221     struct mcinfo_common *mic;
0222     struct mcinfo_global *mc_global;
0223     struct mcinfo_bank *mc_bank;
0224     struct xen_mce m;
0225     unsigned int i, j;
0226 
0227     mic = NULL;
0228     x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
0229     if (unlikely(!mic)) {
0230         pr_warn("Failed to find global error info\n");
0231         return -ENODEV;
0232     }
0233 
0234     memset(&m, 0, sizeof(struct xen_mce));
0235 
0236     mc_global = (struct mcinfo_global *)mic;
0237     m.mcgstatus = mc_global->mc_gstatus;
0238     m.apicid = mc_global->mc_apicid;
0239 
0240     for (i = 0; i < ncpus; i++)
0241         if (g_physinfo[i].mc_apicid == m.apicid)
0242             break;
0243     if (unlikely(i == ncpus)) {
0244         pr_warn("Failed to match cpu with apicid %d\n", m.apicid);
0245         return -ENODEV;
0246     }
0247 
0248     m.socketid = g_physinfo[i].mc_chipid;
0249     m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
0250     m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
0251     for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j)
0252         switch (g_physinfo[i].mc_msrvalues[j].reg) {
0253         case MSR_IA32_MCG_CAP:
0254             m.mcgcap = g_physinfo[i].mc_msrvalues[j].value;
0255             break;
0256 
0257         case MSR_PPIN:
0258         case MSR_AMD_PPIN:
0259             m.ppin = g_physinfo[i].mc_msrvalues[j].value;
0260             break;
0261         }
0262 
0263     mic = NULL;
0264     x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
0265     if (unlikely(!mic)) {
0266         pr_warn("Fail to find bank error info\n");
0267         return -ENODEV;
0268     }
0269 
0270     do {
0271         if ((!mic) || (mic->size == 0) ||
0272             (mic->type != MC_TYPE_GLOBAL   &&
0273              mic->type != MC_TYPE_BANK     &&
0274              mic->type != MC_TYPE_EXTENDED &&
0275              mic->type != MC_TYPE_RECOVERY))
0276             break;
0277 
0278         if (mic->type == MC_TYPE_BANK) {
0279             mc_bank = (struct mcinfo_bank *)mic;
0280             m.misc = mc_bank->mc_misc;
0281             m.status = mc_bank->mc_status;
0282             m.addr = mc_bank->mc_addr;
0283             m.tsc = mc_bank->mc_tsc;
0284             m.bank = mc_bank->mc_bank;
0285             m.finished = 1;
0286             /*log this record*/
0287             xen_mce_log(&m);
0288         }
0289         mic = x86_mcinfo_next(mic);
0290     } while (1);
0291 
0292     return 0;
0293 }
0294 
0295 static int mc_queue_handle(uint32_t flags)
0296 {
0297     struct xen_mc mc_op;
0298     int ret = 0;
0299 
0300     mc_op.cmd = XEN_MC_fetch;
0301     set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
0302     do {
0303         mc_op.u.mc_fetch.flags = flags;
0304         ret = HYPERVISOR_mca(&mc_op);
0305         if (ret) {
0306             pr_err("Failed to fetch %surgent error log\n",
0307                    flags == XEN_MC_URGENT ? "" : "non");
0308             break;
0309         }
0310 
0311         if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
0312             mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
0313             break;
0314         else {
0315             ret = convert_log(&g_mi);
0316             if (ret)
0317                 pr_warn("Failed to convert this error log, continue acking it anyway\n");
0318 
0319             mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
0320             ret = HYPERVISOR_mca(&mc_op);
0321             if (ret) {
0322                 pr_err("Failed to ack previous error log\n");
0323                 break;
0324             }
0325         }
0326     } while (1);
0327 
0328     return ret;
0329 }
0330 
0331 /* virq handler for machine check error info*/
0332 static void xen_mce_work_fn(struct work_struct *work)
0333 {
0334     int err;
0335 
0336     mutex_lock(&mcelog_lock);
0337 
0338     /* urgent mc_info */
0339     err = mc_queue_handle(XEN_MC_URGENT);
0340     if (err)
0341         pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n");
0342 
0343     /* nonurgent mc_info */
0344     err = mc_queue_handle(XEN_MC_NONURGENT);
0345     if (err)
0346         pr_err("Failed to handle nonurgent mc_info queue\n");
0347 
0348     /* wake processes polling /dev/mcelog */
0349     wake_up_interruptible(&xen_mce_chrdev_wait);
0350 
0351     mutex_unlock(&mcelog_lock);
0352 }
0353 static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
0354 
0355 static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
0356 {
0357     schedule_work(&xen_mce_work);
0358     return IRQ_HANDLED;
0359 }
0360 
0361 static int bind_virq_for_mce(void)
0362 {
0363     int ret;
0364     struct xen_mc mc_op;
0365 
0366     memset(&mc_op, 0, sizeof(struct xen_mc));
0367 
0368     /* Fetch physical CPU Numbers */
0369     mc_op.cmd = XEN_MC_physcpuinfo;
0370     set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
0371     ret = HYPERVISOR_mca(&mc_op);
0372     if (ret) {
0373         pr_err("Failed to get CPU numbers\n");
0374         return ret;
0375     }
0376 
0377     /* Fetch each CPU Physical Info for later reference*/
0378     ncpus = mc_op.u.mc_physcpuinfo.ncpus;
0379     g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
0380                  GFP_KERNEL);
0381     if (!g_physinfo)
0382         return -ENOMEM;
0383     set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
0384     ret = HYPERVISOR_mca(&mc_op);
0385     if (ret) {
0386         pr_err("Failed to get CPU info\n");
0387         kfree(g_physinfo);
0388         return ret;
0389     }
0390 
0391     ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
0392                        xen_mce_interrupt, 0, "mce", NULL);
0393     if (ret < 0) {
0394         pr_err("Failed to bind virq\n");
0395         kfree(g_physinfo);
0396         return ret;
0397     }
0398 
0399     return 0;
0400 }
0401 
0402 static int __init xen_late_init_mcelog(void)
0403 {
0404     int ret;
0405 
0406     /* Only DOM0 is responsible for MCE logging */
0407     if (!xen_initial_domain())
0408         return -ENODEV;
0409 
0410     /* register character device /dev/mcelog for xen mcelog */
0411     ret = misc_register(&xen_mce_chrdev_device);
0412     if (ret)
0413         return ret;
0414 
0415     ret = bind_virq_for_mce();
0416     if (ret)
0417         goto deregister;
0418 
0419     pr_info("/dev/mcelog registered by Xen\n");
0420 
0421     return 0;
0422 
0423 deregister:
0424     misc_deregister(&xen_mce_chrdev_device);
0425     return ret;
0426 }
0427 device_initcall(xen_late_init_mcelog);