0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035 #define pr_fmt(fmt) "xen_mcelog: " fmt
0036
0037 #include <linux/init.h>
0038 #include <linux/types.h>
0039 #include <linux/kernel.h>
0040 #include <linux/slab.h>
0041 #include <linux/fs.h>
0042 #include <linux/device.h>
0043 #include <linux/miscdevice.h>
0044 #include <linux/uaccess.h>
0045 #include <linux/capability.h>
0046 #include <linux/poll.h>
0047 #include <linux/sched.h>
0048
0049 #include <xen/interface/xen.h>
0050 #include <xen/events.h>
0051 #include <xen/interface/vcpu.h>
0052 #include <xen/xen.h>
0053 #include <asm/xen/hypercall.h>
0054 #include <asm/xen/hypervisor.h>
0055
/* Scratch buffer the XEN_MC_fetch hypercall fills in (guarded by mcelog_lock). */
static struct mc_info g_mi;
/* Per-physical-CPU records, allocated once in bind_virq_for_mce(). */
static struct mcinfo_logical_cpu *g_physinfo;
/* Number of entries in g_physinfo, as reported by XEN_MC_physcpuinfo. */
static uint32_t ncpus;

/* Serializes the producer (work handler) against the consumer (chrdev read). */
static DEFINE_MUTEX(mcelog_lock);

/* In-memory log of converted records, exposed to userspace via /dev/mcelog. */
static struct xen_mce_log xen_mcelog = {
	.signature	= XEN_MCE_LOG_SIGNATURE,
	.len		= XEN_MCE_LOG_LEN,
	.recordlen	= sizeof(struct xen_mce),
};

/* Protects the open-count / exclusive-open bookkeeping below. */
static DEFINE_SPINLOCK(xen_mce_chrdev_state_lock);
static int xen_mce_chrdev_open_count;	/* #times opened */
static int xen_mce_chrdev_open_exclu;	/* already open exclusively? */

/* Readers/pollers sleep here until new records are logged. */
static DECLARE_WAIT_QUEUE_HEAD(xen_mce_chrdev_wait);
0073
0074 static int xen_mce_chrdev_open(struct inode *inode, struct file *file)
0075 {
0076 spin_lock(&xen_mce_chrdev_state_lock);
0077
0078 if (xen_mce_chrdev_open_exclu ||
0079 (xen_mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
0080 spin_unlock(&xen_mce_chrdev_state_lock);
0081
0082 return -EBUSY;
0083 }
0084
0085 if (file->f_flags & O_EXCL)
0086 xen_mce_chrdev_open_exclu = 1;
0087 xen_mce_chrdev_open_count++;
0088
0089 spin_unlock(&xen_mce_chrdev_state_lock);
0090
0091 return nonseekable_open(inode, file);
0092 }
0093
0094 static int xen_mce_chrdev_release(struct inode *inode, struct file *file)
0095 {
0096 spin_lock(&xen_mce_chrdev_state_lock);
0097
0098 xen_mce_chrdev_open_count--;
0099 xen_mce_chrdev_open_exclu = 0;
0100
0101 spin_unlock(&xen_mce_chrdev_state_lock);
0102
0103 return 0;
0104 }
0105
/*
 * Destructive read of the whole log: userspace must supply a buffer large
 * enough for every slot, and all pending records are returned and cleared
 * in one call.  Partial reads and non-zero offsets are rejected.
 */
static ssize_t xen_mce_chrdev_read(struct file *filp, char __user *ubuf,
				   size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned num;
	int i, err;

	mutex_lock(&mcelog_lock);

	/* Snapshot the pending-record count under the lock. */
	num = xen_mcelog.next;

	/* Only full-log reads from offset 0 are supported. */
	err = -EINVAL;
	if (*off != 0 || usize < XEN_MCE_LOG_LEN*sizeof(struct xen_mce))
		goto out;

	err = 0;
	for (i = 0; i < num; i++) {
		struct xen_mce *m = &xen_mcelog.entry[i];

		/* copy_to_user() returns the number of uncopied bytes;
		 * accumulate so any failure is reported once below. */
		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	/* The read consumes the log: wipe and reset the write cursor. */
	memset(xen_mcelog.entry, 0, num * sizeof(struct xen_mce));
	xen_mcelog.next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mcelog_lock);

	return err ? err : buf - ubuf;
}
0141
0142 static __poll_t xen_mce_chrdev_poll(struct file *file, poll_table *wait)
0143 {
0144 poll_wait(file, &xen_mce_chrdev_wait, wait);
0145
0146 if (xen_mcelog.next)
0147 return EPOLLIN | EPOLLRDNORM;
0148
0149 return 0;
0150 }
0151
/*
 * ioctl interface: log-geometry queries plus an atomic read-and-clear of
 * the status flags.  Restricted to CAP_SYS_ADMIN.
 */
static long xen_mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				 unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct xen_mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(XEN_MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		/* Lock-free fetch-and-zero: retry until no writer (e.g. the
		 * overflow set_bit in xen_mce_log()) races the exchange. */
		do {
			flags = xen_mcelog.flags;
		} while (cmpxchg(&xen_mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
0178
/* File operations backing the /dev/mcelog character device. */
static const struct file_operations xen_mce_chrdev_ops = {
	.open			= xen_mce_chrdev_open,
	.release		= xen_mce_chrdev_release,
	.read			= xen_mce_chrdev_read,
	.poll			= xen_mce_chrdev_poll,
	.unlocked_ioctl		= xen_mce_chrdev_ioctl,
	.llseek			= no_llseek,
};
0187
0188 static struct miscdevice xen_mce_chrdev_device = {
0189 MISC_MCELOG_MINOR,
0190 "mcelog",
0191 &xen_mce_chrdev_ops,
0192 };
0193
0194
0195
0196
0197 static void xen_mce_log(struct xen_mce *mce)
0198 {
0199 unsigned entry;
0200
0201 entry = xen_mcelog.next;
0202
0203
0204
0205
0206
0207
0208 if (entry >= XEN_MCE_LOG_LEN) {
0209 set_bit(XEN_MCE_OVERFLOW,
0210 (unsigned long *)&xen_mcelog.flags);
0211 return;
0212 }
0213
0214 memcpy(xen_mcelog.entry + entry, mce, sizeof(struct xen_mce));
0215
0216 xen_mcelog.next++;
0217 }
0218
/*
 * Translate one hypervisor-provided mc_info telemetry blob into
 * struct xen_mce records and append them to the log via xen_mce_log().
 * Returns 0 on success, -ENODEV when a mandatory record (global/bank)
 * is missing or the reporting CPU cannot be identified.
 */
static int convert_log(struct mc_info *mi)
{
	struct mcinfo_common *mic;
	struct mcinfo_global *mc_global;
	struct mcinfo_bank *mc_bank;
	struct xen_mce m;
	unsigned int i, j;

	/* The global record carries mcgstatus and the source apicid. */
	mic = NULL;
	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
	if (unlikely(!mic)) {
		pr_warn("Failed to find global error info\n");
		return -ENODEV;
	}

	memset(&m, 0, sizeof(struct xen_mce));

	mc_global = (struct mcinfo_global *)mic;
	m.mcgstatus = mc_global->mc_gstatus;
	m.apicid = mc_global->mc_apicid;

	/* Map the apicid back to the physical CPU cached in g_physinfo. */
	for (i = 0; i < ncpus; i++)
		if (g_physinfo[i].mc_apicid == m.apicid)
			break;
	if (unlikely(i == ncpus)) {
		pr_warn("Failed to match cpu with apicid %d\n", m.apicid);
		return -ENODEV;
	}

	m.socketid = g_physinfo[i].mc_chipid;
	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
	/* Pull MCG_CAP and the PPIN out of the CPU's saved MSR values. */
	for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j)
		switch (g_physinfo[i].mc_msrvalues[j].reg) {
		case MSR_IA32_MCG_CAP:
			m.mcgcap = g_physinfo[i].mc_msrvalues[j].value;
			break;

		case MSR_PPIN:
		case MSR_AMD_PPIN:
			m.ppin = g_physinfo[i].mc_msrvalues[j].value;
			break;
		}

	mic = NULL;
	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
	if (unlikely(!mic)) {
		pr_warn("Fail to find bank error info\n");
		return -ENODEV;
	}

	/* Walk the record list from the first bank entry; every bank record
	 * becomes one log entry sharing the global/cpu fields set above.
	 * Stop on an unknown type or a zero-sized (malformed) record. */
	do {
		if ((!mic) || (mic->size == 0) ||
		    (mic->type != MC_TYPE_GLOBAL &&
		     mic->type != MC_TYPE_BANK &&
		     mic->type != MC_TYPE_EXTENDED &&
		     mic->type != MC_TYPE_RECOVERY))
			break;

		if (mic->type == MC_TYPE_BANK) {
			mc_bank = (struct mcinfo_bank *)mic;
			m.misc = mc_bank->mc_misc;
			m.status = mc_bank->mc_status;
			m.addr = mc_bank->mc_addr;
			m.tsc = mc_bank->mc_tsc;
			m.bank = mc_bank->mc_bank;
			m.finished = 1;

			xen_mce_log(&m);
		}
		mic = x86_mcinfo_next(mic);
	} while (1);

	return 0;
}
0294
/*
 * Drain one of Xen's machine-check queues (XEN_MC_URGENT or
 * XEN_MC_NONURGENT): repeatedly fetch a record into g_mi, convert it
 * into the local log, and ack it so the hypervisor can reuse the slot.
 * Returns 0 once the queue is empty, otherwise the first hypercall
 * error.  Called with mcelog_lock held (g_mi and the log are shared).
 */
static int mc_queue_handle(uint32_t flags)
{
	struct xen_mc mc_op;
	int ret = 0;

	mc_op.cmd = XEN_MC_fetch;
	set_xen_guest_handle(mc_op.u.mc_fetch.data, &g_mi);
	do {
		/* The hypercall rewrites .flags each round; reset it. */
		mc_op.u.mc_fetch.flags = flags;
		ret = HYPERVISOR_mca(&mc_op);
		if (ret) {
			pr_err("Failed to fetch %surgent error log\n",
			       flags == XEN_MC_URGENT ? "" : "non");
			break;
		}

		/* Queue drained or fetch failed: nothing more to convert. */
		if (mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
		    mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
			break;
		else {
			ret = convert_log(&g_mi);
			if (ret)
				pr_warn("Failed to convert this error log, continue acking it anyway\n");

			/* Ack even on conversion failure, so a bad record
			 * cannot wedge the queue. */
			mc_op.u.mc_fetch.flags = flags | XEN_MC_ACK;
			ret = HYPERVISOR_mca(&mc_op);
			if (ret) {
				pr_err("Failed to ack previous error log\n");
				break;
			}
		}
	} while (1);

	return ret;
}
0330
0331
0332 static void xen_mce_work_fn(struct work_struct *work)
0333 {
0334 int err;
0335
0336 mutex_lock(&mcelog_lock);
0337
0338
0339 err = mc_queue_handle(XEN_MC_URGENT);
0340 if (err)
0341 pr_err("Failed to handle urgent mc_info queue, continue handling nonurgent mc_info queue anyway\n");
0342
0343
0344 err = mc_queue_handle(XEN_MC_NONURGENT);
0345 if (err)
0346 pr_err("Failed to handle nonurgent mc_info queue\n");
0347
0348
0349 wake_up_interruptible(&xen_mce_chrdev_wait);
0350
0351 mutex_unlock(&mcelog_lock);
0352 }
0353 static DECLARE_WORK(xen_mce_work, xen_mce_work_fn);
0354
0355 static irqreturn_t xen_mce_interrupt(int irq, void *dev_id)
0356 {
0357 schedule_work(&xen_mce_work);
0358 return IRQ_HANDLED;
0359 }
0360
0361 static int bind_virq_for_mce(void)
0362 {
0363 int ret;
0364 struct xen_mc mc_op;
0365
0366 memset(&mc_op, 0, sizeof(struct xen_mc));
0367
0368
0369 mc_op.cmd = XEN_MC_physcpuinfo;
0370 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
0371 ret = HYPERVISOR_mca(&mc_op);
0372 if (ret) {
0373 pr_err("Failed to get CPU numbers\n");
0374 return ret;
0375 }
0376
0377
0378 ncpus = mc_op.u.mc_physcpuinfo.ncpus;
0379 g_physinfo = kcalloc(ncpus, sizeof(struct mcinfo_logical_cpu),
0380 GFP_KERNEL);
0381 if (!g_physinfo)
0382 return -ENOMEM;
0383 set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
0384 ret = HYPERVISOR_mca(&mc_op);
0385 if (ret) {
0386 pr_err("Failed to get CPU info\n");
0387 kfree(g_physinfo);
0388 return ret;
0389 }
0390
0391 ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
0392 xen_mce_interrupt, 0, "mce", NULL);
0393 if (ret < 0) {
0394 pr_err("Failed to bind virq\n");
0395 kfree(g_physinfo);
0396 return ret;
0397 }
0398
0399 return 0;
0400 }
0401
0402 static int __init xen_late_init_mcelog(void)
0403 {
0404 int ret;
0405
0406
0407 if (!xen_initial_domain())
0408 return -ENODEV;
0409
0410
0411 ret = misc_register(&xen_mce_chrdev_device);
0412 if (ret)
0413 return ret;
0414
0415 ret = bind_virq_for_mce();
0416 if (ret)
0417 goto deregister;
0418
0419 pr_info("/dev/mcelog registered by Xen\n");
0420
0421 return 0;
0422
0423 deregister:
0424 misc_deregister(&xen_mce_chrdev_device);
0425 return ret;
0426 }
0427 device_initcall(xen_late_init_mcelog);