// SPDX-License-Identifier: GPL-2.0-only
/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "internal.h"

static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

static DEFINE_MUTEX(mce_chrdev_read_mutex);

static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

static struct mce_log_buffer *mcelog;
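
/*
 * For reference: struct mce_log_buffer is defined in the x86 MCE headers,
 * not in this file.  An approximate sketch of its layout (field details may
 * differ between kernel versions):
 *
 *      struct mce_log_buffer {
 *          char        signature[12];  // "MACHINECHECK"
 *          unsigned    len;            // number of entry[] slots
 *          unsigned    next;           // next free slot
 *          unsigned    flags;          // e.g. MCE_OVERFLOW
 *          unsigned    recordlen;      // sizeof(struct mce)
 *          struct mce  entry[];        // the records themselves
 *      };
 */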

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

static int dev_mce_log(struct notifier_block *nb, unsigned long val,
                void *data)
{
    struct mce *mce = (struct mce *)data;
    unsigned int entry;

    if (mce->kflags & MCE_HANDLED_CEC)
        return NOTIFY_DONE;

    mutex_lock(&mce_chrdev_read_mutex);

    entry = mcelog->next;

    /*
     * When the buffer fills up discard new entries. Assume that the
     * earlier errors are the more interesting ones:
     */
    if (entry >= mcelog->len) {
        set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
        goto unlock;
    }

    mcelog->next = entry + 1;

    memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
    mcelog->entry[entry].finished = 1;
    mcelog->entry[entry].kflags = 0;

    /* wake processes polling /dev/mcelog */
    wake_up_interruptible(&mce_chrdev_wait);

unlock:
    mutex_unlock(&mce_chrdev_read_mutex);

    if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
        mce->kflags |= MCE_HANDLED_MCELOG;

    return NOTIFY_OK;
}

static struct notifier_block dev_mcelog_nb = {
    .notifier_call  = dev_mce_log,
    .priority   = MCE_PRIO_MCELOG,
};

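/*
 * The trigger is a usermode helper program; it cannot be spawned directly
 * from the machine-check notification path, so mce_work_trigger() defers
 * the call_usermodehelper() invocation to process context via a work item.
 */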
static void mce_do_trigger(struct work_struct *work)
{
    call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);


void mce_work_trigger(void)
{
    if (mce_helper[0])
        schedule_work(&mce_trigger_work);
}

static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
    strcpy(buf, mce_helper);
    strcat(buf, "\n");
    return strlen(mce_helper) + 1;
}

static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
                const char *buf, size_t siz)
{
    char *p;

    strncpy(mce_helper, buf, sizeof(mce_helper));
    mce_helper[sizeof(mce_helper)-1] = 0;
    p = strchr(mce_helper, '\n');

    if (p)
        *p = 0;

    return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
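
/*
 * Example (not part of this file): the trigger path above is normally set
 * from userspace through the machinecheck sysfs attribute.  A minimal
 * sketch; the sysfs path and helper name are assumptions and may vary by
 * system and kernel version:
 *
 *      int fd = open("/sys/devices/system/machinecheck/machinecheck0/trigger",
 *                    O_WRONLY);
 *
 *      if (fd >= 0) {
 *          write(fd, "/usr/local/sbin/mce-notify\n", 27);
 *          close(fd);
 *      }
 */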

/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;   /* #times opened */
static int mce_chrdev_open_exclu;   /* already open exclusive? */

static int mce_chrdev_open(struct inode *inode, struct file *file)
{
    spin_lock(&mce_chrdev_state_lock);

    if (mce_chrdev_open_exclu ||
        (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
        spin_unlock(&mce_chrdev_state_lock);

        return -EBUSY;
    }

    if (file->f_flags & O_EXCL)
        mce_chrdev_open_exclu = 1;
    mce_chrdev_open_count++;

    spin_unlock(&mce_chrdev_state_lock);

    return nonseekable_open(inode, file);
}

static int mce_chrdev_release(struct inode *inode, struct file *file)
{
    spin_lock(&mce_chrdev_state_lock);

    mce_chrdev_open_count--;
    mce_chrdev_open_exclu = 0;

    spin_unlock(&mce_chrdev_state_lock);

    return 0;
}

static int mce_apei_read_done;

/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
    int rc;
    u64 record_id;
    struct mce m;

    if (usize < sizeof(struct mce))
        return -EINVAL;

    rc = apei_read_mce(&m, &record_id);
    /* Error or no more MCE record */
    if (rc <= 0) {
        mce_apei_read_done = 1;
        /*
         * When ERST is disabled, mce_chrdev_read() should return
         * "no record" instead of "no device."
         */
        if (rc == -ENODEV)
            return 0;
        return rc;
    }
    rc = -EFAULT;
    if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
        return rc;
    /*
     * In fact, we should have cleared the record after that has
     * been flushed to the disk or sent to network in
     * /sbin/mcelog, but we have no interface to support that now,
     * so just clear it to avoid duplication.
     */
    rc = apei_clear_mce(record_id);
    if (rc) {
        mce_apei_read_done = 1;
        return rc;
    }
    *ubuf += sizeof(struct mce);

    return 0;
}

static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
                size_t usize, loff_t *off)
{
    char __user *buf = ubuf;
    unsigned next;
    int i, err;

    mutex_lock(&mce_chrdev_read_mutex);

    if (!mce_apei_read_done) {
        err = __mce_read_apei(&buf, usize);
        if (err || buf != ubuf)
            goto out;
    }

    /* Only supports full reads right now */
    err = -EINVAL;
    if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
        goto out;

    next = mcelog->next;
    err = 0;

    for (i = 0; i < next; i++) {
        struct mce *m = &mcelog->entry[i];

        err |= copy_to_user(buf, m, sizeof(*m));
        buf += sizeof(*m);
    }

    memset(mcelog->entry, 0, next * sizeof(struct mce));
    mcelog->next = 0;

    if (err)
        err = -EFAULT;

out:
    mutex_unlock(&mce_chrdev_read_mutex);

    return err ? err : buf - ubuf;
}

static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
    poll_wait(file, &mce_chrdev_wait, wait);
    if (READ_ONCE(mcelog->next))
        return EPOLLIN | EPOLLRDNORM;
    if (!mce_apei_read_done && apei_check_mce())
        return EPOLLIN | EPOLLRDNORM;
    return 0;
}
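
/*
 * Example (not part of this file): a reader can block until records arrive
 * by polling the device.  A minimal userspace sketch, assuming the usual
 * <fcntl.h> and <poll.h> includes and permission to open /dev/mcelog:
 *
 *      struct pollfd pfd = {
 *          .fd     = open("/dev/mcelog", O_RDONLY),
 *          .events = POLLIN,
 *      };
 *
 *      if (pfd.fd >= 0 && poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *          // at least one record is buffered; drain it with read()
 *      }
 */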

static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
                unsigned long arg)
{
    int __user *p = (int __user *)arg;

    if (!capable(CAP_SYS_ADMIN))
        return -EPERM;

    switch (cmd) {
    case MCE_GET_RECORD_LEN:
        return put_user(sizeof(struct mce), p);
    case MCE_GET_LOG_LEN:
        return put_user(mcelog->len, p);
    case MCE_GETCLEAR_FLAGS: {
        unsigned flags;

        do {
            flags = mcelog->flags;
        } while (cmpxchg(&mcelog->flags, flags, 0) != flags);

        return put_user(flags, p);
    }
    default:
        return -ENOTTY;
    }
}
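
/*
 * Example (not part of this file): a consumer such as mcelog(8) typically
 * sizes its buffer with the ioctls above and then drains the whole log in
 * a single read(), which also clears it.  A minimal sketch, assuming the
 * MCE_GET_* macros and the struct mce layout are available to userspace:
 *
 *      int fd = open("/dev/mcelog", O_RDONLY);
 *      int recordlen = 0, loglen = 0;
 *
 *      ioctl(fd, MCE_GET_RECORD_LEN, &recordlen);
 *      ioctl(fd, MCE_GET_LOG_LEN, &loglen);
 *
 *      char *buf = malloc((size_t)recordlen * loglen);
 *      ssize_t n = read(fd, buf, (size_t)recordlen * loglen);
 *      // n / recordlen complete records were copied out
 */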

void mce_register_injector_chain(struct notifier_block *nb)
{
    blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);

void mce_unregister_injector_chain(struct notifier_block *nb)
{
    blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);

static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
                size_t usize, loff_t *off)
{
    struct mce m;

    if (!capable(CAP_SYS_ADMIN))
        return -EPERM;
    /*
     * There are some cases where real MSR reads could slip
     * through.
     */
    if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
        return -EIO;

    if ((unsigned long)usize > sizeof(struct mce))
        usize = sizeof(struct mce);
    if (copy_from_user(&m, ubuf, usize))
        return -EFAULT;

    if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
        return -EINVAL;

    /*
     * Need to give user space some time to set everything up,
     * so do it a jiffie or two later everywhere.
     */
    schedule_timeout(2);

    blocking_notifier_call_chain(&mce_injector_chain, 0, &m);

    return usize;
}
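
/*
 * Example (not part of this file): writing a struct mce record hands it to
 * whichever injector is registered on mce_injector_chain (typically the
 * mce-inject module, when loaded).  A minimal sketch, assuming a userspace
 * copy of the struct mce layout and of the MCI_STATUS_* bit definitions:
 *
 *      struct mce m = { 0 };
 *
 *      m.status = MCI_STATUS_VAL | MCI_STATUS_UC;  // fabricated error record
 *      m.extcpu = 0;                               // CPU to raise it on
 *
 *      int fd = open("/dev/mcelog", O_WRONLY);
 *      write(fd, &m, sizeof(m));                   // needs CAP_SYS_ADMIN
 */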

static const struct file_operations mce_chrdev_ops = {
    .open           = mce_chrdev_open,
    .release        = mce_chrdev_release,
    .read           = mce_chrdev_read,
    .write          = mce_chrdev_write,
    .poll           = mce_chrdev_poll,
    .unlocked_ioctl     = mce_chrdev_ioctl,
    .compat_ioctl       = compat_ptr_ioctl,
    .llseek         = no_llseek,
};

static struct miscdevice mce_chrdev_device = {
    MISC_MCELOG_MINOR,
    "mcelog",
    &mce_chrdev_ops,
};

static __init int dev_mcelog_init_device(void)
{
    int mce_log_len;
    int err;

    mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
    mcelog = kzalloc(struct_size(mcelog, entry, mce_log_len), GFP_KERNEL);
    if (!mcelog)
        return -ENOMEM;

    memcpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
    mcelog->len = mce_log_len;
    mcelog->recordlen = sizeof(struct mce);

    /* register character device /dev/mcelog */
    err = misc_register(&mce_chrdev_device);
    if (err) {
        if (err == -EBUSY)
            /* Xen dom0 might have registered the device already. */
            pr_info("Unable to init device /dev/mcelog, already registered");
        else
            pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

        kfree(mcelog);
        return err;
    }

    mce_register_decode_chain(&dev_mcelog_nb);
    return 0;
}
device_initcall_sync(dev_mcelog_init_device);