0001
0002
0003
0004
0005
0006
0007
0008 #include <linux/types.h>
0009 #include <linux/errno.h>
0010 #include <linux/sched.h>
0011 #include <linux/kernel.h>
0012 #include <linux/poll.h>
0013 #include <linux/proc_fs.h>
0014 #include <linux/init.h>
0015 #include <linux/vmalloc.h>
0016 #include <linux/spinlock.h>
0017 #include <linux/cpu.h>
0018 #include <linux/workqueue.h>
0019 #include <linux/slab.h>
0020 #include <linux/topology.h>
0021
0022 #include <linux/uaccess.h>
0023 #include <asm/io.h>
0024 #include <asm/rtas.h>
0025 #include <asm/nvram.h>
0026 #include <linux/atomic.h>
0027 #include <asm/machdep.h>
0028 #include <asm/topology.h>
0029
0030
/*
 * Held (with interrupts saved/disabled) by pSeries_log_error() and
 * rtas_log_read() while they touch the in-memory log ring and its
 * bookkeeping below.
 */
0031 static DEFINE_SPINLOCK(rtasd_log_lock);
0032
/*
 * Readers of the error log wait here for rtas_log_size to become
 * non-zero; pSeries_log_error() wakes them after queueing an entry.
 */
0033 static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
0034
/*
 * In-memory log ring: LOG_NUMBER slots of rtas_error_log_buffer_max bytes,
 * allocated in rtas_event_scan_init().  rtas_log_start is the running index
 * of the oldest queued slot (masked with LOG_NUMBER_MASK when used) and
 * rtas_log_size is the number of queued entries.
 */
0035 static char *rtas_log_buf;
0036 static unsigned long rtas_log_start;
0037 static unsigned long rtas_log_size;
0038
/*
 * Surveillance timeout from the "surveillance=" boot option (0..255);
 * -1 means the option was not given and surveillance is left alone.
 */
0039 static int surveillance_timeout = -1;
0040
/*
 * rtas_error_log_max comes from rtas_get_error_log_max();
 * rtas_error_log_buffer_max adds sizeof(int) for the sequence number that
 * is stored in front of each queued entry.
 */
0041 static unsigned int rtas_error_log_max;
0042 static unsigned int rtas_error_log_buffer_max;
0043
0044
/*
 * event_scan holds the result of rtas_token("event-scan");
 * rtas_event_scan_rate holds the result of rtas_token("rtas-event-scan-rate")
 * and is used as the divisor when computing the scan delay.
 */
0045 static unsigned int event_scan;
0046 static unsigned int rtas_event_scan_rate;
0047
/* Set by the "rtasmsgs=" boot option: hex-dump whole events when true. */
0048 static bool full_rtas_msgs;
0049
0050
/*
 * Non-zero once retrieve_nvram_error_log() (CONFIG_PPC64) has run; cleared
 * again by pSeries_log_error() when a fatal event is logged.
 */
0051 static int logging_enabled;
0052
0053
/*
 * Sequence number of the most recent event; bumped by pSeries_log_error()
 * for every RTAS log event that does not carry ERR_FLAG_BOOT.
 */
0054 static int error_log_cnt;
0055
0056
0057
0058
0059
0060
/* Scratch buffer that event-scan and the NVRAM read fill with one event. */
0061 static unsigned char logdata[RTAS_ERROR_LOG_MAX];
0062
/* Names for error types 0..10, indexed directly by rtas_event_type(). */
0063 static char *rtas_type[] = {
0064 "Unknown", "Retry", "TCE Error", "Internal Device Failure",
0065 "Timeout", "Data Parity", "Address Parity", "Cache Parity",
0066 "Address Invalid", "ECC Uncorrected", "ECC Corrupted",
0067 };
0068
0069 static char *rtas_event_type(int type)
0070 {
0071 if ((type > 0) && (type < 11))
0072 return rtas_type[type];
0073
0074 switch (type) {
0075 case RTAS_TYPE_EPOW:
0076 return "EPOW";
0077 case RTAS_TYPE_PLATFORM:
0078 return "Platform Error";
0079 case RTAS_TYPE_IO:
0080 return "I/O Event";
0081 case RTAS_TYPE_INFO:
0082 return "Platform Information Event";
0083 case RTAS_TYPE_DEALLOC:
0084 return "Resource Deallocation Event";
0085 case RTAS_TYPE_DUMP:
0086 return "Dump Notification Event";
0087 case RTAS_TYPE_PRRN:
0088 return "Platform Resource Reassignment Event";
0089 case RTAS_TYPE_HOTPLUG:
0090 return "Hotplug Event";
0091 }
0092
0093 return rtas_type[0];
0094 }
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109 static void printk_log_rtas(char *buf, int len)
0110 {
0111
0112 int i,j,n = 0;
0113 int perline = 16;
0114 char buffer[64];
0115 char * str = "RTAS event";
0116
0117 if (full_rtas_msgs) {
0118 printk(RTAS_DEBUG "%d -------- %s begin --------\n",
0119 error_log_cnt, str);
0120
0121
0122
0123
0124
0125
0126
0127 for (i = 0; i < len; i++) {
0128 j = i % perline;
0129 if (j == 0) {
0130 memset(buffer, 0, sizeof(buffer));
0131 n = sprintf(buffer, "RTAS %d:", i/perline);
0132 }
0133
0134 if ((i % 4) == 0)
0135 n += sprintf(buffer+n, " ");
0136
0137 n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
0138
0139 if (j == (perline-1))
0140 printk(KERN_DEBUG "%s\n", buffer);
0141 }
0142 if ((i % perline) != 0)
0143 printk(KERN_DEBUG "%s\n", buffer);
0144
0145 printk(RTAS_DEBUG "%d -------- %s end ----------\n",
0146 error_log_cnt, str);
0147 } else {
0148 struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
0149
0150 printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
0151 error_log_cnt,
0152 rtas_event_type(rtas_error_type(errlog)),
0153 rtas_error_type(errlog),
0154 rtas_error_severity(errlog));
0155 }
0156 }
0157
0158 static int log_rtas_len(char * buf)
0159 {
0160 int len;
0161 struct rtas_error_log *err;
0162 uint32_t extended_log_length;
0163
0164
0165 len = 8;
0166 err = (struct rtas_error_log *)buf;
0167 extended_log_length = rtas_error_extended_log_length(err);
0168 if (rtas_error_extended(err) && extended_log_length) {
0169
0170
0171 len += extended_log_length;
0172 }
0173
0174 if (rtas_error_log_max == 0)
0175 rtas_error_log_max = rtas_get_error_log_max();
0176
0177 if (len > rtas_error_log_max)
0178 len = rtas_error_log_max;
0179
0180 return len;
0181 }
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197 void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
0198 {
0199 unsigned long offset;
0200 unsigned long s;
0201 int len = 0;
0202
0203 pr_debug("rtasd: logging event\n");
0204 if (buf == NULL)
0205 return;
0206
0207 spin_lock_irqsave(&rtasd_log_lock, s);
0208
0209
0210 switch (err_type & ERR_TYPE_MASK) {
0211 case ERR_TYPE_RTAS_LOG:
0212 len = log_rtas_len(buf);
0213 if (!(err_type & ERR_FLAG_BOOT))
0214 error_log_cnt++;
0215 break;
0216 case ERR_TYPE_KERNEL_PANIC:
0217 default:
0218 WARN_ON_ONCE(!irqs_disabled());
0219 spin_unlock_irqrestore(&rtasd_log_lock, s);
0220 return;
0221 }
0222
0223 #ifdef CONFIG_PPC64
0224
0225 if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
0226 nvram_write_error_log(buf, len, err_type, error_log_cnt);
0227 #endif
0228
0229
0230
0231
0232
0233
0234 if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
0235 printk_log_rtas(buf, len);
0236
0237
0238 if (fatal || !logging_enabled) {
0239 logging_enabled = 0;
0240 WARN_ON_ONCE(!irqs_disabled());
0241 spin_unlock_irqrestore(&rtasd_log_lock, s);
0242 return;
0243 }
0244
0245
0246 switch (err_type & ERR_TYPE_MASK) {
0247 case ERR_TYPE_RTAS_LOG:
0248 offset = rtas_error_log_buffer_max *
0249 ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
0250
0251
0252 memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
0253
0254
0255 offset += sizeof(int);
0256 memcpy(&rtas_log_buf[offset], buf, len);
0257
0258 if (rtas_log_size < LOG_NUMBER)
0259 rtas_log_size += 1;
0260 else
0261 rtas_log_start += 1;
0262
0263 WARN_ON_ONCE(!irqs_disabled());
0264 spin_unlock_irqrestore(&rtasd_log_lock, s);
0265 wake_up_interruptible(&rtas_log_wait);
0266 break;
0267 case ERR_TYPE_KERNEL_PANIC:
0268 default:
0269 WARN_ON_ONCE(!irqs_disabled());
0270 spin_unlock_irqrestore(&rtasd_log_lock, s);
0271 return;
0272 }
0273 }
0274
0275 static void handle_rtas_event(const struct rtas_error_log *log)
0276 {
0277 if (!machine_is(pseries))
0278 return;
0279
0280 if (rtas_error_type(log) == RTAS_TYPE_PRRN)
0281 pr_info_ratelimited("Platform resource reassignment ignored.\n");
0282 }
0283
/*
 * Open handler for the error-log proc file (wired up as .proc_open in
 * rtas_log_proc_ops).  Nothing is set up per open; always returns 0.
 */
0284 static int rtas_log_open(struct inode * inode, struct file * file)
0285 {
0286 return 0;
0287 }
0288
/*
 * Release handler for the error-log proc file (wired up as .proc_release
 * in rtas_log_proc_ops).  There is nothing to tear down; always returns 0.
 */
0289 static int rtas_log_release(struct inode * inode, struct file * file)
0290 {
0291 return 0;
0292 }
0293
0294
0295
0296
0297
0298 static ssize_t rtas_log_read(struct file * file, char __user * buf,
0299 size_t count, loff_t *ppos)
0300 {
0301 int error;
0302 char *tmp;
0303 unsigned long s;
0304 unsigned long offset;
0305
0306 if (!buf || count < rtas_error_log_buffer_max)
0307 return -EINVAL;
0308
0309 count = rtas_error_log_buffer_max;
0310
0311 if (!access_ok(buf, count))
0312 return -EFAULT;
0313
0314 tmp = kmalloc(count, GFP_KERNEL);
0315 if (!tmp)
0316 return -ENOMEM;
0317
0318 spin_lock_irqsave(&rtasd_log_lock, s);
0319
0320
0321 while (rtas_log_size == 0) {
0322 if (file->f_flags & O_NONBLOCK) {
0323 spin_unlock_irqrestore(&rtasd_log_lock, s);
0324 error = -EAGAIN;
0325 goto out;
0326 }
0327
0328 if (!logging_enabled) {
0329 spin_unlock_irqrestore(&rtasd_log_lock, s);
0330 error = -ENODATA;
0331 goto out;
0332 }
0333 #ifdef CONFIG_PPC64
0334 nvram_clear_error_log();
0335 #endif
0336
0337 spin_unlock_irqrestore(&rtasd_log_lock, s);
0338 error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
0339 if (error)
0340 goto out;
0341 spin_lock_irqsave(&rtasd_log_lock, s);
0342 }
0343
0344 offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
0345 memcpy(tmp, &rtas_log_buf[offset], count);
0346
0347 rtas_log_start += 1;
0348 rtas_log_size -= 1;
0349 spin_unlock_irqrestore(&rtasd_log_lock, s);
0350
0351 error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
0352 out:
0353 kfree(tmp);
0354 return error;
0355 }
0356
0357 static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
0358 {
0359 poll_wait(file, &rtas_log_wait, wait);
0360 if (rtas_log_size)
0361 return EPOLLIN | EPOLLRDNORM;
0362 return 0;
0363 }
0364
0365 static const struct proc_ops rtas_log_proc_ops = {
0366 .proc_read = rtas_log_read,
0367 .proc_poll = rtas_log_poll,
0368 .proc_open = rtas_log_open,
0369 .proc_release = rtas_log_release,
0370 .proc_lseek = noop_llseek,
0371 };
0372
0373 static int enable_surveillance(int timeout)
0374 {
0375 int error;
0376
0377 error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
0378
0379 if (error == 0)
0380 return 0;
0381
0382 if (error == -EINVAL) {
0383 printk(KERN_DEBUG "rtasd: surveillance not supported\n");
0384 return 0;
0385 }
0386
0387 printk(KERN_ERR "rtasd: could not update surveillance\n");
0388 return -1;
0389 }
0390
0391 static void do_event_scan(void)
0392 {
0393 int error;
0394 do {
0395 memset(logdata, 0, rtas_error_log_max);
0396 error = rtas_call(event_scan, 4, 1, NULL,
0397 RTAS_EVENT_SCAN_ALL_EVENTS, 0,
0398 __pa(logdata), rtas_error_log_max);
0399 if (error == -1) {
0400 printk(KERN_ERR "event-scan failed\n");
0401 break;
0402 }
0403
0404 if (error == 0) {
0405 if (rtas_error_type((struct rtas_error_log *)logdata) !=
0406 RTAS_TYPE_PRRN)
0407 pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG,
0408 0);
0409 handle_rtas_event((struct rtas_error_log *)logdata);
0410 }
0411
0412 } while(error == 0);
0413 }
0414
/*
 * Delayed work item that runs rtas_event_scan(); the function is declared
 * ahead of its definition so the work item can reference it.
 */
0415 static void rtas_event_scan(struct work_struct *w);
0416 static DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
0417
0418
0419
0420
0421
/*
 * Delay between scans.  It starts at one second and is switched to
 * 30*HZ/rtas_event_scan_rate by rtas_event_scan() the first time the scan
 * wraps past the last online CPU, at which point first_pass is cleared.
 */
0422 static unsigned long event_scan_delay = 1*HZ;
0423 static int first_pass = 1;
0424
0425 static void rtas_event_scan(struct work_struct *w)
0426 {
0427 unsigned int cpu;
0428
0429 do_event_scan();
0430
0431 cpus_read_lock();
0432
0433
0434 cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
0435 if (cpu >= nr_cpu_ids) {
0436 cpu = cpumask_first(cpu_online_mask);
0437
0438 if (first_pass) {
0439 first_pass = 0;
0440 event_scan_delay = 30*HZ/rtas_event_scan_rate;
0441
0442 if (surveillance_timeout != -1) {
0443 pr_debug("rtasd: enabling surveillance\n");
0444 enable_surveillance(surveillance_timeout);
0445 pr_debug("rtasd: surveillance enabled\n");
0446 }
0447 }
0448 }
0449
0450 schedule_delayed_work_on(cpu, &event_scan_work,
0451 __round_jiffies_relative(event_scan_delay, cpu));
0452
0453 cpus_read_unlock();
0454 }
0455
0456 #ifdef CONFIG_PPC64
0457 static void __init retrieve_nvram_error_log(void)
0458 {
0459 unsigned int err_type ;
0460 int rc ;
0461
0462
0463 memset(logdata, 0, rtas_error_log_max);
0464 rc = nvram_read_error_log(logdata, rtas_error_log_max,
0465 &err_type, &error_log_cnt);
0466
0467 logging_enabled = 1;
0468 if (!rc) {
0469 if (err_type != ERR_FLAG_ALREADY_LOGGED) {
0470 pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
0471 }
0472 }
0473 }
0474 #else
0475 static void __init retrieve_nvram_error_log(void)
0476 {
0477 }
0478 #endif
0479
0480 static void __init start_event_scan(void)
0481 {
0482 printk(KERN_DEBUG "RTAS daemon started\n");
0483 pr_debug("rtasd: will sleep for %d milliseconds\n",
0484 (30000 / rtas_event_scan_rate));
0485
0486
0487 retrieve_nvram_error_log();
0488
0489 schedule_delayed_work_on(cpumask_first(cpu_online_mask),
0490 &event_scan_work, event_scan_delay);
0491 }
0492
0493
/*
 * Stop the periodic event scan by cancelling event_scan_work with
 * cancel_delayed_work_sync().  Exported (GPL-only) for use outside this
 * file.
 */
0494 void rtas_cancel_event_scan(void)
0495 {
0496 cancel_delayed_work_sync(&event_scan_work);
0497 }
0498 EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
0499
0500 static int __init rtas_event_scan_init(void)
0501 {
0502 if (!machine_is(pseries) && !machine_is(chrp))
0503 return 0;
0504
0505
0506 event_scan = rtas_token("event-scan");
0507 if (event_scan == RTAS_UNKNOWN_SERVICE) {
0508 printk(KERN_INFO "rtasd: No event-scan on system\n");
0509 return -ENODEV;
0510 }
0511
0512 rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
0513 if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
0514 printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
0515 return -ENODEV;
0516 }
0517
0518 if (!rtas_event_scan_rate) {
0519
0520 printk(KERN_DEBUG "rtasd: scan rate is 0, not scanning\n");
0521 return 0;
0522 }
0523
0524
0525 rtas_error_log_max = rtas_get_error_log_max();
0526 rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
0527
0528 rtas_log_buf = vmalloc(array_size(LOG_NUMBER,
0529 rtas_error_log_buffer_max));
0530 if (!rtas_log_buf) {
0531 printk(KERN_ERR "rtasd: no memory\n");
0532 return -ENOMEM;
0533 }
0534
0535 start_event_scan();
0536
0537 return 0;
0538 }
0539 arch_initcall(rtas_event_scan_init);
0540
0541 static int __init rtas_init(void)
0542 {
0543 struct proc_dir_entry *entry;
0544
0545 if (!machine_is(pseries) && !machine_is(chrp))
0546 return 0;
0547
0548 if (!rtas_log_buf)
0549 return -ENODEV;
0550
0551 entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
0552 &rtas_log_proc_ops);
0553 if (!entry)
0554 printk(KERN_ERR "Failed to create error_log proc entry\n");
0555
0556 return 0;
0557 }
0558 __initcall(rtas_init);
0559
0560 static int __init surveillance_setup(char *str)
0561 {
0562 int i;
0563
0564
0565 if (!machine_is(pseries))
0566 return 0;
0567
0568 if (get_option(&str,&i)) {
0569 if (i >= 0 && i <= 255)
0570 surveillance_timeout = i;
0571 }
0572
0573 return 1;
0574 }
0575 __setup("surveillance=", surveillance_setup);
0576
0577 static int __init rtasmsgs_setup(char *str)
0578 {
0579 return (kstrtobool(str, &full_rtas_msgs) == 0);
0580 }
0581 __setup("rtasmsgs=", rtasmsgs_setup);