Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  *  linux/kernel/printk.c
0004  *
0005  *  Copyright (C) 1991, 1992  Linus Torvalds
0006  *
0007  * Modified to make sys_syslog() more flexible: added commands to
0008  * return the last 4k of kernel messages, regardless of whether
0009  * they've been read or not.  Added option to suppress kernel printk's
0010  * to the console.  Added hook for sending the console messages
0011  * elsewhere, in preparation for a serial line console (someday).
0012  * Ted Ts'o, 2/11/93.
0013  * Modified for sysctl support, 1/8/97, Chris Horn.
0014  * Fixed SMP synchronization, 08/08/99, Manfred Spraul
0015  *     manfred@colorfullife.com
0016  * Rewrote bits to get rid of console_lock
0017  *  01Mar01 Andrew Morton
0018  */
0019 
0020 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
0021 
0022 #include <linux/kernel.h>
0023 #include <linux/mm.h>
0024 #include <linux/tty.h>
0025 #include <linux/tty_driver.h>
0026 #include <linux/console.h>
0027 #include <linux/init.h>
0028 #include <linux/jiffies.h>
0029 #include <linux/nmi.h>
0030 #include <linux/module.h>
0031 #include <linux/moduleparam.h>
0032 #include <linux/delay.h>
0033 #include <linux/smp.h>
0034 #include <linux/security.h>
0035 #include <linux/memblock.h>
0036 #include <linux/syscalls.h>
0037 #include <linux/crash_core.h>
0038 #include <linux/ratelimit.h>
0039 #include <linux/kmsg_dump.h>
0040 #include <linux/syslog.h>
0041 #include <linux/cpu.h>
0042 #include <linux/rculist.h>
0043 #include <linux/poll.h>
0044 #include <linux/irq_work.h>
0045 #include <linux/ctype.h>
0046 #include <linux/uio.h>
0047 #include <linux/sched/clock.h>
0048 #include <linux/sched/debug.h>
0049 #include <linux/sched/task_stack.h>
0050 
0051 #include <linux/uaccess.h>
0052 #include <asm/sections.h>
0053 
0054 #include <trace/events/initcall.h>
0055 #define CREATE_TRACE_POINTS
0056 #include <trace/events/printk.h>
0057 
0058 #include "printk_ringbuffer.h"
0059 #include "console_cmdline.h"
0060 #include "braille.h"
0061 #include "internal.h"
0062 
/*
 * Console log level settings. The four slots are, in order:
 * console_loglevel, default_message_loglevel, minimum_console_loglevel
 * and default_console_loglevel (see the per-entry comments).
 */
int console_printk[4] = {
    CONSOLE_LOGLEVEL_DEFAULT,   /* console_loglevel */
    MESSAGE_LOGLEVEL_DEFAULT,   /* default_message_loglevel */
    CONSOLE_LOGLEVEL_MIN,       /* minimum_console_loglevel */
    CONSOLE_LOGLEVEL_DEFAULT,   /* default_console_loglevel */
};
EXPORT_SYMBOL_GPL(console_printk);
0070 
0071 atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0);
0072 EXPORT_SYMBOL(ignore_console_lock_warning);
0073 
0074 /*
0075  * Low level drivers may need that to know if they can schedule in
0076  * their unblank() callback or not. So let's export it.
0077  */
0078 int oops_in_progress;
0079 EXPORT_SYMBOL(oops_in_progress);
0080 
0081 /*
0082  * console_sem protects the console_drivers list, and also
0083  * provides serialisation for access to the entire console
0084  * driver system.
0085  */
0086 static DEFINE_SEMAPHORE(console_sem);
0087 struct console *console_drivers;
0088 EXPORT_SYMBOL_GPL(console_drivers);
0089 
0090 /*
0091  * System may need to suppress printk message under certain
0092  * circumstances, like after kernel panic happens.
0093  */
0094 int __read_mostly suppress_printk;
0095 
0096 /*
0097  * During panic, heavy printk by other CPUs can delay the
0098  * panic and risk deadlock on console resources.
0099  */
0100 static int __read_mostly suppress_panic_printk;
0101 
0102 #ifdef CONFIG_LOCKDEP
0103 static struct lockdep_map console_lock_dep_map = {
0104     .name = "console_lock"
0105 };
0106 #endif
0107 
/* Bit positions of the /dev/kmsg logging mode flags kept in devkmsg_log. */
enum devkmsg_log_bits {
    __DEVKMSG_LOG_BIT_ON    = 0,    /* user logging always allowed */
    __DEVKMSG_LOG_BIT_OFF   = 1,    /* user logging disabled */
    __DEVKMSG_LOG_BIT_LOCK  = 2,    /* setting may no longer be changed */
};
0113 
/* Single-bit masks built from the devkmsg_log_bits positions. */
enum devkmsg_log_masks {
    DEVKMSG_LOG_MASK_ON             = BIT(__DEVKMSG_LOG_BIT_ON),
    DEVKMSG_LOG_MASK_OFF            = BIT(__DEVKMSG_LOG_BIT_OFF),
    DEVKMSG_LOG_MASK_LOCK           = BIT(__DEVKMSG_LOG_BIT_LOCK),
};
0119 
0120 /* Keep both the 'on' and 'off' bits clear, i.e. ratelimit by default: */
0121 #define DEVKMSG_LOG_MASK_DEFAULT    0
0122 
0123 static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
0124 
0125 static int __control_devkmsg(char *str)
0126 {
0127     size_t len;
0128 
0129     if (!str)
0130         return -EINVAL;
0131 
0132     len = str_has_prefix(str, "on");
0133     if (len) {
0134         devkmsg_log = DEVKMSG_LOG_MASK_ON;
0135         return len;
0136     }
0137 
0138     len = str_has_prefix(str, "off");
0139     if (len) {
0140         devkmsg_log = DEVKMSG_LOG_MASK_OFF;
0141         return len;
0142     }
0143 
0144     len = str_has_prefix(str, "ratelimit");
0145     if (len) {
0146         devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
0147         return len;
0148     }
0149 
0150     return -EINVAL;
0151 }
0152 
0153 static int __init control_devkmsg(char *str)
0154 {
0155     if (__control_devkmsg(str) < 0) {
0156         pr_warn("printk.devkmsg: bad option string '%s'\n", str);
0157         return 1;
0158     }
0159 
0160     /*
0161      * Set sysctl string accordingly:
0162      */
0163     if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
0164         strcpy(devkmsg_log_str, "on");
0165     else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
0166         strcpy(devkmsg_log_str, "off");
0167     /* else "ratelimit" which is set by default. */
0168 
0169     /*
0170      * Sysctl cannot change it anymore. The kernel command line setting of
0171      * this parameter is to force the setting to be permanent throughout the
0172      * runtime of the system. This is a precation measure against userspace
0173      * trying to be a smarta** and attempting to change it up on us.
0174      */
0175     devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;
0176 
0177     return 1;
0178 }
0179 __setup("printk.devkmsg=", control_devkmsg);
0180 
0181 char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
0182 #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
0183 int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
0184                   void *buffer, size_t *lenp, loff_t *ppos)
0185 {
0186     char old_str[DEVKMSG_STR_MAX_SIZE];
0187     unsigned int old;
0188     int err;
0189 
0190     if (write) {
0191         if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK)
0192             return -EINVAL;
0193 
0194         old = devkmsg_log;
0195         strncpy(old_str, devkmsg_log_str, DEVKMSG_STR_MAX_SIZE);
0196     }
0197 
0198     err = proc_dostring(table, write, buffer, lenp, ppos);
0199     if (err)
0200         return err;
0201 
0202     if (write) {
0203         err = __control_devkmsg(devkmsg_log_str);
0204 
0205         /*
0206          * Do not accept an unknown string OR a known string with
0207          * trailing crap...
0208          */
0209         if (err < 0 || (err + 1 != *lenp)) {
0210 
0211             /* ... and restore old setting. */
0212             devkmsg_log = old;
0213             strncpy(devkmsg_log_str, old_str, DEVKMSG_STR_MAX_SIZE);
0214 
0215             return -EINVAL;
0216         }
0217     }
0218 
0219     return 0;
0220 }
0221 #endif /* CONFIG_PRINTK && CONFIG_SYSCTL */
0222 
0223 /* Number of registered extended console drivers. */
0224 static int nr_ext_console_drivers;
0225 
/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
 *
 * down_console_sem() takes console_sem with down() and then records the
 * acquisition in console_lock_dep_map for lockdep.
 */
#define down_console_sem() do { \
    down(&console_sem);\
    mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)
0234 
0235 static int __down_trylock_console_sem(unsigned long ip)
0236 {
0237     int lock_failed;
0238     unsigned long flags;
0239 
0240     /*
0241      * Here and in __up_console_sem() we need to be in safe mode,
0242      * because spindump/WARN/etc from under console ->lock will
0243      * deadlock in printk()->down_trylock_console_sem() otherwise.
0244      */
0245     printk_safe_enter_irqsave(flags);
0246     lock_failed = down_trylock(&console_sem);
0247     printk_safe_exit_irqrestore(flags);
0248 
0249     if (lock_failed)
0250         return 1;
0251     mutex_acquire(&console_lock_dep_map, 0, 1, ip);
0252     return 0;
0253 }
0254 #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)
0255 
/*
 * Release console_sem.
 *
 * @ip: caller address passed to the lockdep release annotation.
 *
 * The lockdep annotation is dropped first; the up() itself is wrapped in
 * printk-safe mode with interrupts saved/restored.
 */
static void __up_console_sem(unsigned long ip)
{
    unsigned long flags;

    mutex_release(&console_lock_dep_map, ip);

    printk_safe_enter_irqsave(flags);
    up(&console_sem);
    printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)
0267 
0268 static bool panic_in_progress(void)
0269 {
0270     return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
0271 }
0272 
0273 /*
0274  * This is used for debugging the mess that is the VT code by
0275  * keeping track if we have the console semaphore held. It's
0276  * definitely not the perfect debug tool (we don't know if _WE_
0277  * hold it and are racing, but it helps tracking those weird code
0278  * paths in the console code where we end up in places I want
0279  * locked without the console semaphore held).
0280  */
0281 static int console_locked, console_suspended;
0282 
0283 /*
0284  *  Array of consoles built from command line options (console=)
0285  */
0286 
0287 #define MAX_CMDLINECONSOLES 8
0288 
0289 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
0290 
0291 static int preferred_console = -1;
0292 int console_set_on_cmdline;
0293 EXPORT_SYMBOL(console_set_on_cmdline);
0294 
0295 /* Flag: console code may call schedule() */
0296 static int console_may_schedule;
0297 
0298 enum con_msg_format_flags {
0299     MSG_FORMAT_DEFAULT  = 0,
0300     MSG_FORMAT_SYSLOG   = (1 << 0),
0301 };
0302 
0303 static int console_msg_format = MSG_FORMAT_DEFAULT;
0304 
0305 /*
0306  * The printk log buffer consists of a sequenced collection of records, each
0307  * containing variable length message text. Every record also contains its
0308  * own meta-data (@info).
0309  *
0310  * Every record meta-data carries the timestamp in microseconds, as well as
0311  * the standard userspace syslog level and syslog facility. The usual kernel
0312  * messages use LOG_KERN; userspace-injected messages always carry a matching
0313  * syslog facility, by default LOG_USER. The origin of every message can be
0314  * reliably determined that way.
0315  *
0316  * The human readable log message of a record is available in @text, the
0317  * length of the message text in @text_len. The stored message is not
0318  * terminated.
0319  *
0320  * Optionally, a record can carry a dictionary of properties (key/value
0321  * pairs), to provide userspace with a machine-readable message context.
0322  *
0323  * Examples for well-defined, commonly used property names are:
0324  *   DEVICE=b12:8               device identifier
0325  *                                b12:8         block dev_t
0326  *                                c127:3        char dev_t
0327  *                                n8            netdev ifindex
0328  *                                +sound:card0  subsystem:devname
0329  *   SUBSYSTEM=pci              driver-core subsystem name
0330  *
0331  * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
0332  * and values are terminated by a '\0' character.
0333  *
0334  * Example of record values:
0335  *   record.text_buf                = "it's a line" (unterminated)
0336  *   record.info.seq                = 56
0337  *   record.info.ts_nsec            = 36863
0338  *   record.info.text_len           = 11
0339  *   record.info.facility           = 0 (LOG_KERN)
0340  *   record.info.flags              = 0
0341  *   record.info.level              = 3 (LOG_ERR)
0342  *   record.info.caller_id          = 299 (task 299)
0343  *   record.info.dev_info.subsystem = "pci" (terminated)
0344  *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
0345  *
0346  * The 'struct printk_info' buffer must never be directly exported to
0347  * userspace, it is a kernel-private implementation detail that might
0348  * need to be changed in the future, when the requirements change.
0349  *
0350  * /dev/kmsg exports the structured data in the following line format:
0351  *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
0352  *
0353  * Users of the export format should ignore possible additional values
0354  * separated by ',', and find the message after the ';' character.
0355  *
0356  * The optional key/value pairs are attached as continuation lines starting
0357  * with a space character and terminated by a newline. All possible
0358  * non-printable characters are escaped in the "\xff" notation.
0359  */
0360 
0361 /* syslog_lock protects syslog_* variables and write access to clear_seq. */
0362 static DEFINE_MUTEX(syslog_lock);
0363 
0364 #ifdef CONFIG_PRINTK
0365 DECLARE_WAIT_QUEUE_HEAD(log_wait);
0366 /* All 3 protected by @syslog_lock. */
0367 /* the next printk record to read by syslog(READ) or /proc/kmsg */
0368 static u64 syslog_seq;
0369 static size_t syslog_partial;
0370 static bool syslog_time;
0371 
/*
 * A u64 value kept in two copies, selected through a latch sequence
 * counter; see latched_seq_write() and latched_seq_read_nolock().
 */
struct latched_seq {
    seqcount_latch_t    latch;  /* sequence counter selecting the copy to read */
    u64         val[2];         /* the two copies of the value */
};
0376 
0377 /*
0378  * The next printk record to read after the last 'clear' command. There are
0379  * two copies (updated with seqcount_latch) so that reads can locklessly
0380  * access a valid value. Writers are synchronized by @syslog_lock.
0381  */
0382 static struct latched_seq clear_seq = {
0383     .latch      = SEQCNT_LATCH_ZERO(clear_seq.latch),
0384     .val[0]     = 0,
0385     .val[1]     = 0,
0386 };
0387 
0388 #ifdef CONFIG_PRINTK_CALLER
0389 #define PREFIX_MAX      48
0390 #else
0391 #define PREFIX_MAX      32
0392 #endif
0393 
0394 /* the maximum size of a formatted record (i.e. with prefix added per line) */
0395 #define CONSOLE_LOG_MAX     1024
0396 
0397 /* the maximum size for a dropped text message */
0398 #define DROPPED_TEXT_MAX    64
0399 
0400 /* the maximum size allowed to be reserved for a record */
0401 #define LOG_LINE_MAX        (CONSOLE_LOG_MAX - PREFIX_MAX)
0402 
0403 #define LOG_LEVEL(v)        ((v) & 0x07)
0404 #define LOG_FACILITY(v)     ((v) >> 3 & 0xff)
0405 
0406 /* record buffer */
#define LOG_ALIGN __alignof__(unsigned long)
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/*
 * Largest supported log buffer size (2 GiB). The cast is applied to the
 * operand before shifting: shifting the signed int 1 into the sign bit is
 * undefined behaviour, whereas the u32 shift is well defined.
 */
#define LOG_BUF_LEN_MAX ((u32)1 << 31)
0410 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
0411 static char *log_buf = __log_buf;
0412 static u32 log_buf_len = __LOG_BUF_LEN;
0413 
0414 /*
0415  * Define the average message size. This only affects the number of
0416  * descriptors that will be available. Underestimating is better than
0417  * overestimating (too many available descriptors is better than not enough).
0418  */
0419 #define PRB_AVGBITS 5   /* 32 character average length */
0420 
0421 #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS
0422 #error CONFIG_LOG_BUF_SHIFT value too small.
0423 #endif
0424 _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
0425          PRB_AVGBITS, &__log_buf[0]);
0426 
0427 static struct printk_ringbuffer printk_rb_dynamic;
0428 
0429 static struct printk_ringbuffer *prb = &printk_rb_static;
0430 
0431 /*
0432  * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
0433  * per_cpu_areas are initialised. This variable is set to true when
0434  * it's safe to access per-CPU data.
0435  */
0436 static bool __printk_percpu_data_ready __read_mostly;
0437 
/* Return the flag that says whether per-CPU data may be accessed yet. */
bool printk_percpu_data_ready(void)
{
    return __printk_percpu_data_ready;
}
0442 
/*
 * Store @val into both copies of @ls.
 *
 * Must be called under syslog_lock.
 *
 * The latch counter is advanced before each copy is written, so only one
 * copy is modified at a time; latched_seq_read_nolock() picks the copy
 * indexed by the low bit of the counter it sampled and retries if the
 * counter changed meanwhile.
 */
static void latched_seq_write(struct latched_seq *ls, u64 val)
{
    raw_write_seqcount_latch(&ls->latch);
    ls->val[0] = val;
    raw_write_seqcount_latch(&ls->latch);
    ls->val[1] = val;
}
0451 
0452 /* Can be called from any context. */
0453 static u64 latched_seq_read_nolock(struct latched_seq *ls)
0454 {
0455     unsigned int seq;
0456     unsigned int idx;
0457     u64 val;
0458 
0459     do {
0460         seq = raw_read_seqcount_latch(&ls->latch);
0461         idx = seq & 0x1;
0462         val = ls->val[idx];
0463     } while (read_seqcount_latch_retry(&ls->latch, seq));
0464 
0465     return val;
0466 }
0467 
/*
 * Return log buffer address, i.e. the current value of log_buf (which is
 * initialised to the static __log_buf array).
 */
char *log_buf_addr_get(void)
{
    return log_buf;
}
0473 
/*
 * Return log buffer size in bytes, i.e. the current value of log_buf_len
 * (which is initialised to __LOG_BUF_LEN).
 */
u32 log_buf_len_get(void)
{
    return log_buf_len;
}
0479 
0480 /*
0481  * Define how much of the log buffer we could take at maximum. The value
0482  * must be greater than two. Note that only half of the buffer is available
0483  * when the index points to the middle.
0484  */
0485 #define MAX_LOG_TAKE_PART 4
0486 static const char trunc_msg[] = "<truncated>";
0487 
0488 static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
0489 {
0490     /*
0491      * The message should not take the whole buffer. Otherwise, it might
0492      * get removed too soon.
0493      */
0494     u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
0495 
0496     if (*text_len > max_text_len)
0497         *text_len = max_text_len;
0498 
0499     /* enable the warning message (if there is room) */
0500     *trunc_msg_len = strlen(trunc_msg);
0501     if (*text_len >= *trunc_msg_len)
0502         *text_len -= *trunc_msg_len;
0503     else
0504         *trunc_msg_len = 0;
0505 }
0506 
0507 int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);
0508 
0509 static int syslog_action_restricted(int type)
0510 {
0511     if (dmesg_restrict)
0512         return 1;
0513     /*
0514      * Unless restricted, we allow "read all" and "get buffer size"
0515      * for everybody.
0516      */
0517     return type != SYSLOG_ACTION_READ_ALL &&
0518            type != SYSLOG_ACTION_SIZE_BUFFER;
0519 }
0520 
0521 static int check_syslog_permissions(int type, int source)
0522 {
0523     /*
0524      * If this is from /proc/kmsg and we've already opened it, then we've
0525      * already done the capabilities checks at open time.
0526      */
0527     if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
0528         goto ok;
0529 
0530     if (syslog_action_restricted(type)) {
0531         if (capable(CAP_SYSLOG))
0532             goto ok;
0533         /*
0534          * For historical reasons, accept CAP_SYS_ADMIN too, with
0535          * a warning.
0536          */
0537         if (capable(CAP_SYS_ADMIN)) {
0538             pr_warn_once("%s (%d): Attempt to access syslog with "
0539                      "CAP_SYS_ADMIN but no CAP_SYSLOG "
0540                      "(deprecated).\n",
0541                  current->comm, task_pid_nr(current));
0542             goto ok;
0543         }
0544         return -EPERM;
0545     }
0546 ok:
0547     return security_syslog(type);
0548 }
0549 
/*
 * Append @c at the write position *@pp and advance it, unless the position
 * has already reached the end pointer @e, in which case nothing happens.
 */
static void append_char(char **pp, char *e, char c)
{
    char *cur = *pp;

    if (cur >= e)
        return;

    *cur = c;
    *pp = cur + 1;
}
0555 
0556 static ssize_t info_print_ext_header(char *buf, size_t size,
0557                      struct printk_info *info)
0558 {
0559     u64 ts_usec = info->ts_nsec;
0560     char caller[20];
0561 #ifdef CONFIG_PRINTK_CALLER
0562     u32 id = info->caller_id;
0563 
0564     snprintf(caller, sizeof(caller), ",caller=%c%u",
0565          id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
0566 #else
0567     caller[0] = '\0';
0568 #endif
0569 
0570     do_div(ts_usec, 1000);
0571 
0572     return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
0573              (info->facility << 3) | info->level, info->seq,
0574              ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller);
0575 }
0576 
0577 static ssize_t msg_add_ext_text(char *buf, size_t size,
0578                 const char *text, size_t text_len,
0579                 unsigned char endc)
0580 {
0581     char *p = buf, *e = buf + size;
0582     size_t i;
0583 
0584     /* escape non-printable characters */
0585     for (i = 0; i < text_len; i++) {
0586         unsigned char c = text[i];
0587 
0588         if (c < ' ' || c >= 127 || c == '\\')
0589             p += scnprintf(p, e - p, "\\x%02x", c);
0590         else
0591             append_char(&p, e, c);
0592     }
0593     append_char(&p, e, endc);
0594 
0595     return p - buf;
0596 }
0597 
0598 static ssize_t msg_add_dict_text(char *buf, size_t size,
0599                  const char *key, const char *val)
0600 {
0601     size_t val_len = strlen(val);
0602     ssize_t len;
0603 
0604     if (!val_len)
0605         return 0;
0606 
0607     len = msg_add_ext_text(buf, size, "", 0, ' ');  /* dict prefix */
0608     len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '=');
0609     len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n');
0610 
0611     return len;
0612 }
0613 
0614 static ssize_t msg_print_ext_body(char *buf, size_t size,
0615                   char *text, size_t text_len,
0616                   struct dev_printk_info *dev_info)
0617 {
0618     ssize_t len;
0619 
0620     len = msg_add_ext_text(buf, size, text, text_len, '\n');
0621 
0622     if (!dev_info)
0623         goto out;
0624 
0625     len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM",
0626                  dev_info->subsystem);
0627     len += msg_add_dict_text(buf + len, size - len, "DEVICE",
0628                  dev_info->device);
0629 out:
0630     return len;
0631 }
0632 
0633 /* /dev/kmsg - userspace message inject/listen interface */
/* Per-open state of /dev/kmsg, stored in file->private_data. */
struct devkmsg_user {
    atomic64_t seq;                 /* sequence number of the next record to read */
    struct ratelimit_state rs;      /* rate limit applied to writes */
    struct mutex lock;              /* held by devkmsg_read() while reading */
    char buf[CONSOLE_EXT_LOG_MAX];  /* formatted output handed to userspace */

    /* Reader record; @record is bound to @info and @text_buf at open time. */
    struct printk_info info;
    char text_buf[CONSOLE_EXT_LOG_MAX];
    struct printk_record record;
};
0644 
0645 static __printf(3, 4) __cold
0646 int devkmsg_emit(int facility, int level, const char *fmt, ...)
0647 {
0648     va_list args;
0649     int r;
0650 
0651     va_start(args, fmt);
0652     r = vprintk_emit(facility, level, NULL, fmt, args);
0653     va_end(args);
0654 
0655     return r;
0656 }
0657 
0658 static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
0659 {
0660     char *buf, *line;
0661     int level = default_message_loglevel;
0662     int facility = 1;   /* LOG_USER */
0663     struct file *file = iocb->ki_filp;
0664     struct devkmsg_user *user = file->private_data;
0665     size_t len = iov_iter_count(from);
0666     ssize_t ret = len;
0667 
0668     if (!user || len > LOG_LINE_MAX)
0669         return -EINVAL;
0670 
0671     /* Ignore when user logging is disabled. */
0672     if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
0673         return len;
0674 
0675     /* Ratelimit when not explicitly enabled. */
0676     if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) {
0677         if (!___ratelimit(&user->rs, current->comm))
0678             return ret;
0679     }
0680 
0681     buf = kmalloc(len+1, GFP_KERNEL);
0682     if (buf == NULL)
0683         return -ENOMEM;
0684 
0685     buf[len] = '\0';
0686     if (!copy_from_iter_full(buf, len, from)) {
0687         kfree(buf);
0688         return -EFAULT;
0689     }
0690 
0691     /*
0692      * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
0693      * the decimal value represents 32bit, the lower 3 bit are the log
0694      * level, the rest are the log facility.
0695      *
0696      * If no prefix or no userspace facility is specified, we
0697      * enforce LOG_USER, to be able to reliably distinguish
0698      * kernel-generated messages from userspace-injected ones.
0699      */
0700     line = buf;
0701     if (line[0] == '<') {
0702         char *endp = NULL;
0703         unsigned int u;
0704 
0705         u = simple_strtoul(line + 1, &endp, 10);
0706         if (endp && endp[0] == '>') {
0707             level = LOG_LEVEL(u);
0708             if (LOG_FACILITY(u) != 0)
0709                 facility = LOG_FACILITY(u);
0710             endp++;
0711             line = endp;
0712         }
0713     }
0714 
0715     devkmsg_emit(facility, level, "%s", line);
0716     kfree(buf);
0717     return ret;
0718 }
0719 
0720 static ssize_t devkmsg_read(struct file *file, char __user *buf,
0721                 size_t count, loff_t *ppos)
0722 {
0723     struct devkmsg_user *user = file->private_data;
0724     struct printk_record *r = &user->record;
0725     size_t len;
0726     ssize_t ret;
0727 
0728     if (!user)
0729         return -EBADF;
0730 
0731     ret = mutex_lock_interruptible(&user->lock);
0732     if (ret)
0733         return ret;
0734 
0735     if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
0736         if (file->f_flags & O_NONBLOCK) {
0737             ret = -EAGAIN;
0738             goto out;
0739         }
0740 
0741         /*
0742          * Guarantee this task is visible on the waitqueue before
0743          * checking the wake condition.
0744          *
0745          * The full memory barrier within set_current_state() of
0746          * prepare_to_wait_event() pairs with the full memory barrier
0747          * within wq_has_sleeper().
0748          *
0749          * This pairs with __wake_up_klogd:A.
0750          */
0751         ret = wait_event_interruptible(log_wait,
0752                 prb_read_valid(prb,
0753                     atomic64_read(&user->seq), r)); /* LMM(devkmsg_read:A) */
0754         if (ret)
0755             goto out;
0756     }
0757 
0758     if (r->info->seq != atomic64_read(&user->seq)) {
0759         /* our last seen message is gone, return error and reset */
0760         atomic64_set(&user->seq, r->info->seq);
0761         ret = -EPIPE;
0762         goto out;
0763     }
0764 
0765     len = info_print_ext_header(user->buf, sizeof(user->buf), r->info);
0766     len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
0767                   &r->text_buf[0], r->info->text_len,
0768                   &r->info->dev_info);
0769 
0770     atomic64_set(&user->seq, r->info->seq + 1);
0771 
0772     if (len > count) {
0773         ret = -EINVAL;
0774         goto out;
0775     }
0776 
0777     if (copy_to_user(buf, user->buf, len)) {
0778         ret = -EFAULT;
0779         goto out;
0780     }
0781     ret = len;
0782 out:
0783     mutex_unlock(&user->lock);
0784     return ret;
0785 }
0786 
0787 /*
0788  * Be careful when modifying this function!!!
0789  *
0790  * Only few operations are supported because the device works only with the
0791  * entire variable length messages (records). Non-standard values are
0792  * returned in the other cases and has been this way for quite some time.
0793  * User space applications might depend on this behavior.
0794  */
0795 static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
0796 {
0797     struct devkmsg_user *user = file->private_data;
0798     loff_t ret = 0;
0799 
0800     if (!user)
0801         return -EBADF;
0802     if (offset)
0803         return -ESPIPE;
0804 
0805     switch (whence) {
0806     case SEEK_SET:
0807         /* the first record */
0808         atomic64_set(&user->seq, prb_first_valid_seq(prb));
0809         break;
0810     case SEEK_DATA:
0811         /*
0812          * The first record after the last SYSLOG_ACTION_CLEAR,
0813          * like issued by 'dmesg -c'. Reading /dev/kmsg itself
0814          * changes no global state, and does not clear anything.
0815          */
0816         atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
0817         break;
0818     case SEEK_END:
0819         /* after the last record */
0820         atomic64_set(&user->seq, prb_next_seq(prb));
0821         break;
0822     default:
0823         ret = -EINVAL;
0824     }
0825     return ret;
0826 }
0827 
0828 static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
0829 {
0830     struct devkmsg_user *user = file->private_data;
0831     struct printk_info info;
0832     __poll_t ret = 0;
0833 
0834     if (!user)
0835         return EPOLLERR|EPOLLNVAL;
0836 
0837     poll_wait(file, &log_wait, wait);
0838 
0839     if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
0840         /* return error when data has vanished underneath us */
0841         if (info.seq != atomic64_read(&user->seq))
0842             ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
0843         else
0844             ret = EPOLLIN|EPOLLRDNORM;
0845     }
0846 
0847     return ret;
0848 }
0849 
0850 static int devkmsg_open(struct inode *inode, struct file *file)
0851 {
0852     struct devkmsg_user *user;
0853     int err;
0854 
0855     if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
0856         return -EPERM;
0857 
0858     /* write-only does not need any file context */
0859     if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
0860         err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
0861                            SYSLOG_FROM_READER);
0862         if (err)
0863             return err;
0864     }
0865 
0866     user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
0867     if (!user)
0868         return -ENOMEM;
0869 
0870     ratelimit_default_init(&user->rs);
0871     ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE);
0872 
0873     mutex_init(&user->lock);
0874 
0875     prb_rec_init_rd(&user->record, &user->info,
0876             &user->text_buf[0], sizeof(user->text_buf));
0877 
0878     atomic64_set(&user->seq, prb_first_valid_seq(prb));
0879 
0880     file->private_data = user;
0881     return 0;
0882 }
0883 
0884 static int devkmsg_release(struct inode *inode, struct file *file)
0885 {
0886     struct devkmsg_user *user = file->private_data;
0887 
0888     if (!user)
0889         return 0;
0890 
0891     ratelimit_state_exit(&user->rs);
0892 
0893     mutex_destroy(&user->lock);
0894     kvfree(user);
0895     return 0;
0896 }
0897 
0898 const struct file_operations kmsg_fops = {
0899     .open = devkmsg_open,
0900     .read = devkmsg_read,
0901     .write_iter = devkmsg_write,
0902     .llseek = devkmsg_llseek,
0903     .poll = devkmsg_poll,
0904     .release = devkmsg_release,
0905 };
0906 
0907 #ifdef CONFIG_CRASH_CORE
0908 /*
0909  * This appends the listed symbols to /proc/vmcore
0910  *
0911  * /proc/vmcore is used by various utilities, like crash and makedumpfile to
0912  * obtain access to symbols that are otherwise very difficult to locate.  These
0913  * symbols are specifically used so that utilities can access and extract the
0914  * dmesg log from a vmcore file after a crash.
0915  */
void log_buf_vmcoreinfo_setup(void)
{
    /* Never dereferenced: only used as an operand of sizeof() below. */
    struct dev_printk_info *dev_info = NULL;

    /* Symbols needed to locate the ringbuffer and the clear position. */
    VMCOREINFO_SYMBOL(prb);
    VMCOREINFO_SYMBOL(printk_rb_static);
    VMCOREINFO_SYMBOL(clear_seq);

    /*
     * Export struct size and field offsets. User space tools can
     * parse it and detect any changes to structure down the line.
     */

    /* The ringbuffer itself. */
    VMCOREINFO_STRUCT_SIZE(printk_ringbuffer);
    VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring);
    VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring);
    VMCOREINFO_OFFSET(printk_ringbuffer, fail);

    /* The descriptor ring. */
    VMCOREINFO_STRUCT_SIZE(prb_desc_ring);
    VMCOREINFO_OFFSET(prb_desc_ring, count_bits);
    VMCOREINFO_OFFSET(prb_desc_ring, descs);
    VMCOREINFO_OFFSET(prb_desc_ring, infos);
    VMCOREINFO_OFFSET(prb_desc_ring, head_id);
    VMCOREINFO_OFFSET(prb_desc_ring, tail_id);

    /* A single descriptor. */
    VMCOREINFO_STRUCT_SIZE(prb_desc);
    VMCOREINFO_OFFSET(prb_desc, state_var);
    VMCOREINFO_OFFSET(prb_desc, text_blk_lpos);

    /* Logical positions of a data block. */
    VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos);
    VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
    VMCOREINFO_OFFSET(prb_data_blk_lpos, next);

    /* Record meta-data. */
    VMCOREINFO_STRUCT_SIZE(printk_info);
    VMCOREINFO_OFFSET(printk_info, seq);
    VMCOREINFO_OFFSET(printk_info, ts_nsec);
    VMCOREINFO_OFFSET(printk_info, text_len);
    VMCOREINFO_OFFSET(printk_info, caller_id);
    VMCOREINFO_OFFSET(printk_info, dev_info);

    /* Device properties, including the sizes of the two string fields. */
    VMCOREINFO_STRUCT_SIZE(dev_printk_info);
    VMCOREINFO_OFFSET(dev_printk_info, subsystem);
    VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem));
    VMCOREINFO_OFFSET(dev_printk_info, device);
    VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device));

    /* The text data ring. */
    VMCOREINFO_STRUCT_SIZE(prb_data_ring);
    VMCOREINFO_OFFSET(prb_data_ring, size_bits);
    VMCOREINFO_OFFSET(prb_data_ring, data);
    VMCOREINFO_OFFSET(prb_data_ring, head_lpos);
    VMCOREINFO_OFFSET(prb_data_ring, tail_lpos);

    VMCOREINFO_SIZE(atomic_long_t);
    VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);

    /* Layout of clear_seq. */
    VMCOREINFO_STRUCT_SIZE(latched_seq);
    VMCOREINFO_OFFSET(latched_seq, val);
}
0974 #endif
0975 
0976 /* requested log_buf_len from kernel cmdline */
0977 static unsigned long __initdata new_log_buf_len;
0978 
0979 /* we practice scaling the ring buffer by powers of 2 */
0980 static void __init log_buf_len_update(u64 size)
0981 {
0982     if (size > (u64)LOG_BUF_LEN_MAX) {
0983         size = (u64)LOG_BUF_LEN_MAX;
0984         pr_err("log_buf over 2G is not supported.\n");
0985     }
0986 
0987     if (size)
0988         size = roundup_pow_of_two(size);
0989     if (size > log_buf_len)
0990         new_log_buf_len = (unsigned long)size;
0991 }
0992 
0993 /* save requested log_buf_len since it's too early to process it */
0994 static int __init log_buf_len_setup(char *str)
0995 {
0996     u64 size;
0997 
0998     if (!str)
0999         return -EINVAL;
1000 
1001     size = memparse(str, &str);
1002 
1003     log_buf_len_update(size);
1004 
1005     return 0;
1006 }
1007 early_param("log_buf_len", log_buf_len_setup);
1008 
#ifdef CONFIG_SMP
#define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT)

/*
 * Request a larger log buffer on systems with many possible CPUs.
 *
 * Every possible CPU beyond the first contributes __LOG_CPU_MAX_BUF_LEN
 * bytes. Only when the total contribution exceeds half of the static
 * buffer size (__LOG_BUF_LEN) is a new size requested through
 * log_buf_len_update().
 */
static void __init log_buf_add_cpu(void)
{
    u64 cpu_extra;

    /*
     * archs should set up cpu_possible_bits properly with
     * set_cpu_possible() after setup_arch() but just in
     * case lets ensure this is valid.
     */
    if (num_possible_cpus() == 1)
        return;

    /*
     * Do the multiplication in 64 bits. A 32-bit product can wrap for
     * a large CPU count combined with a large per-CPU shift, which
     * would make the request look too small instead of letting
     * log_buf_len_update() clamp it to the supported maximum.
     */
    cpu_extra = (u64)(num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN;

    /* by default this will only continue through for large > 64 CPUs */
    if (cpu_extra <= __LOG_BUF_LEN / 2)
        return;

    pr_info("log_buf_len individual max cpu contribution: %d bytes\n",
        __LOG_CPU_MAX_BUF_LEN);
    pr_info("log_buf_len total cpu_extra contributions: %llu bytes\n",
        cpu_extra);
    pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN);

    log_buf_len_update(cpu_extra + __LOG_BUF_LEN);
}
#else /* !CONFIG_SMP */
static inline void log_buf_add_cpu(void) {}
#endif /* CONFIG_SMP */
1041 
/*
 * Set the __printk_percpu_data_ready flag. Called from setup_log_buf()
 * on its non-early invocation.
 */
static void __init set_percpu_data_ready(void)
{
    __printk_percpu_data_ready = true;
}
1046 
1047 static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
1048                      struct printk_record *r)
1049 {
1050     struct prb_reserved_entry e;
1051     struct printk_record dest_r;
1052 
1053     prb_rec_init_wr(&dest_r, r->info->text_len);
1054 
1055     if (!prb_reserve(&e, rb, &dest_r))
1056         return 0;
1057 
1058     memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len);
1059     dest_r.info->text_len = r->info->text_len;
1060     dest_r.info->facility = r->info->facility;
1061     dest_r.info->level = r->info->level;
1062     dest_r.info->flags = r->info->flags;
1063     dest_r.info->ts_nsec = r->info->ts_nsec;
1064     dest_r.info->caller_id = r->info->caller_id;
1065     memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info));
1066 
1067     prb_final_commit(&e);
1068 
1069     return prb_record_text_space(&e);
1070 }
1071 
/* Text buffer used by setup_log_buf() when reading the static records. */
static char setup_text_buf[LOG_LINE_MAX] __initdata;

/*
 * Switch from the static boot-time ring buffer to a dynamically allocated
 * one of new_log_buf_len bytes and copy the already stored records over.
 *
 * @early: zero on the call made once per-CPU areas are initialised (see
 *         the comment below); only then is the per-CPU data marked ready
 *         and log_buf_add_cpu() consulted.
 *
 * Does nothing when the buffer was already switched or when no new size
 * was requested. When an allocation fails, the allocations made so far
 * are released and the static buffer stays in use.
 */
void __init setup_log_buf(int early)
{
    struct printk_info *new_infos;
    unsigned int new_descs_count;
    struct prb_desc *new_descs;
    struct printk_info info;
    struct printk_record r;
    unsigned int text_size;
    size_t new_descs_size;
    size_t new_infos_size;
    unsigned long flags;
    char *new_log_buf;
    unsigned int free;
    u64 seq;

    /*
     * Some archs call setup_log_buf() multiple times - first is very
     * early, e.g. from setup_arch(), and second - when percpu_areas
     * are initialised.
     */
    if (!early)
        set_percpu_data_ready();

    /* log_buf no longer points at the static buffer: already switched. */
    if (log_buf != __log_buf)
        return;

    /* Nothing requested yet: let log_buf_add_cpu() possibly request a size. */
    if (!early && !new_log_buf_len)
        log_buf_add_cpu();

    if (!new_log_buf_len)
        return;

    /* One descriptor per 2^PRB_AVGBITS bytes of text buffer. */
    new_descs_count = new_log_buf_len >> PRB_AVGBITS;
    if (new_descs_count == 0) {
        pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len);
        return;
    }

    /* Three allocations: text data, descriptors, per-record infos. */
    new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
    if (unlikely(!new_log_buf)) {
        pr_err("log_buf_len: %lu text bytes not available\n",
               new_log_buf_len);
        return;
    }

    new_descs_size = new_descs_count * sizeof(struct prb_desc);
    new_descs = memblock_alloc(new_descs_size, LOG_ALIGN);
    if (unlikely(!new_descs)) {
        pr_err("log_buf_len: %zu desc bytes not available\n",
               new_descs_size);
        goto err_free_log_buf;
    }

    new_infos_size = new_descs_count * sizeof(struct printk_info);
    new_infos = memblock_alloc(new_infos_size, LOG_ALIGN);
    if (unlikely(!new_infos)) {
        pr_err("log_buf_len: %zu info bytes not available\n",
               new_infos_size);
        goto err_free_descs;
    }

    /* @r reads records from the static buffer into setup_text_buf. */
    prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf));

    prb_init(&printk_rb_dynamic,
         new_log_buf, ilog2(new_log_buf_len),
         new_descs, ilog2(new_descs_count),
         new_infos);

    /* Copy and switch with interrupts disabled on this CPU. */
    local_irq_save(flags);

    log_buf_len = new_log_buf_len;
    log_buf = new_log_buf;
    new_log_buf_len = 0;

    /*
     * @free starts at the static buffer size and is reduced by the
     * text space of every copied record (clamped at 0); it is only
     * used for the statistics message at the end.
     */
    free = __LOG_BUF_LEN;
    prb_for_each_record(0, &printk_rb_static, seq, &r) {
        text_size = add_to_rb(&printk_rb_dynamic, &r);
        if (text_size > free)
            free = 0;
        else
            free -= text_size;
    }

    /* From here on new records go to the dynamic ring buffer. */
    prb = &printk_rb_dynamic;

    local_irq_restore(flags);

    /*
     * Copy any remaining messages that might have appeared from
     * NMI context after copying but before switching to the
     * dynamic buffer.
     */
    prb_for_each_record(seq, &printk_rb_static, seq, &r) {
        text_size = add_to_rb(&printk_rb_dynamic, &r);
        if (text_size > free)
            free = 0;
        else
            free -= text_size;
    }

    /* Records still left in the static buffer were not copied. */
    if (seq != prb_next_seq(&printk_rb_static)) {
        pr_err("dropped %llu messages\n",
               prb_next_seq(&printk_rb_static) - seq);
    }

    pr_info("log_buf_len: %u bytes\n", log_buf_len);
    pr_info("early log buf free: %u(%u%%)\n",
        free, (free * 100) / __LOG_BUF_LEN);
    return;

    /* Error unwinding: release in reverse order of allocation. */
err_free_descs:
    memblock_free(new_descs, new_descs_size);
err_free_log_buf:
    memblock_free(new_log_buf, new_log_buf_len);
}
1189 
/* When true, suppress_message_printing() never suppresses a message. */
static bool __read_mostly ignore_loglevel;

/*
 * Early handler for the "ignore_loglevel" parameter: sets the flag and
 * logs that the loglevel setting is being ignored. @str is not used.
 */
static int __init ignore_loglevel_setup(char *str)
{
    ignore_loglevel = true;
    pr_info("debug: ignoring loglevel setting.\n");

    return 0;
}

early_param("ignore_loglevel", ignore_loglevel_setup);
/* The same flag is also exposed as a module parameter. */
module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(ignore_loglevel,
         "ignore loglevel setting (prints all kernel messages to the console)");
1204 
1205 static bool suppress_message_printing(int level)
1206 {
1207     return (level >= console_loglevel && !ignore_loglevel);
1208 }
1209 
1210 #ifdef CONFIG_BOOT_PRINTK_DELAY
1211 
1212 static int boot_delay; /* msecs delay after each printk during bootup */
1213 static unsigned long long loops_per_msec;   /* based on boot_delay */
1214 
1215 static int __init boot_delay_setup(char *str)
1216 {
1217     unsigned long lpj;
1218 
1219     lpj = preset_lpj ? preset_lpj : 1000000;    /* some guess */
1220     loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
1221 
1222     get_option(&str, &boot_delay);
1223     if (boot_delay > 10 * 1000)
1224         boot_delay = 0;
1225 
1226     pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
1227         "HZ: %d, loops_per_msec: %llu\n",
1228         boot_delay, preset_lpj, lpj, HZ, loops_per_msec);
1229     return 0;
1230 }
1231 early_param("boot_delay", boot_delay_setup);
1232 
1233 static void boot_delay_msec(int level)
1234 {
1235     unsigned long long k;
1236     unsigned long timeout;
1237 
1238     if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING)
1239         || suppress_message_printing(level)) {
1240         return;
1241     }
1242 
1243     k = (unsigned long long)loops_per_msec * boot_delay;
1244 
1245     timeout = jiffies + msecs_to_jiffies(boot_delay);
1246     while (k) {
1247         k--;
1248         cpu_relax();
1249         /*
1250          * use (volatile) jiffies to prevent
1251          * compiler reduction; loop termination via jiffies
1252          * is secondary and may or may not happen.
1253          */
1254         if (time_after(jiffies, timeout))
1255             break;
1256         touch_nmi_watchdog();
1257     }
1258 }
1259 #else
1260 static inline void boot_delay_msec(int level)
1261 {
1262 }
1263 #endif
1264 
/*
 * Whether a timestamp prefix is added when records are formatted (it is
 * the @time argument handed to the print helpers by the syslog readers).
 * Defaults to CONFIG_PRINTK_TIME and is exposed as module parameter "time".
 */
static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
1267 
/*
 * Write the syslog prefix "<level>" for the numeric value @level into @buf.
 *
 * Return: number of characters written, not counting the terminator.
 */
static size_t print_syslog(unsigned int level, char *buf)
{
    int written;

    written = sprintf(buf, "<%u>", level);

    return written;
}
1272 
1273 static size_t print_time(u64 ts, char *buf)
1274 {
1275     unsigned long rem_nsec = do_div(ts, 1000000000);
1276 
1277     return sprintf(buf, "[%5lu.%06lu]",
1278                (unsigned long)ts, rem_nsec / 1000);
1279 }
1280 
#ifdef CONFIG_PRINTK_CALLER
/*
 * Format the caller id @id into @buf as "[<tag><number>]", right-aligned
 * to at least six characters. The tag is 'C' when bit 31 of @id is set
 * and 'T' otherwise; the number is @id with bit 31 cleared.
 *
 * Return: number of characters written, not counting the terminator.
 */
static size_t print_caller(u32 id, char *buf)
{
    char caller[12];
    char tag = 'T';

    if (id & 0x80000000)
        tag = 'C';

    snprintf(caller, sizeof(caller), "%c%u", tag, id & ~0x80000000);

    return sprintf(buf, "[%6s]", caller);
}
#else
#define print_caller(id, buf) 0
#endif
1293 
1294 static size_t info_print_prefix(const struct printk_info  *info, bool syslog,
1295                 bool time, char *buf)
1296 {
1297     size_t len = 0;
1298 
1299     if (syslog)
1300         len = print_syslog((info->facility << 3) | info->level, buf);
1301 
1302     if (time)
1303         len += print_time(info->ts_nsec, buf + len);
1304 
1305     len += print_caller(info->caller_id, buf + len);
1306 
1307     if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) {
1308         buf[len++] = ' ';
1309         buf[len] = '\0';
1310     }
1311 
1312     return len;
1313 }
1314 
/*
 * Prepare the record for printing. The text is shifted within the given
 * buffer to avoid a need for another one. The following operations are
 * done:
 *
 *   - Add prefix for each line.
 *   - Drop truncated lines that no longer fit into the buffer.
 *   - Add the trailing newline that has been removed in vprintk_store().
 *   - Add a string terminator.
 *
 * @r:      record to prepare; @r->text_buf is rewritten in place
 * @syslog: include the syslog level in each line prefix
 * @time:   include the timestamp in each line prefix
 *
 * Since the produced string is always terminated, the maximum possible
 * return value is @r->text_buf_size - 1;
 *
 * Return: The length of the updated/prepared text, including the added
 * prefixes and the newline. The terminator is not counted. The dropped
 * line(s) are not counted.
 */
static size_t record_print_text(struct printk_record *r, bool syslog,
                bool time)
{
    size_t text_len = r->info->text_len;
    size_t buf_size = r->text_buf_size;
    char *text = r->text_buf;
    char prefix[PREFIX_MAX];
    bool truncated = false;
    size_t prefix_len;
    size_t line_len;
    size_t len = 0;
    char *next;

    /*
     * If the message was truncated because the buffer was not large
     * enough, treat the available text as if it were the full text.
     */
    if (text_len > buf_size)
        text_len = buf_size;

    /* The same prefix is reused for every line of the record. */
    prefix_len = info_print_prefix(r->info, syslog, time, prefix);

    /*
     * @text_len: bytes of unprocessed text
     * @line_len: bytes of current line _without_ newline
     * @text:     pointer to beginning of current line
     * @len:      number of bytes prepared in r->text_buf
     */
    for (;;) {
        next = memchr(text, '\n', text_len);
        if (next) {
            line_len = next - text;
        } else {
            /* Drop truncated line(s). */
            if (truncated)
                break;
            line_len = text_len;
        }

        /*
         * Truncate the text if there is not enough space to add the
         * prefix and a trailing newline and a terminator.
         */
        if (len + prefix_len + text_len + 1 + 1 > buf_size) {
            /* Drop even the current line if no space. */
            if (len + prefix_len + line_len + 1 + 1 > buf_size)
                break;

            text_len = buf_size - len - prefix_len - 1 - 1;
            truncated = true;
        }

        /*
         * Shift all remaining text right to make room for the prefix
         * (source and destination overlap, hence memmove()), then put
         * the prefix in front of the current line.
         */
        memmove(text + prefix_len, text, text_len);
        memcpy(text, prefix, prefix_len);

        /*
         * Increment the prepared length to include the text and
         * prefix that were just moved+copied. Also increment for the
         * newline at the end of this line. If this is the last line,
         * there is no newline, but it will be added immediately below.
         */
        len += prefix_len + line_len + 1;
        if (text_len == line_len) {
            /*
             * This is the last line. Add the trailing newline
             * removed in vprintk_store().
             */
            text[prefix_len + line_len] = '\n';
            break;
        }

        /*
         * Advance beyond the added prefix and the related line with
         * its newline.
         */
        text += prefix_len + line_len + 1;

        /*
         * The remaining text has only decreased by the line with its
         * newline.
         *
         * Note that @text_len can become zero. It happens when @text
         * ended with a newline (either due to truncation or the
         * original string ending with "\n\n"). The loop is correctly
         * repeated and (if not truncated) an empty line with a prefix
         * will be prepared.
         */
        text_len -= line_len + 1;
    }

    /*
     * If a buffer was provided, it will be terminated. Space for the
     * string terminator is guaranteed to be available. The terminator is
     * not counted in the return value.
     */
    if (buf_size > 0)
        r->text_buf[len] = 0;

    return len;
}
1432 
1433 static size_t get_record_print_text_size(struct printk_info *info,
1434                      unsigned int line_count,
1435                      bool syslog, bool time)
1436 {
1437     char prefix[PREFIX_MAX];
1438     size_t prefix_len;
1439 
1440     prefix_len = info_print_prefix(info, syslog, time, prefix);
1441 
1442     /*
1443      * Each line will be preceded with a prefix. The intermediate
1444      * newlines are already within the text, but a final trailing
1445      * newline will be added.
1446      */
1447     return ((prefix_len * line_count) + info->text_len + 1);
1448 }
1449 
1450 /*
1451  * Beginning with @start_seq, find the first record where it and all following
1452  * records up to (but not including) @max_seq fit into @size.
1453  *
1454  * @max_seq is simply an upper bound and does not need to exist. If the caller
1455  * does not require an upper bound, -1 can be used for @max_seq.
1456  */
1457 static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
1458                   bool syslog, bool time)
1459 {
1460     struct printk_info info;
1461     unsigned int line_count;
1462     size_t len = 0;
1463     u64 seq;
1464 
1465     /* Determine the size of the records up to @max_seq. */
1466     prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
1467         if (info.seq >= max_seq)
1468             break;
1469         len += get_record_print_text_size(&info, line_count, syslog, time);
1470     }
1471 
1472     /*
1473      * Adjust the upper bound for the next loop to avoid subtracting
1474      * lengths that were never added.
1475      */
1476     if (seq < max_seq)
1477         max_seq = seq;
1478 
1479     /*
1480      * Move first record forward until length fits into the buffer. Ignore
1481      * newest messages that were not counted in the above cycle. Messages
1482      * might appear and get lost in the meantime. This is a best effort
1483      * that prevents an infinite loop that could occur with a retry.
1484      */
1485     prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
1486         if (len <= size || info.seq >= max_seq)
1487             break;
1488         len -= get_record_print_text_size(&info, line_count, syslog, time);
1489     }
1490 
1491     return seq;
1492 }
1493 
/*
 * Copy formatted records, starting at @syslog_seq, into the user buffer
 * @buf of @size bytes, waiting until the first record is available.
 *
 * A record that does not fit completely is only handed out partially when
 * nothing else has been copied yet; the position inside it is remembered in
 * @syslog_partial for the next call.
 *
 * The caller is responsible for making sure @size is greater than 0.
 *
 * Return: number of bytes copied; -ENOMEM when the text buffer cannot be
 * allocated; -EFAULT when copying fails before anything was copied; or the
 * non-zero result of wait_event_interruptible().
 */
static int syslog_print(char __user *buf, int size)
{
    struct printk_info info;
    struct printk_record r;
    char *text;
    int len = 0;
    u64 seq;

    text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
    if (!text)
        return -ENOMEM;

    prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);

    mutex_lock(&syslog_lock);

    /*
     * Wait for the @syslog_seq record to be available. @syslog_seq may
     * change while waiting.
     */
    do {
        seq = syslog_seq;

        /* syslog_lock is not held while waiting. */
        mutex_unlock(&syslog_lock);
        /*
         * Guarantee this task is visible on the waitqueue before
         * checking the wake condition.
         *
         * The full memory barrier within set_current_state() of
         * prepare_to_wait_event() pairs with the full memory barrier
         * within wq_has_sleeper().
         *
         * This pairs with __wake_up_klogd:A.
         */
        len = wait_event_interruptible(log_wait,
                prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
        mutex_lock(&syslog_lock);

        /* Non-zero: the wait did not complete normally; return that value. */
        if (len)
            goto out;
    } while (syslog_seq != seq);

    /*
     * Copy records that fit into the buffer. The above cycle makes sure
     * that the first record is always available.
     */
    do {
        size_t n;
        size_t skip;
        int err;

        if (!prb_read_valid(prb, syslog_seq, &r))
            break;

        if (r.info->seq != syslog_seq) {
            /* message is gone, move to next valid one */
            syslog_seq = r.info->seq;
            syslog_partial = 0;
        }

        /*
         * To keep reading/counting partial line consistent,
         * use printk_time value as of the beginning of a line.
         */
        if (!syslog_partial)
            syslog_time = printk_time;

        /* @skip: bytes of this record already handed out earlier. */
        skip = syslog_partial;
        n = record_print_text(&r, true, syslog_time);
        if (n - syslog_partial <= size) {
            /* message fits into buffer, move forward */
            syslog_seq = r.info->seq + 1;
            n -= syslog_partial;
            syslog_partial = 0;
        } else if (!len){
            /* partial read(), remember position */
            n = size;
            syslog_partial += n;
        } else
            n = 0;

        if (!n)
            break;

        /* The copy to user space is done without syslog_lock held. */
        mutex_unlock(&syslog_lock);
        err = copy_to_user(buf, text + skip, n);
        mutex_lock(&syslog_lock);

        if (err) {
            /* Report the fault only if nothing was copied before. */
            if (!len)
                len = -EFAULT;
            break;
        }

        len += n;
        size -= n;
        buf += n;
    } while (size);
out:
    mutex_unlock(&syslog_lock);
    kfree(text);
    return len;
}
1598 
1599 static int syslog_print_all(char __user *buf, int size, bool clear)
1600 {
1601     struct printk_info info;
1602     struct printk_record r;
1603     char *text;
1604     int len = 0;
1605     u64 seq;
1606     bool time;
1607 
1608     text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
1609     if (!text)
1610         return -ENOMEM;
1611 
1612     time = printk_time;
1613     /*
1614      * Find first record that fits, including all following records,
1615      * into the user-provided buffer for this dump.
1616      */
1617     seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
1618                      size, true, time);
1619 
1620     prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
1621 
1622     len = 0;
1623     prb_for_each_record(seq, prb, seq, &r) {
1624         int textlen;
1625 
1626         textlen = record_print_text(&r, true, time);
1627 
1628         if (len + textlen > size) {
1629             seq--;
1630             break;
1631         }
1632 
1633         if (copy_to_user(buf + len, text, textlen))
1634             len = -EFAULT;
1635         else
1636             len += textlen;
1637 
1638         if (len < 0)
1639             break;
1640     }
1641 
1642     if (clear) {
1643         mutex_lock(&syslog_lock);
1644         latched_seq_write(&clear_seq, seq);
1645         mutex_unlock(&syslog_lock);
1646     }
1647 
1648     kfree(text);
1649     return len;
1650 }
1651 
1652 static void syslog_clear(void)
1653 {
1654     mutex_lock(&syslog_lock);
1655     latched_seq_write(&clear_seq, prb_next_seq(prb));
1656     mutex_unlock(&syslog_lock);
1657 }
1658 
/*
 * Execute one syslog action.
 *
 * @type:   SYSLOG_ACTION_* command to execute
 * @buf:    user buffer, used by the read actions only
 * @len:    length of @buf for the read actions; the requested console
 *          loglevel for SYSLOG_ACTION_CONSOLE_LEVEL
 * @source: origin of the request; passed to check_syslog_permissions()
 *          and, for SYSLOG_ACTION_SIZE_UNREAD, SYSLOG_FROM_PROC selects the
 *          record-count short-cut
 *
 * Return: a negative error code on failure; otherwise 0, or an
 * action-specific value (bytes read, unread size/count, buffer size).
 */
int do_syslog(int type, char __user *buf, int len, int source)
{
    struct printk_info info;
    bool clear = false;
    /* LOGLEVEL_DEFAULT means "no loglevel saved by CONSOLE_OFF". */
    static int saved_console_loglevel = LOGLEVEL_DEFAULT;
    int error;

    error = check_syslog_permissions(type, source);
    if (error)
        return error;

    /* From here on @error is 0 unless an action below changes it. */
    switch (type) {
    case SYSLOG_ACTION_CLOSE:   /* Close log */
        break;
    case SYSLOG_ACTION_OPEN:    /* Open log */
        break;
    case SYSLOG_ACTION_READ:    /* Read from log */
        if (!buf || len < 0)
            return -EINVAL;
        if (!len)
            return 0;
        if (!access_ok(buf, len))
            return -EFAULT;
        error = syslog_print(buf, len);
        break;
    /* Read/clear last kernel messages */
    case SYSLOG_ACTION_READ_CLEAR:
        clear = true;
        fallthrough;
    /* Read last kernel messages */
    case SYSLOG_ACTION_READ_ALL:
        if (!buf || len < 0)
            return -EINVAL;
        if (!len)
            return 0;
        if (!access_ok(buf, len))
            return -EFAULT;
        error = syslog_print_all(buf, len, clear);
        break;
    /* Clear ring buffer */
    case SYSLOG_ACTION_CLEAR:
        syslog_clear();
        break;
    /* Disable logging to console */
    case SYSLOG_ACTION_CONSOLE_OFF:
        /* Save the current level only once, so repeated OFFs keep it. */
        if (saved_console_loglevel == LOGLEVEL_DEFAULT)
            saved_console_loglevel = console_loglevel;
        console_loglevel = minimum_console_loglevel;
        break;
    /* Enable logging to console */
    case SYSLOG_ACTION_CONSOLE_ON:
        if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
            console_loglevel = saved_console_loglevel;
            saved_console_loglevel = LOGLEVEL_DEFAULT;
        }
        break;
    /* Set level of messages printed to console */
    case SYSLOG_ACTION_CONSOLE_LEVEL:
        /* Here @len carries the requested loglevel, valid range 1..8. */
        if (len < 1 || len > 8)
            return -EINVAL;
        if (len < minimum_console_loglevel)
            len = minimum_console_loglevel;
        console_loglevel = len;
        /* Implicitly re-enable logging to console */
        saved_console_loglevel = LOGLEVEL_DEFAULT;
        break;
    /* Number of chars in the log buffer */
    case SYSLOG_ACTION_SIZE_UNREAD:
        mutex_lock(&syslog_lock);
        if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
            /* No unread messages. */
            mutex_unlock(&syslog_lock);
            return 0;
        }
        if (info.seq != syslog_seq) {
            /* messages are gone, move to first one */
            syslog_seq = info.seq;
            syslog_partial = 0;
        }
        if (source == SYSLOG_FROM_PROC) {
            /*
             * Short-cut for poll("/proc/kmsg") which simply checks
             * for pending data, not the size; return the count of
             * records, not the length.
             */
            error = prb_next_seq(prb) - syslog_seq;
        } else {
            /*
             * The first record may already be partially read, in
             * which case it was formatted with @syslog_time; all
             * following records use the current printk_time.
             */
            bool time = syslog_partial ? syslog_time : printk_time;
            unsigned int line_count;
            u64 seq;

            /* @error is 0 here and serves as the byte accumulator. */
            prb_for_each_info(syslog_seq, prb, seq, &info,
                      &line_count) {
                error += get_record_print_text_size(&info, line_count,
                                    true, time);
                time = printk_time;
            }
            /* Do not count what was already read of the first record. */
            error -= syslog_partial;
        }
        mutex_unlock(&syslog_lock);
        break;
    /* Size of the log buffer */
    case SYSLOG_ACTION_SIZE_BUFFER:
        error = log_buf_len;
        break;
    default:
        error = -EINVAL;
        break;
    }

    return error;
}
1771 
/*
 * syslog system call: forwards @type, @buf and @len unchanged to
 * do_syslog() with SYSLOG_FROM_READER as the request source.
 */
SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
{
    return do_syslog(type, buf, len, SYSLOG_FROM_READER);
}
1776 
/*
 * Special console_lock variants that help to reduce the risk of soft-lockups.
 * They allow to pass console_lock to another printk() call using a busy wait.
 */

#ifdef CONFIG_LOCKDEP
/* Lockdep map used to annotate the busy-wait hand-off as a spinlock. */
static struct lockdep_map console_owner_dep_map = {
    .name = "console_owner"
};
#endif

/* Protects updates of console_owner and the sampling of console_waiter. */
static DEFINE_RAW_SPINLOCK(console_owner_lock);
/*
 * Task currently inside the section where busy waiting is allowed: set in
 * console_lock_spinning_enable(), cleared to NULL in
 * console_lock_spinning_disable_and_check().
 */
static struct task_struct *console_owner;
/*
 * True while a task busy-waits in console_trylock_spinning(); cleared by
 * the owner when it hands the lock over.
 */
static bool console_waiter;
1791 
/**
 * console_lock_spinning_enable - mark beginning of code where another
 *  thread might safely busy wait
 *
 * This basically converts console_lock into a spinlock. This marks
 * the section where the console_lock owner can not sleep, because
 * there may be a waiter spinning (like a spinlock). Also it must be
 * ready to hand over the lock at the end of the section.
 */
static void console_lock_spinning_enable(void)
{
    /* Publish the current task as owner under console_owner_lock. */
    raw_spin_lock(&console_owner_lock);
    console_owner = current;
    raw_spin_unlock(&console_owner_lock);

    /* The waiter may spin on us after setting console_owner */
    spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}
1810 
/**
 * console_lock_spinning_disable_and_check - mark end of code where another
 *  thread was able to busy wait and check if there is a waiter
 *
 * This is called at the end of the section where spinning is allowed.
 * It has two functions. First, it is a signal that it is no longer
 * safe to start busy waiting for the lock. Second, it checks if
 * there is a busy waiter and passes the lock rights to her.
 *
 * Important: Callers lose the lock if there was a busy waiter.
 *  They must not touch items synchronized by console_lock
 *  in this case.
 *
 * Return: 1 if the lock rights were passed, 0 otherwise.
 */
static int console_lock_spinning_disable_and_check(void)
{
    int waiter;

    /*
     * Sample the waiter flag and clear the owner in one critical
     * section, so no new waiter can start spinning after the sample.
     */
    raw_spin_lock(&console_owner_lock);
    waiter = READ_ONCE(console_waiter);
    console_owner = NULL;
    raw_spin_unlock(&console_owner_lock);

    /* Nobody is spinning: the caller keeps console_lock. */
    if (!waiter) {
        spin_release(&console_owner_dep_map, _THIS_IP_);
        return 0;
    }

    /* The waiter is now free to continue */
    WRITE_ONCE(console_waiter, false);

    spin_release(&console_owner_dep_map, _THIS_IP_);

    /*
     * Hand off console_lock to waiter. The waiter will perform
     * the up(). After this, the waiter is the console_lock owner.
     */
    mutex_release(&console_lock_dep_map, _THIS_IP_);
    return 1;
}
1852 
/**
 * console_trylock_spinning - try to get console_lock by busy waiting
 *
 * This allows to busy wait for the console_lock when the current
 * owner is running in specially marked sections. It means that
 * the current owner is running and cannot reschedule until it
 * is ready to lose the lock.
 *
 * Return: 1 if we got the lock, 0 otherwise
 */
static int console_trylock_spinning(void)
{
    struct task_struct *owner = NULL;
    bool waiter;
    bool spin = false;
    unsigned long flags;

    /* Fast path: the lock is free. */
    if (console_trylock())
        return 1;

    /*
     * It's unsafe to spin once a panic has begun. If we are the
     * panic CPU, we may have already halted the owner of the
     * console_sem. If we are not the panic CPU, then we should
     * avoid taking console_sem, so the panic CPU has a better
     * chance of cleanly acquiring it later.
     */
    if (panic_in_progress())
        return 0;

    printk_safe_enter_irqsave(flags);

    /*
     * Register as the (single) waiter under console_owner_lock, but
     * only if there is an owner other than the current task and no
     * waiter is registered yet.
     */
    raw_spin_lock(&console_owner_lock);
    owner = READ_ONCE(console_owner);
    waiter = READ_ONCE(console_waiter);
    if (!waiter && owner && owner != current) {
        WRITE_ONCE(console_waiter, true);
        spin = true;
    }
    raw_spin_unlock(&console_owner_lock);

    /*
     * If there is an active printk() writing to the
     * consoles, instead of having it write our data too,
     * see if we can offload that load from the active
     * printer, and do some printing ourselves.
     * Go into a spin only if there isn't already a waiter
     * spinning, and there is an active printer, and
     * that active printer isn't us (recursive printk?).
     */
    if (!spin) {
        printk_safe_exit_irqrestore(flags);
        return 0;
    }

    /* We spin waiting for the owner to release us */
    spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
    /* Owner will clear console_waiter on hand off */
    while (READ_ONCE(console_waiter))
        cpu_relax();
    spin_release(&console_owner_dep_map, _THIS_IP_);

    printk_safe_exit_irqrestore(flags);
    /*
     * The owner passed the console lock to us.
     * Since we did not spin on console lock, annotate
     * this as a trylock. Otherwise lockdep will
     * complain.
     */
    mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);

    return 1;
}
1926 
1927 /*
1928  * Call the specified console driver, asking it to write out the specified
1929  * text and length. If @dropped_text is non-NULL and any records have been
1930  * dropped, a dropped message will be written out first.
1931  */
1932 static void call_console_driver(struct console *con, const char *text, size_t len,
1933                 char *dropped_text)
1934 {
1935     size_t dropped_len;
1936 
1937     if (con->dropped && dropped_text) {
1938         dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX,
1939                        "** %lu printk messages dropped **\n",
1940                        con->dropped);
1941         con->dropped = 0;
1942         con->write(con, dropped_text, dropped_len);
1943     }
1944 
1945     con->write(con, text, len);
1946 }
1947 
1948 /*
1949  * Recursion is tracked separately on each CPU. If NMIs are supported, an
1950  * additional NMI context per CPU is also separately tracked. Until per-CPU
1951  * is available, a separate "early tracking" is performed.
1952  */
1953 static DEFINE_PER_CPU(u8, printk_count);
1954 static u8 printk_count_early;
1955 #ifdef CONFIG_HAVE_NMI
1956 static DEFINE_PER_CPU(u8, printk_count_nmi);
1957 static u8 printk_count_nmi_early;
1958 #endif
1959 
1960 /*
1961  * Recursion is limited to keep the output sane. printk() should not require
1962  * more than 1 level of recursion (allowing, for example, printk() to trigger
1963  * a WARN), but a higher value is used in case some printk-internal errors
1964  * exist, such as the ringbuffer validation checks failing.
1965  */
1966 #define PRINTK_MAX_RECURSION 3
1967 
1968 /*
1969  * Return a pointer to the dedicated counter for the CPU+context of the
1970  * caller.
1971  */
1972 static u8 *__printk_recursion_counter(void)
1973 {
1974 #ifdef CONFIG_HAVE_NMI
1975     if (in_nmi()) {
1976         if (printk_percpu_data_ready())
1977             return this_cpu_ptr(&printk_count_nmi);
1978         return &printk_count_nmi_early;
1979     }
1980 #endif
1981     if (printk_percpu_data_ready())
1982         return this_cpu_ptr(&printk_count);
1983     return &printk_count_early;
1984 }
1985 
/*
 * Begin recursion tracking. Interrupts are disabled to keep the tracking
 * simple. The macro evaluates to a boolean that the caller must check:
 * true means the recursion is allowed and the counter was incremented,
 * false means the limit was exceeded. In the false case interrupts have
 * already been restored, i.e. they are not left disabled.
 *
 * @recursion_ptr must be a variable of type (u8 *) and is the same variable
 * that is passed to printk_exit_irqrestore().
 */
#define printk_enter_irqsave(recursion_ptr, flags)		\
({								\
	bool entered;						\
								\
	typecheck(u8 *, recursion_ptr);				\
	local_irq_save(flags);					\
	(recursion_ptr) = __printk_recursion_counter();		\
	entered = *(recursion_ptr) <= PRINTK_MAX_RECURSION;	\
	if (entered)						\
		(*(recursion_ptr))++;				\
	else							\
		local_irq_restore(flags);			\
	entered;						\
})
2009 
/*
 * Leave recursion tracking: drop the counter that was taken by
 * printk_enter_irqsave() and restore the saved interrupt state.
 */
#define printk_exit_irqrestore(recursion_ptr, flags)	\
	do {						\
		typecheck(u8 *, recursion_ptr);		\
		--(*(recursion_ptr));			\
		local_irq_restore(flags);		\
	} while (0)
2017 
2018 int printk_delay_msec __read_mostly;
2019 
2020 static inline void printk_delay(int level)
2021 {
2022     boot_delay_msec(level);
2023 
2024     if (unlikely(printk_delay_msec)) {
2025         int m = printk_delay_msec;
2026 
2027         while (m--) {
2028             mdelay(1);
2029             touch_nmi_watchdog();
2030         }
2031     }
2032 }
2033 
2034 static inline u32 printk_caller_id(void)
2035 {
2036     return in_task() ? task_pid_nr(current) :
2037         0x80000000 + smp_processor_id();
2038 }
2039 
2040 /**
2041  * printk_parse_prefix - Parse level and control flags.
2042  *
2043  * @text:     The terminated text message.
2044  * @level:    A pointer to the current level value, will be updated.
2045  * @flags:    A pointer to the current printk_info flags, will be updated.
2046  *
2047  * @level may be NULL if the caller is not interested in the parsed value.
2048  * Otherwise the variable pointed to by @level must be set to
2049  * LOGLEVEL_DEFAULT in order to be updated with the parsed value.
2050  *
2051  * @flags may be NULL if the caller is not interested in the parsed value.
2052  * Otherwise the variable pointed to by @flags will be OR'd with the parsed
2053  * value.
2054  *
2055  * Return: The length of the parsed level and control flags.
2056  */
2057 u16 printk_parse_prefix(const char *text, int *level,
2058             enum printk_info_flags *flags)
2059 {
2060     u16 prefix_len = 0;
2061     int kern_level;
2062 
2063     while (*text) {
2064         kern_level = printk_get_level(text);
2065         if (!kern_level)
2066             break;
2067 
2068         switch (kern_level) {
2069         case '0' ... '7':
2070             if (level && *level == LOGLEVEL_DEFAULT)
2071                 *level = kern_level - '0';
2072             break;
2073         case 'c':   /* KERN_CONT */
2074             if (flags)
2075                 *flags |= LOG_CONT;
2076         }
2077 
2078         prefix_len += 2;
2079         text += 2;
2080     }
2081 
2082     return prefix_len;
2083 }
2084 
2085 __printf(5, 0)
2086 static u16 printk_sprint(char *text, u16 size, int facility,
2087              enum printk_info_flags *flags, const char *fmt,
2088              va_list args)
2089 {
2090     u16 text_len;
2091 
2092     text_len = vscnprintf(text, size, fmt, args);
2093 
2094     /* Mark and strip a trailing newline. */
2095     if (text_len && text[text_len - 1] == '\n') {
2096         text_len--;
2097         *flags |= LOG_NEWLINE;
2098     }
2099 
2100     /* Strip log level and control flags. */
2101     if (facility == 0) {
2102         u16 prefix_len;
2103 
2104         prefix_len = printk_parse_prefix(text, NULL, NULL);
2105         if (prefix_len) {
2106             text_len -= prefix_len;
2107             memmove(text, text + prefix_len, text_len);
2108         }
2109     }
2110 
2111     trace_console_rcuidle(text, text_len);
2112 
2113     return text_len;
2114 }
2115 
2116 __printf(4, 0)
2117 int vprintk_store(int facility, int level,
2118           const struct dev_printk_info *dev_info,
2119           const char *fmt, va_list args)
2120 {
2121     struct prb_reserved_entry e;
2122     enum printk_info_flags flags = 0;
2123     struct printk_record r;
2124     unsigned long irqflags;
2125     u16 trunc_msg_len = 0;
2126     char prefix_buf[8];
2127     u8 *recursion_ptr;
2128     u16 reserve_size;
2129     va_list args2;
2130     u32 caller_id;
2131     u16 text_len;
2132     int ret = 0;
2133     u64 ts_nsec;
2134 
2135     if (!printk_enter_irqsave(recursion_ptr, irqflags))
2136         return 0;
2137 
2138     /*
2139      * Since the duration of printk() can vary depending on the message
2140      * and state of the ringbuffer, grab the timestamp now so that it is
2141      * close to the call of printk(). This provides a more deterministic
2142      * timestamp with respect to the caller.
2143      */
2144     ts_nsec = local_clock();
2145 
2146     caller_id = printk_caller_id();
2147 
2148     /*
2149      * The sprintf needs to come first since the syslog prefix might be
2150      * passed in as a parameter. An extra byte must be reserved so that
2151      * later the vscnprintf() into the reserved buffer has room for the
2152      * terminating '\0', which is not counted by vsnprintf().
2153      */
2154     va_copy(args2, args);
2155     reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
2156     va_end(args2);
2157 
2158     if (reserve_size > LOG_LINE_MAX)
2159         reserve_size = LOG_LINE_MAX;
2160 
2161     /* Extract log level or control flags. */
2162     if (facility == 0)
2163         printk_parse_prefix(&prefix_buf[0], &level, &flags);
2164 
2165     if (level == LOGLEVEL_DEFAULT)
2166         level = default_message_loglevel;
2167 
2168     if (dev_info)
2169         flags |= LOG_NEWLINE;
2170 
2171     if (flags & LOG_CONT) {
2172         prb_rec_init_wr(&r, reserve_size);
2173         if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
2174             text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
2175                          facility, &flags, fmt, args);
2176             r.info->text_len += text_len;
2177 
2178             if (flags & LOG_NEWLINE) {
2179                 r.info->flags |= LOG_NEWLINE;
2180                 prb_final_commit(&e);
2181             } else {
2182                 prb_commit(&e);
2183             }
2184 
2185             ret = text_len;
2186             goto out;
2187         }
2188     }
2189 
2190     /*
2191      * Explicitly initialize the record before every prb_reserve() call.
2192      * prb_reserve_in_last() and prb_reserve() purposely invalidate the
2193      * structure when they fail.
2194      */
2195     prb_rec_init_wr(&r, reserve_size);
2196     if (!prb_reserve(&e, prb, &r)) {
2197         /* truncate the message if it is too long for empty buffer */
2198         truncate_msg(&reserve_size, &trunc_msg_len);
2199 
2200         prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
2201         if (!prb_reserve(&e, prb, &r))
2202             goto out;
2203     }
2204 
2205     /* fill message */
2206     text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
2207     if (trunc_msg_len)
2208         memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len);
2209     r.info->text_len = text_len + trunc_msg_len;
2210     r.info->facility = facility;
2211     r.info->level = level & 7;
2212     r.info->flags = flags & 0x1f;
2213     r.info->ts_nsec = ts_nsec;
2214     r.info->caller_id = caller_id;
2215     if (dev_info)
2216         memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
2217 
2218     /* A message without a trailing newline can be continued. */
2219     if (!(flags & LOG_NEWLINE))
2220         prb_commit(&e);
2221     else
2222         prb_final_commit(&e);
2223 
2224     ret = text_len + trunc_msg_len;
2225 out:
2226     printk_exit_irqrestore(recursion_ptr, irqflags);
2227     return ret;
2228 }
2229 
2230 asmlinkage int vprintk_emit(int facility, int level,
2231                 const struct dev_printk_info *dev_info,
2232                 const char *fmt, va_list args)
2233 {
2234     int printed_len;
2235     bool in_sched = false;
2236 
2237     /* Suppress unimportant messages after panic happens */
2238     if (unlikely(suppress_printk))
2239         return 0;
2240 
2241     if (unlikely(suppress_panic_printk) &&
2242         atomic_read(&panic_cpu) != raw_smp_processor_id())
2243         return 0;
2244 
2245     if (level == LOGLEVEL_SCHED) {
2246         level = LOGLEVEL_DEFAULT;
2247         in_sched = true;
2248     }
2249 
2250     printk_delay(level);
2251 
2252     printed_len = vprintk_store(facility, level, dev_info, fmt, args);
2253 
2254     /* If called from the scheduler, we can not call up(). */
2255     if (!in_sched) {
2256         /*
2257          * The caller may be holding system-critical or
2258          * timing-sensitive locks. Disable preemption during
2259          * printing of all remaining records to all consoles so that
2260          * this context can return as soon as possible. Hopefully
2261          * another printk() caller will take over the printing.
2262          */
2263         preempt_disable();
2264         /*
2265          * Try to acquire and then immediately release the console
2266          * semaphore. The release will print out buffers. With the
2267          * spinning variant, this context tries to take over the
2268          * printing from another printing context.
2269          */
2270         if (console_trylock_spinning())
2271             console_unlock();
2272         preempt_enable();
2273     }
2274 
2275     wake_up_klogd();
2276     return printed_len;
2277 }
2278 EXPORT_SYMBOL(vprintk_emit);
2279 
2280 int vprintk_default(const char *fmt, va_list args)
2281 {
2282     return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
2283 }
2284 EXPORT_SYMBOL_GPL(vprintk_default);
2285 
2286 asmlinkage __visible int _printk(const char *fmt, ...)
2287 {
2288     va_list args;
2289     int r;
2290 
2291     va_start(args, fmt);
2292     r = vprintk(fmt, args);
2293     va_end(args);
2294 
2295     return r;
2296 }
2297 EXPORT_SYMBOL(_printk);
2298 
2299 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
2300 
2301 #else /* CONFIG_PRINTK */
2302 
2303 #define CONSOLE_LOG_MAX     0
2304 #define DROPPED_TEXT_MAX    0
2305 #define printk_time     false
2306 
2307 #define prb_read_valid(rb, seq, r)  false
2308 #define prb_first_valid_seq(rb)     0
2309 #define prb_next_seq(rb)        0
2310 
2311 static u64 syslog_seq;
2312 
2313 static size_t record_print_text(const struct printk_record *r,
2314                 bool syslog, bool time)
2315 {
2316     return 0;
2317 }
2318 static ssize_t info_print_ext_header(char *buf, size_t size,
2319                      struct printk_info *info)
2320 {
2321     return 0;
2322 }
2323 static ssize_t msg_print_ext_body(char *buf, size_t size,
2324                   char *text, size_t text_len,
2325                   struct dev_printk_info *dev_info) { return 0; }
2326 static void console_lock_spinning_enable(void) { }
2327 static int console_lock_spinning_disable_and_check(void) { return 0; }
2328 static void call_console_driver(struct console *con, const char *text, size_t len,
2329                 char *dropped_text)
2330 {
2331 }
2332 static bool suppress_message_printing(int level) { return false; }
2333 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
2334 
2335 #endif /* CONFIG_PRINTK */
2336 
#ifdef CONFIG_EARLY_PRINTK
struct console *early_console;

/*
 * Format a message into a 512 byte on-stack buffer and write it directly
 * through the early console. Nothing happens when no early console is set.
 */
asmlinkage __visible void early_printk(const char *fmt, ...)
{
	char msg[512];
	va_list args;
	int len;

	if (!early_console)
		return;

	va_start(args, fmt);
	len = vscnprintf(msg, sizeof(msg), fmt, args);
	va_end(args);

	early_console->write(early_console, msg, len);
}
#endif
2356 
2357 static void set_user_specified(struct console_cmdline *c, bool user_specified)
2358 {
2359     if (!user_specified)
2360         return;
2361 
2362     /*
2363      * @c console was defined by the user on the command line.
2364      * Do not clear when added twice also by SPCR or the device tree.
2365      */
2366     c->user_specified = true;
2367     /* At least one console defined by the user on the command line. */
2368     console_set_on_cmdline = 1;
2369 }
2370 
2371 static int __add_preferred_console(char *name, int idx, char *options,
2372                    char *brl_options, bool user_specified)
2373 {
2374     struct console_cmdline *c;
2375     int i;
2376 
2377     /*
2378      *  See if this tty is not yet registered, and
2379      *  if we have a slot free.
2380      */
2381     for (i = 0, c = console_cmdline;
2382          i < MAX_CMDLINECONSOLES && c->name[0];
2383          i++, c++) {
2384         if (strcmp(c->name, name) == 0 && c->index == idx) {
2385             if (!brl_options)
2386                 preferred_console = i;
2387             set_user_specified(c, user_specified);
2388             return 0;
2389         }
2390     }
2391     if (i == MAX_CMDLINECONSOLES)
2392         return -E2BIG;
2393     if (!brl_options)
2394         preferred_console = i;
2395     strlcpy(c->name, name, sizeof(c->name));
2396     c->options = options;
2397     set_user_specified(c, user_specified);
2398     braille_set_options(c, brl_options);
2399 
2400     c->index = idx;
2401     return 0;
2402 }
2403 
2404 static int __init console_msg_format_setup(char *str)
2405 {
2406     if (!strcmp(str, "syslog"))
2407         console_msg_format = MSG_FORMAT_SYSLOG;
2408     if (!strcmp(str, "default"))
2409         console_msg_format = MSG_FORMAT_DEFAULT;
2410     return 1;
2411 }
2412 __setup("console_msg_format=", console_msg_format_setup);
2413 
2414 /*
2415  * Set up a console.  Called via do_early_param() in init/main.c
2416  * for each "console=" parameter in the boot command line.
2417  */
2418 static int __init console_setup(char *str)
2419 {
2420     char buf[sizeof(console_cmdline[0].name) + 4]; /* 4 for "ttyS" */
2421     char *s, *options, *brl_options = NULL;
2422     int idx;
2423 
2424     /*
2425      * console="" or console=null have been suggested as a way to
2426      * disable console output. Use ttynull that has been created
2427      * for exactly this purpose.
2428      */
2429     if (str[0] == 0 || strcmp(str, "null") == 0) {
2430         __add_preferred_console("ttynull", 0, NULL, NULL, true);
2431         return 1;
2432     }
2433 
2434     if (_braille_console_setup(&str, &brl_options))
2435         return 1;
2436 
2437     /*
2438      * Decode str into name, index, options.
2439      */
2440     if (str[0] >= '0' && str[0] <= '9') {
2441         strcpy(buf, "ttyS");
2442         strncpy(buf + 4, str, sizeof(buf) - 5);
2443     } else {
2444         strncpy(buf, str, sizeof(buf) - 1);
2445     }
2446     buf[sizeof(buf) - 1] = 0;
2447     options = strchr(str, ',');
2448     if (options)
2449         *(options++) = 0;
2450 #ifdef __sparc__
2451     if (!strcmp(str, "ttya"))
2452         strcpy(buf, "ttyS0");
2453     if (!strcmp(str, "ttyb"))
2454         strcpy(buf, "ttyS1");
2455 #endif
2456     for (s = buf; *s; s++)
2457         if (isdigit(*s) || *s == ',')
2458             break;
2459     idx = simple_strtoul(s, NULL, 10);
2460     *s = 0;
2461 
2462     __add_preferred_console(buf, idx, options, brl_options, true);
2463     return 1;
2464 }
2465 __setup("console=", console_setup);
2466 
2467 /**
2468  * add_preferred_console - add a device to the list of preferred consoles.
2469  * @name: device name
2470  * @idx: device index
2471  * @options: options for this console
2472  *
2473  * The last preferred console added will be used for kernel messages
2474  * and stdin/out/err for init.  Normally this is used by console_setup
2475  * above to handle user-supplied console arguments; however it can also
2476  * be used by arch-specific code either to override the user or more
2477  * commonly to provide a default console (ie from PROM variables) when
2478  * the user has not supplied one.
2479  */
2480 int add_preferred_console(char *name, int idx, char *options)
2481 {
2482     return __add_preferred_console(name, idx, options, NULL, false);
2483 }
2484 
2485 bool console_suspend_enabled = true;
2486 EXPORT_SYMBOL(console_suspend_enabled);
2487 
2488 static int __init console_suspend_disable(char *str)
2489 {
2490     console_suspend_enabled = false;
2491     return 1;
2492 }
2493 __setup("no_console_suspend", console_suspend_disable);
2494 module_param_named(console_suspend, console_suspend_enabled,
2495         bool, S_IRUGO | S_IWUSR);
2496 MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
2497     " and hibernate operations");
2498 
2499 static bool printk_console_no_auto_verbose;
2500 
2501 void console_verbose(void)
2502 {
2503     if (console_loglevel && !printk_console_no_auto_verbose)
2504         console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
2505 }
2506 EXPORT_SYMBOL_GPL(console_verbose);
2507 
2508 module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644);
2509 MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc");
2510 
2511 /**
2512  * suspend_console - suspend the console subsystem
2513  *
2514  * This disables printk() while we go into suspend states
2515  */
2516 void suspend_console(void)
2517 {
2518     if (!console_suspend_enabled)
2519         return;
2520     pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
2521     pr_flush(1000, true);
2522     console_lock();
2523     console_suspended = 1;
2524     up_console_sem();
2525 }
2526 
2527 void resume_console(void)
2528 {
2529     if (!console_suspend_enabled)
2530         return;
2531     down_console_sem();
2532     console_suspended = 0;
2533     console_unlock();
2534     pr_flush(1000, true);
2535 }
2536 
2537 /**
2538  * console_cpu_notify - print deferred console messages after CPU hotplug
2539  * @cpu: unused
2540  *
2541  * If printk() is called from a CPU that is not online yet, the messages
2542  * will be printed on the console only if there are CON_ANYTIME consoles.
2543  * This function is called when a new CPU comes online (or fails to come
2544  * up) or goes offline.
2545  */
2546 static int console_cpu_notify(unsigned int cpu)
2547 {
2548     if (!cpuhp_tasks_frozen) {
2549         /* If trylock fails, someone else is doing the printing */
2550         if (console_trylock())
2551             console_unlock();
2552     }
2553     return 0;
2554 }
2555 
2556 /**
2557  * console_lock - lock the console system for exclusive use.
2558  *
2559  * Acquires a lock which guarantees that the caller has
2560  * exclusive access to the console system and the console_drivers list.
2561  *
2562  * Can sleep, returns nothing.
2563  */
2564 void console_lock(void)
2565 {
2566     might_sleep();
2567 
2568     down_console_sem();
2569     if (console_suspended)
2570         return;
2571     console_locked = 1;
2572     console_may_schedule = 1;
2573 }
2574 EXPORT_SYMBOL(console_lock);
2575 
2576 /**
2577  * console_trylock - try to lock the console system for exclusive use.
2578  *
2579  * Try to acquire a lock which guarantees that the caller has exclusive
2580  * access to the console system and the console_drivers list.
2581  *
2582  * returns 1 on success, and 0 on failure to acquire the lock.
2583  */
2584 int console_trylock(void)
2585 {
2586     if (down_trylock_console_sem())
2587         return 0;
2588     if (console_suspended) {
2589         up_console_sem();
2590         return 0;
2591     }
2592     console_locked = 1;
2593     console_may_schedule = 0;
2594     return 1;
2595 }
2596 EXPORT_SYMBOL(console_trylock);
2597 
2598 int is_console_locked(void)
2599 {
2600     return console_locked;
2601 }
2602 EXPORT_SYMBOL(is_console_locked);
2603 
2604 /*
2605  * Return true when this CPU should unlock console_sem without pushing all
2606  * messages to the console. This reduces the chance that the console is
2607  * locked when the panic CPU tries to use it.
2608  */
2609 static bool abandon_console_lock_in_panic(void)
2610 {
2611     if (!panic_in_progress())
2612         return false;
2613 
2614     /*
2615      * We can use raw_smp_processor_id() here because it is impossible for
2616      * the task to be migrated to the panic_cpu, or away from it. If
2617      * panic_cpu has already been set, and we're not currently executing on
2618      * that CPU, then we never will be.
2619      */
2620     return atomic_read(&panic_cpu) != raw_smp_processor_id();
2621 }
2622 
2623 /*
2624  * Check if the given console is currently capable and allowed to print
2625  * records.
2626  *
2627  * Requires the console_lock.
2628  */
2629 static inline bool console_is_usable(struct console *con)
2630 {
2631     if (!(con->flags & CON_ENABLED))
2632         return false;
2633 
2634     if (!con->write)
2635         return false;
2636 
2637     /*
2638      * Console drivers may assume that per-cpu resources have been
2639      * allocated. So unless they're explicitly marked as being able to
2640      * cope (CON_ANYTIME) don't call them until this CPU is officially up.
2641      */
2642     if (!cpu_online(raw_smp_processor_id()) &&
2643         !(con->flags & CON_ANYTIME))
2644         return false;
2645 
2646     return true;
2647 }
2648 
2649 static void __console_unlock(void)
2650 {
2651     console_locked = 0;
2652     up_console_sem();
2653 }
2654 
2655 /*
2656  * Print one record for the given console. The record printed is whatever
2657  * record is the next available record for the given console.
2658  *
2659  * @text is a buffer of size CONSOLE_LOG_MAX.
2660  *
2661  * If extended messages should be printed, @ext_text is a buffer of size
2662  * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL.
2663  *
2664  * If dropped messages should be printed, @dropped_text is a buffer of size
2665  * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL.
2666  *
2667  * @handover will be set to true if a printk waiter has taken over the
2668  * console_lock, in which case the caller is no longer holding the
2669  * console_lock. Otherwise it is set to false.
2670  *
2671  * Returns false if the given console has no next record to print, otherwise
2672  * true.
2673  *
2674  * Requires the console_lock.
2675  */
2676 static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
2677                      char *dropped_text, bool *handover)
2678 {
2679     static int panic_console_dropped;
2680     struct printk_info info;
2681     struct printk_record r;
2682     unsigned long flags;
2683     char *write_text;
2684     size_t len;
2685 
2686     prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
2687 
2688     *handover = false;
2689 
2690     if (!prb_read_valid(prb, con->seq, &r))
2691         return false;
2692 
2693     if (con->seq != r.info->seq) {
2694         con->dropped += r.info->seq - con->seq;
2695         con->seq = r.info->seq;
2696         if (panic_in_progress() && panic_console_dropped++ > 10) {
2697             suppress_panic_printk = 1;
2698             pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
2699         }
2700     }
2701 
2702     /* Skip record that has level above the console loglevel. */
2703     if (suppress_message_printing(r.info->level)) {
2704         con->seq++;
2705         goto skip;
2706     }
2707 
2708     if (ext_text) {
2709         write_text = ext_text;
2710         len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info);
2711         len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len,
2712                       &r.text_buf[0], r.info->text_len, &r.info->dev_info);
2713     } else {
2714         write_text = text;
2715         len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
2716     }
2717 
2718     /*
2719      * While actively printing out messages, if another printk()
2720      * were to occur on another CPU, it may wait for this one to
2721      * finish. This task can not be preempted if there is a
2722      * waiter waiting to take over.
2723      *
2724      * Interrupts are disabled because the hand over to a waiter
2725      * must not be interrupted until the hand over is completed
2726      * (@console_waiter is cleared).
2727      */
2728     printk_safe_enter_irqsave(flags);
2729     console_lock_spinning_enable();
2730 
2731     stop_critical_timings();    /* don't trace print latency */
2732     call_console_driver(con, write_text, len, dropped_text);
2733     start_critical_timings();
2734 
2735     con->seq++;
2736 
2737     *handover = console_lock_spinning_disable_and_check();
2738     printk_safe_exit_irqrestore(flags);
2739 skip:
2740     return true;
2741 }
2742 
2743 /*
2744  * Print out all remaining records to all consoles.
2745  *
2746  * @do_cond_resched is set by the caller. It can be true only in schedulable
2747  * context.
2748  *
2749  * @next_seq is set to the sequence number after the last available record.
2750  * The value is valid only when this function returns true. It means that all
2751  * usable consoles are completely flushed.
2752  *
2753  * @handover will be set to true if a printk waiter has taken over the
2754  * console_lock, in which case the caller is no longer holding the
2755  * console_lock. Otherwise it is set to false.
2756  *
2757  * Returns true when there was at least one usable console and all messages
2758  * were flushed to all usable consoles. A returned false informs the caller
2759  * that everything was not flushed (either there were no usable consoles or
2760  * another context has taken over printing or it is a panic situation and this
2761  * is not the panic CPU). Regardless the reason, the caller should assume it
2762  * is not useful to immediately try again.
2763  *
2764  * Requires the console_lock.
2765  */
2766 static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
2767 {
2768     static char dropped_text[DROPPED_TEXT_MAX];
2769     static char ext_text[CONSOLE_EXT_LOG_MAX];
2770     static char text[CONSOLE_LOG_MAX];
2771     bool any_usable = false;
2772     struct console *con;
2773     bool any_progress;
2774 
2775     *next_seq = 0;
2776     *handover = false;
2777 
2778     do {
2779         any_progress = false;
2780 
2781         for_each_console(con) {
2782             bool progress;
2783 
2784             if (!console_is_usable(con))
2785                 continue;
2786             any_usable = true;
2787 
2788             if (con->flags & CON_EXTENDED) {
2789                 /* Extended consoles do not print "dropped messages". */
2790                 progress = console_emit_next_record(con, &text[0],
2791                                     &ext_text[0], NULL,
2792                                     handover);
2793             } else {
2794                 progress = console_emit_next_record(con, &text[0],
2795                                     NULL, &dropped_text[0],
2796                                     handover);
2797             }
2798             if (*handover)
2799                 return false;
2800 
2801             /* Track the next of the highest seq flushed. */
2802             if (con->seq > *next_seq)
2803                 *next_seq = con->seq;
2804 
2805             if (!progress)
2806                 continue;
2807             any_progress = true;
2808 
2809             /* Allow panic_cpu to take over the consoles safely. */
2810             if (abandon_console_lock_in_panic())
2811                 return false;
2812 
2813             if (do_cond_resched)
2814                 cond_resched();
2815         }
2816     } while (any_progress);
2817 
2818     return any_usable;
2819 }
2820 
2821 /**
2822  * console_unlock - unlock the console system
2823  *
2824  * Releases the console_lock which the caller holds on the console system
2825  * and the console driver list.
2826  *
2827  * While the console_lock was held, console output may have been buffered
2828  * by printk().  If this is the case, console_unlock(); emits
2829  * the output prior to releasing the lock.
2830  *
2831  * console_unlock(); may be called from any context.
2832  */
2833 void console_unlock(void)
2834 {
2835     bool do_cond_resched;
2836     bool handover;
2837     bool flushed;
2838     u64 next_seq;
2839 
2840     if (console_suspended) {
2841         up_console_sem();
2842         return;
2843     }
2844 
2845     /*
2846      * Console drivers are called with interrupts disabled, so
2847      * @console_may_schedule should be cleared before; however, we may
2848      * end up dumping a lot of lines, for example, if called from
2849      * console registration path, and should invoke cond_resched()
2850      * between lines if allowable.  Not doing so can cause a very long
2851      * scheduling stall on a slow console leading to RCU stall and
2852      * softlockup warnings which exacerbate the issue with more
2853      * messages practically incapacitating the system. Therefore, create
2854      * a local to use for the printing loop.
2855      */
2856     do_cond_resched = console_may_schedule;
2857 
2858     do {
2859         console_may_schedule = 0;
2860 
2861         flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
2862         if (!handover)
2863             __console_unlock();
2864 
2865         /*
2866          * Abort if there was a failure to flush all messages to all
2867          * usable consoles. Either it is not possible to flush (in
2868          * which case it would be an infinite loop of retrying) or
2869          * another context has taken over printing.
2870          */
2871         if (!flushed)
2872             break;
2873 
2874         /*
2875          * Some context may have added new records after
2876          * console_flush_all() but before unlocking the console.
2877          * Re-check if there is a new record to flush. If the trylock
2878          * fails, another context is already handling the printing.
2879          */
2880     } while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
2881 }
2882 EXPORT_SYMBOL(console_unlock);
2883 
2884 /**
2885  * console_conditional_schedule - yield the CPU if required
2886  *
2887  * If the console code is currently allowed to sleep, and
2888  * if this CPU should yield the CPU to another task, do
2889  * so here.
2890  *
2891  * Must be called within console_lock();.
2892  */
2893 void __sched console_conditional_schedule(void)
2894 {
2895     if (console_may_schedule)
2896         cond_resched();
2897 }
2898 EXPORT_SYMBOL(console_conditional_schedule);
2899 
2900 void console_unblank(void)
2901 {
2902     struct console *c;
2903 
2904     /*
2905      * console_unblank can no longer be called in interrupt context unless
2906      * oops_in_progress is set to 1..
2907      */
2908     if (oops_in_progress) {
2909         if (down_trylock_console_sem() != 0)
2910             return;
2911     } else
2912         console_lock();
2913 
2914     console_locked = 1;
2915     console_may_schedule = 0;
2916     for_each_console(c)
2917         if ((c->flags & CON_ENABLED) && c->unblank)
2918             c->unblank();
2919     console_unlock();
2920 
2921     if (!oops_in_progress)
2922         pr_flush(1000, true);
2923 }
2924 
2925 /**
2926  * console_flush_on_panic - flush console content on panic
2927  * @mode: flush all messages in buffer or just the pending ones
2928  *
2929  * Immediately output all pending messages no matter what.
2930  */
2931 void console_flush_on_panic(enum con_flush_mode mode)
2932 {
2933     /*
2934      * If someone else is holding the console lock, trylock will fail
2935      * and may_schedule may be set.  Ignore and proceed to unlock so
2936      * that messages are flushed out.  As this can be called from any
2937      * context and we don't want to get preempted while flushing,
2938      * ensure may_schedule is cleared.
2939      */
2940     console_trylock();
2941     console_may_schedule = 0;
2942 
2943     if (mode == CONSOLE_REPLAY_ALL) {
2944         struct console *c;
2945         u64 seq;
2946 
2947         seq = prb_first_valid_seq(prb);
2948         for_each_console(c)
2949             c->seq = seq;
2950     }
2951     console_unlock();
2952 }
2953 
2954 /*
2955  * Return the console tty driver structure and its associated index
2956  */
2957 struct tty_driver *console_device(int *index)
2958 {
2959     struct console *c;
2960     struct tty_driver *driver = NULL;
2961 
2962     console_lock();
2963     for_each_console(c) {
2964         if (!c->device)
2965             continue;
2966         driver = c->device(c, index);
2967         if (driver)
2968             break;
2969     }
2970     console_unlock();
2971     return driver;
2972 }
2973 
2974 /*
2975  * Prevent further output on the passed console device so that (for example)
2976  * serial drivers can disable console output before suspending a port, and can
2977  * re-enable output afterwards.
2978  */
2979 void console_stop(struct console *console)
2980 {
2981     __pr_flush(console, 1000, true);
2982     console_lock();
2983     console->flags &= ~CON_ENABLED;
2984     console_unlock();
2985 }
2986 EXPORT_SYMBOL(console_stop);
2987 
2988 void console_start(struct console *console)
2989 {
2990     console_lock();
2991     console->flags |= CON_ENABLED;
2992     console_unlock();
2993     __pr_flush(console, 1000, true);
2994 }
2995 EXPORT_SYMBOL(console_start);
2996 
2997 static int __read_mostly keep_bootcon;
2998 
2999 static int __init keep_bootcon_setup(char *str)
3000 {
3001     keep_bootcon = 1;
3002     pr_info("debug: skip boot console de-registration.\n");
3003 
3004     return 0;
3005 }
3006 
3007 early_param("keep_bootcon", keep_bootcon_setup);
3008 
3009 /*
3010  * This is called by register_console() to try to match
3011  * the newly registered console with any of the ones selected
3012  * by either the command line or add_preferred_console() and
3013  * setup/enable it.
3014  *
3015  * Care need to be taken with consoles that are statically
3016  * enabled such as netconsole
3017  */
3018 static int try_enable_preferred_console(struct console *newcon,
3019                     bool user_specified)
3020 {
3021     struct console_cmdline *c;
3022     int i, err;
3023 
3024     for (i = 0, c = console_cmdline;
3025          i < MAX_CMDLINECONSOLES && c->name[0];
3026          i++, c++) {
3027         if (c->user_specified != user_specified)
3028             continue;
3029         if (!newcon->match ||
3030             newcon->match(newcon, c->name, c->index, c->options) != 0) {
3031             /* default matching */
3032             BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
3033             if (strcmp(c->name, newcon->name) != 0)
3034                 continue;
3035             if (newcon->index >= 0 &&
3036                 newcon->index != c->index)
3037                 continue;
3038             if (newcon->index < 0)
3039                 newcon->index = c->index;
3040 
3041             if (_braille_register_console(newcon, c))
3042                 return 0;
3043 
3044             if (newcon->setup &&
3045                 (err = newcon->setup(newcon, c->options)) != 0)
3046                 return err;
3047         }
3048         newcon->flags |= CON_ENABLED;
3049         if (i == preferred_console)
3050             newcon->flags |= CON_CONSDEV;
3051         return 0;
3052     }
3053 
3054     /*
3055      * Some consoles, such as pstore and netconsole, can be enabled even
3056      * without matching. Accept the pre-enabled consoles only when match()
3057      * and setup() had a chance to be called.
3058      */
3059     if (newcon->flags & CON_ENABLED && c->user_specified == user_specified)
3060         return 0;
3061 
3062     return -ENOENT;
3063 }
3064 
3065 /* Try to enable the console unconditionally */
3066 static void try_enable_default_console(struct console *newcon)
3067 {
3068     if (newcon->index < 0)
3069         newcon->index = 0;
3070 
3071     if (newcon->setup && newcon->setup(newcon, NULL) != 0)
3072         return;
3073 
3074     newcon->flags |= CON_ENABLED;
3075 
3076     if (newcon->device)
3077         newcon->flags |= CON_CONSDEV;
3078 }
3079 
/*
 * Print a message about console @con at log level @lvl. The text is
 * prefixed with "console [<name><index>] ", or "bootconsole [...] " when
 * the console has CON_BOOT set.
 *
 * @con is parenthesised in the expansion so that any pointer expression
 * can be passed. Note that it is evaluated more than once, so it must not
 * have side effects.
 */
#define con_printk(lvl, con, fmt, ...)				\
	printk(lvl pr_fmt("%sconsole [%s%d] " fmt),		\
	       ((con)->flags & CON_BOOT) ? "boot" : "",		\
	       (con)->name, (con)->index, ##__VA_ARGS__)
3084 
3085 /*
3086  * The console driver calls this routine during kernel initialization
3087  * to register the console printing procedure with printk() and to
3088  * print any messages that were printed by the kernel before the
3089  * console driver was initialized.
3090  *
3091  * This can happen pretty early during the boot process (because of
3092  * early_printk) - sometimes before setup_arch() completes - be careful
3093  * of what kernel features are used - they may not be initialised yet.
3094  *
3095  * There are two types of consoles - bootconsoles (early_printk) and
3096  * "real" consoles (everything which is not a bootconsole) which are
3097  * handled differently.
3098  *  - Any number of bootconsoles can be registered at any time.
3099  *  - As soon as a "real" console is registered, all bootconsoles
3100  *    will be unregistered automatically.
3101  *  - Once a "real" console is registered, any attempt to register a
3102  *    bootconsoles will be rejected
3103  */
3104 void register_console(struct console *newcon)
3105 {
3106     struct console *con;
3107     bool bootcon_enabled = false;
3108     bool realcon_enabled = false;
3109     int err;
3110 
3111     for_each_console(con) {
3112         if (WARN(con == newcon, "console '%s%d' already registered\n",
3113                      con->name, con->index))
3114             return;
3115     }
3116 
3117     for_each_console(con) {
3118         if (con->flags & CON_BOOT)
3119             bootcon_enabled = true;
3120         else
3121             realcon_enabled = true;
3122     }
3123 
3124     /* Do not register boot consoles when there already is a real one. */
3125     if (newcon->flags & CON_BOOT && realcon_enabled) {
3126         pr_info("Too late to register bootconsole %s%d\n",
3127             newcon->name, newcon->index);
3128         return;
3129     }
3130 
3131     /*
3132      * See if we want to enable this console driver by default.
3133      *
3134      * Nope when a console is preferred by the command line, device
3135      * tree, or SPCR.
3136      *
3137      * The first real console with tty binding (driver) wins. More
3138      * consoles might get enabled before the right one is found.
3139      *
3140      * Note that a console with tty binding will have CON_CONSDEV
3141      * flag set and will be first in the list.
3142      */
3143     if (preferred_console < 0) {
3144         if (!console_drivers || !console_drivers->device ||
3145             console_drivers->flags & CON_BOOT) {
3146             try_enable_default_console(newcon);
3147         }
3148     }
3149 
3150     /* See if this console matches one we selected on the command line */
3151     err = try_enable_preferred_console(newcon, true);
3152 
3153     /* If not, try to match against the platform default(s) */
3154     if (err == -ENOENT)
3155         err = try_enable_preferred_console(newcon, false);
3156 
3157     /* printk() messages are not printed to the Braille console. */
3158     if (err || newcon->flags & CON_BRL)
3159         return;
3160 
3161     /*
3162      * If we have a bootconsole, and are switching to a real console,
3163      * don't print everything out again, since when the boot console, and
3164      * the real console are the same physical device, it's annoying to
3165      * see the beginning boot messages twice
3166      */
3167     if (bootcon_enabled &&
3168         ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
3169         newcon->flags &= ~CON_PRINTBUFFER;
3170     }
3171 
3172     /*
3173      *  Put this console in the list - keep the
3174      *  preferred driver at the head of the list.
3175      */
3176     console_lock();
3177     if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
3178         newcon->next = console_drivers;
3179         console_drivers = newcon;
3180         if (newcon->next)
3181             newcon->next->flags &= ~CON_CONSDEV;
3182         /* Ensure this flag is always set for the head of the list */
3183         newcon->flags |= CON_CONSDEV;
3184     } else {
3185         newcon->next = console_drivers->next;
3186         console_drivers->next = newcon;
3187     }
3188 
3189     if (newcon->flags & CON_EXTENDED)
3190         nr_ext_console_drivers++;
3191 
3192     newcon->dropped = 0;
3193     if (newcon->flags & CON_PRINTBUFFER) {
3194         /* Get a consistent copy of @syslog_seq. */
3195         mutex_lock(&syslog_lock);
3196         newcon->seq = syslog_seq;
3197         mutex_unlock(&syslog_lock);
3198     } else {
3199         /* Begin with next message. */
3200         newcon->seq = prb_next_seq(prb);
3201     }
3202     console_unlock();
3203     console_sysfs_notify();
3204 
3205     /*
3206      * By unregistering the bootconsoles after we enable the real console
3207      * we get the "console xxx enabled" message on all the consoles -
3208      * boot consoles, real consoles, etc - this is to ensure that end
3209      * users know there might be something in the kernel's log buffer that
3210      * went to the bootconsole (that they do not see on the real console)
3211      */
3212     con_printk(KERN_INFO, newcon, "enabled\n");
3213     if (bootcon_enabled &&
3214         ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
3215         !keep_bootcon) {
3216         /* We need to iterate through all boot consoles, to make
3217          * sure we print everything out, before we unregister them.
3218          */
3219         for_each_console(con)
3220             if (con->flags & CON_BOOT)
3221                 unregister_console(con);
3222     }
3223 }
3224 EXPORT_SYMBOL(register_console);
3225 
3226 int unregister_console(struct console *console)
3227 {
3228     struct console *con;
3229     int res;
3230 
3231     con_printk(KERN_INFO, console, "disabled\n");
3232 
3233     res = _braille_unregister_console(console);
3234     if (res < 0)
3235         return res;
3236     if (res > 0)
3237         return 0;
3238 
3239     res = -ENODEV;
3240     console_lock();
3241     if (console_drivers == console) {
3242         console_drivers=console->next;
3243         res = 0;
3244     } else {
3245         for_each_console(con) {
3246             if (con->next == console) {
3247                 con->next = console->next;
3248                 res = 0;
3249                 break;
3250             }
3251         }
3252     }
3253 
3254     if (res)
3255         goto out_disable_unlock;
3256 
3257     if (console->flags & CON_EXTENDED)
3258         nr_ext_console_drivers--;
3259 
3260     /*
3261      * If this isn't the last console and it has CON_CONSDEV set, we
3262      * need to set it on the next preferred console.
3263      */
3264     if (console_drivers != NULL && console->flags & CON_CONSDEV)
3265         console_drivers->flags |= CON_CONSDEV;
3266 
3267     console->flags &= ~CON_ENABLED;
3268     console_unlock();
3269     console_sysfs_notify();
3270 
3271     if (console->exit)
3272         res = console->exit(console);
3273 
3274     return res;
3275 
3276 out_disable_unlock:
3277     console->flags &= ~CON_ENABLED;
3278     console_unlock();
3279 
3280     return res;
3281 }
3282 EXPORT_SYMBOL(unregister_console);
3283 
3284 /*
3285  * Initialize the console device. This is called *early*, so
3286  * we can't necessarily depend on lots of kernel help here.
3287  * Just do some early initializations, and do the complex setup
3288  * later.
3289  */
3290 void __init console_init(void)
3291 {
3292     int ret;
3293     initcall_t call;
3294     initcall_entry_t *ce;
3295 
3296     /* Setup the default TTY line discipline. */
3297     n_tty_init();
3298 
3299     /*
3300      * set up the console device so that later boot sequences can
3301      * inform about problems etc..
3302      */
3303     ce = __con_initcall_start;
3304     trace_initcall_level("console");
3305     while (ce < __con_initcall_end) {
3306         call = initcall_from_entry(ce);
3307         trace_initcall_start(call);
3308         ret = call();
3309         trace_initcall_finish(call, ret);
3310         ce++;
3311     }
3312 }
3313 
3314 /*
3315  * Some boot consoles access data that is in the init section and which will
3316  * be discarded after the initcalls have been run. To make sure that no code
3317  * will access this data, unregister the boot consoles in a late initcall.
3318  *
3319  * If for some reason, such as deferred probe or the driver being a loadable
3320  * module, the real console hasn't registered yet at this point, there will
3321  * be a brief interval in which no messages are logged to the console, which
3322  * makes it difficult to diagnose problems that occur during this time.
3323  *
3324  * To mitigate this problem somewhat, only unregister consoles whose memory
3325  * intersects with the init section. Note that all other boot consoles will
3326  * get unregistered when the real preferred console is registered.
3327  */
3328 static int __init printk_late_init(void)
3329 {
3330     struct console *con;
3331     int ret;
3332 
3333     for_each_console(con) {
3334         if (!(con->flags & CON_BOOT))
3335             continue;
3336 
3337         /* Check addresses that might be used for enabled consoles. */
3338         if (init_section_intersects(con, sizeof(*con)) ||
3339             init_section_contains(con->write, 0) ||
3340             init_section_contains(con->read, 0) ||
3341             init_section_contains(con->device, 0) ||
3342             init_section_contains(con->unblank, 0) ||
3343             init_section_contains(con->data, 0)) {
3344             /*
3345              * Please, consider moving the reported consoles out
3346              * of the init section.
3347              */
3348             pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n",
3349                 con->name, con->index);
3350             unregister_console(con);
3351         }
3352     }
3353     ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
3354                     console_cpu_notify);
3355     WARN_ON(ret < 0);
3356     ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
3357                     console_cpu_notify, NULL);
3358     WARN_ON(ret < 0);
3359     printk_sysctl_init();
3360     return 0;
3361 }
3362 late_initcall(printk_late_init);
3363 
3364 #if defined CONFIG_PRINTK
3365 /* If @con is specified, only wait for that console. Otherwise wait for all. */
3366 static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
3367 {
3368     int remaining = timeout_ms;
3369     struct console *c;
3370     u64 last_diff = 0;
3371     u64 printk_seq;
3372     u64 diff;
3373     u64 seq;
3374 
3375     might_sleep();
3376 
3377     seq = prb_next_seq(prb);
3378 
3379     for (;;) {
3380         diff = 0;
3381 
3382         console_lock();
3383 
3384         for_each_console(c) {
3385             if (con && con != c)
3386                 continue;
3387             if (!console_is_usable(c))
3388                 continue;
3389             printk_seq = c->seq;
3390             if (printk_seq < seq)
3391                 diff += seq - printk_seq;
3392         }
3393 
3394         /*
3395          * If consoles are suspended, it cannot be expected that they
3396          * make forward progress, so timeout immediately. @diff is
3397          * still used to return a valid flush status.
3398          */
3399         if (console_suspended)
3400             remaining = 0;
3401         else if (diff != last_diff && reset_on_progress)
3402             remaining = timeout_ms;
3403 
3404         console_unlock();
3405 
3406         if (diff == 0 || remaining == 0)
3407             break;
3408 
3409         if (remaining < 0) {
3410             /* no timeout limit */
3411             msleep(100);
3412         } else if (remaining < 100) {
3413             msleep(remaining);
3414             remaining = 0;
3415         } else {
3416             msleep(100);
3417             remaining -= 100;
3418         }
3419 
3420         last_diff = diff;
3421     }
3422 
3423     return (diff == 0);
3424 }
3425 
/**
 * pr_flush() - Wait for printing threads to catch up.
 *
 * @timeout_ms:        Upper bound (in ms) for the wait.
 * @reset_on_progress: Restart the timeout whenever forward progress is seen.
 *
 * With @timeout_ms set to 0 no waiting takes place. A value of -1 means
 * waiting without a time limit.
 *
 * With @reset_on_progress set, the timeout starts over each time any
 * printer is observed to have made forward progress.
 *
 * Context: Process context. May sleep while acquiring console lock.
 * Return: true if all enabled printers are caught up.
 */
bool pr_flush(int timeout_ms, bool reset_on_progress)
{
	/* A NULL console means: wait for all of them. */
	struct console *all_consoles = NULL;

	return __pr_flush(all_consoles, timeout_ms, reset_on_progress);
}
EXPORT_SYMBOL(pr_flush);
3446 
3447 /*
3448  * Delayed printk version, for scheduler-internal messages:
3449  */
3450 #define PRINTK_PENDING_WAKEUP   0x01
3451 #define PRINTK_PENDING_OUTPUT   0x02
3452 
3453 static DEFINE_PER_CPU(int, printk_pending);
3454 
3455 static void wake_up_klogd_work_func(struct irq_work *irq_work)
3456 {
3457     int pending = this_cpu_xchg(printk_pending, 0);
3458 
3459     if (pending & PRINTK_PENDING_OUTPUT) {
3460         /* If trylock fails, someone else is doing the printing */
3461         if (console_trylock())
3462             console_unlock();
3463     }
3464 
3465     if (pending & PRINTK_PENDING_WAKEUP)
3466         wake_up_interruptible(&log_wait);
3467 }
3468 
3469 static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
3470     IRQ_WORK_INIT_LAZY(wake_up_klogd_work_func);
3471 
3472 static void __wake_up_klogd(int val)
3473 {
3474     if (!printk_percpu_data_ready())
3475         return;
3476 
3477     preempt_disable();
3478     /*
3479      * Guarantee any new records can be seen by tasks preparing to wait
3480      * before this context checks if the wait queue is empty.
3481      *
3482      * The full memory barrier within wq_has_sleeper() pairs with the full
3483      * memory barrier within set_current_state() of
3484      * prepare_to_wait_event(), which is called after ___wait_event() adds
3485      * the waiter but before it has checked the wait condition.
3486      *
3487      * This pairs with devkmsg_read:A and syslog_print:A.
3488      */
3489     if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
3490         (val & PRINTK_PENDING_OUTPUT)) {
3491         this_cpu_or(printk_pending, val);
3492         irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
3493     }
3494     preempt_enable();
3495 }
3496 
3497 void wake_up_klogd(void)
3498 {
3499     __wake_up_klogd(PRINTK_PENDING_WAKEUP);
3500 }
3501 
3502 void defer_console_output(void)
3503 {
3504     /*
3505      * New messages may have been added directly to the ringbuffer
3506      * using vprintk_store(), so wake any waiters as well.
3507      */
3508     __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
3509 }
3510 
/* Trigger a deferred flush; simply forwards to defer_console_output(). */
void printk_trigger_flush(void)
{
	defer_console_output();
}
3515 
3516 int vprintk_deferred(const char *fmt, va_list args)
3517 {
3518     int r;
3519 
3520     r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
3521     defer_console_output();
3522 
3523     return r;
3524 }
3525 
/*
 * Variadic front end of vprintk_deferred(); returns whatever
 * vprintk_deferred() returns.
 */
int _printk_deferred(const char *fmt, ...)
{
	va_list ap;
	int printed;

	va_start(ap, fmt);
	printed = vprintk_deferred(fmt, ap);
	va_end(ap);

	return printed;
}
3537 
3538 /*
3539  * printk rate limiting, lifted from the networking subsystem.
3540  *
3541  * This enforces a rate limit: not more than 10 kernel messages
3542  * every 5s to make a denial-of-service attack impossible.
3543  */
3544 DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
3545 
3546 int __printk_ratelimit(const char *func)
3547 {
3548     return ___ratelimit(&printk_ratelimit_state, func);
3549 }
3550 EXPORT_SYMBOL(__printk_ratelimit);
3551 
3552 /**
3553  * printk_timed_ratelimit - caller-controlled printk ratelimiting
3554  * @caller_jiffies: pointer to caller's state
3555  * @interval_msecs: minimum interval between prints
3556  *
3557  * printk_timed_ratelimit() returns true if more than @interval_msecs
3558  * milliseconds have elapsed since the last time printk_timed_ratelimit()
3559  * returned true.
3560  */
3561 bool printk_timed_ratelimit(unsigned long *caller_jiffies,
3562             unsigned int interval_msecs)
3563 {
3564     unsigned long elapsed = jiffies - *caller_jiffies;
3565 
3566     if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs))
3567         return false;
3568 
3569     *caller_jiffies = jiffies;
3570     return true;
3571 }
3572 EXPORT_SYMBOL(printk_timed_ratelimit);
3573 
3574 static DEFINE_SPINLOCK(dump_list_lock);
3575 static LIST_HEAD(dump_list);
3576 
3577 /**
3578  * kmsg_dump_register - register a kernel log dumper.
3579  * @dumper: pointer to the kmsg_dumper structure
3580  *
3581  * Adds a kernel log dumper to the system. The dump callback in the
3582  * structure will be called when the kernel oopses or panics and must be
3583  * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise.
3584  */
3585 int kmsg_dump_register(struct kmsg_dumper *dumper)
3586 {
3587     unsigned long flags;
3588     int err = -EBUSY;
3589 
3590     /* The dump callback needs to be set */
3591     if (!dumper->dump)
3592         return -EINVAL;
3593 
3594     spin_lock_irqsave(&dump_list_lock, flags);
3595     /* Don't allow registering multiple times */
3596     if (!dumper->registered) {
3597         dumper->registered = 1;
3598         list_add_tail_rcu(&dumper->list, &dump_list);
3599         err = 0;
3600     }
3601     spin_unlock_irqrestore(&dump_list_lock, flags);
3602 
3603     return err;
3604 }
3605 EXPORT_SYMBOL_GPL(kmsg_dump_register);
3606 
3607 /**
3608  * kmsg_dump_unregister - unregister a kmsg dumper.
3609  * @dumper: pointer to the kmsg_dumper structure
3610  *
3611  * Removes a dump device from the system. Returns zero on success and
3612  * %-EINVAL otherwise.
3613  */
3614 int kmsg_dump_unregister(struct kmsg_dumper *dumper)
3615 {
3616     unsigned long flags;
3617     int err = -EINVAL;
3618 
3619     spin_lock_irqsave(&dump_list_lock, flags);
3620     if (dumper->registered) {
3621         dumper->registered = 0;
3622         list_del_rcu(&dumper->list);
3623         err = 0;
3624     }
3625     spin_unlock_irqrestore(&dump_list_lock, flags);
3626     synchronize_rcu();
3627 
3628     return err;
3629 }
3630 EXPORT_SYMBOL_GPL(kmsg_dump_unregister);
3631 
3632 static bool always_kmsg_dump;
3633 module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
3634 
3635 const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason)
3636 {
3637     switch (reason) {
3638     case KMSG_DUMP_PANIC:
3639         return "Panic";
3640     case KMSG_DUMP_OOPS:
3641         return "Oops";
3642     case KMSG_DUMP_EMERG:
3643         return "Emergency";
3644     case KMSG_DUMP_SHUTDOWN:
3645         return "Shutdown";
3646     default:
3647         return "Unknown";
3648     }
3649 }
3650 EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
3651 
3652 /**
3653  * kmsg_dump - dump kernel log to kernel message dumpers.
3654  * @reason: the reason (oops, panic etc) for dumping
3655  *
3656  * Call each of the registered dumper's dump() callback, which can
3657  * retrieve the kmsg records with kmsg_dump_get_line() or
3658  * kmsg_dump_get_buffer().
3659  */
3660 void kmsg_dump(enum kmsg_dump_reason reason)
3661 {
3662     struct kmsg_dumper *dumper;
3663 
3664     rcu_read_lock();
3665     list_for_each_entry_rcu(dumper, &dump_list, list) {
3666         enum kmsg_dump_reason max_reason = dumper->max_reason;
3667 
3668         /*
3669          * If client has not provided a specific max_reason, default
3670          * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set.
3671          */
3672         if (max_reason == KMSG_DUMP_UNDEF) {
3673             max_reason = always_kmsg_dump ? KMSG_DUMP_MAX :
3674                             KMSG_DUMP_OOPS;
3675         }
3676         if (reason > max_reason)
3677             continue;
3678 
3679         /* invoke dumper which will iterate over records */
3680         dumper->dump(dumper, reason);
3681     }
3682     rcu_read_unlock();
3683 }
3684 
3685 /**
3686  * kmsg_dump_get_line - retrieve one kmsg log line
3687  * @iter: kmsg dump iterator
3688  * @syslog: include the "<4>" prefixes
3689  * @line: buffer to copy the line to
3690  * @size: maximum size of the buffer
3691  * @len: length of line placed into buffer
3692  *
3693  * Start at the beginning of the kmsg buffer, with the oldest kmsg
3694  * record, and copy one record into the provided buffer.
3695  *
3696  * Consecutive calls will return the next available record moving
3697  * towards the end of the buffer with the youngest messages.
3698  *
3699  * A return value of FALSE indicates that there are no more records to
3700  * read.
3701  */
3702 bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog,
3703             char *line, size_t size, size_t *len)
3704 {
3705     u64 min_seq = latched_seq_read_nolock(&clear_seq);
3706     struct printk_info info;
3707     unsigned int line_count;
3708     struct printk_record r;
3709     size_t l = 0;
3710     bool ret = false;
3711 
3712     if (iter->cur_seq < min_seq)
3713         iter->cur_seq = min_seq;
3714 
3715     prb_rec_init_rd(&r, &info, line, size);
3716 
3717     /* Read text or count text lines? */
3718     if (line) {
3719         if (!prb_read_valid(prb, iter->cur_seq, &r))
3720             goto out;
3721         l = record_print_text(&r, syslog, printk_time);
3722     } else {
3723         if (!prb_read_valid_info(prb, iter->cur_seq,
3724                      &info, &line_count)) {
3725             goto out;
3726         }
3727         l = get_record_print_text_size(&info, line_count, syslog,
3728                            printk_time);
3729 
3730     }
3731 
3732     iter->cur_seq = r.info->seq + 1;
3733     ret = true;
3734 out:
3735     if (len)
3736         *len = l;
3737     return ret;
3738 }
3739 EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
3740 
3741 /**
3742  * kmsg_dump_get_buffer - copy kmsg log lines
3743  * @iter: kmsg dump iterator
3744  * @syslog: include the "<4>" prefixes
3745  * @buf: buffer to copy the line to
3746  * @size: maximum size of the buffer
3747  * @len_out: length of line placed into buffer
3748  *
3749  * Start at the end of the kmsg buffer and fill the provided buffer
3750  * with as many of the *youngest* kmsg records that fit into it.
3751  * If the buffer is large enough, all available kmsg records will be
3752  * copied with a single call.
3753  *
3754  * Consecutive calls will fill the buffer with the next block of
3755  * available older records, not including the earlier retrieved ones.
3756  *
3757  * A return value of FALSE indicates that there are no more records to
3758  * read.
3759  */
3760 bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog,
3761               char *buf, size_t size, size_t *len_out)
3762 {
3763     u64 min_seq = latched_seq_read_nolock(&clear_seq);
3764     struct printk_info info;
3765     struct printk_record r;
3766     u64 seq;
3767     u64 next_seq;
3768     size_t len = 0;
3769     bool ret = false;
3770     bool time = printk_time;
3771 
3772     if (!buf || !size)
3773         goto out;
3774 
3775     if (iter->cur_seq < min_seq)
3776         iter->cur_seq = min_seq;
3777 
3778     if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
3779         if (info.seq != iter->cur_seq) {
3780             /* messages are gone, move to first available one */
3781             iter->cur_seq = info.seq;
3782         }
3783     }
3784 
3785     /* last entry */
3786     if (iter->cur_seq >= iter->next_seq)
3787         goto out;
3788 
3789     /*
3790      * Find first record that fits, including all following records,
3791      * into the user-provided buffer for this dump. Pass in size-1
3792      * because this function (by way of record_print_text()) will
3793      * not write more than size-1 bytes of text into @buf.
3794      */
3795     seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
3796                      size - 1, syslog, time);
3797 
3798     /*
3799      * Next kmsg_dump_get_buffer() invocation will dump block of
3800      * older records stored right before this one.
3801      */
3802     next_seq = seq;
3803 
3804     prb_rec_init_rd(&r, &info, buf, size);
3805 
3806     len = 0;
3807     prb_for_each_record(seq, prb, seq, &r) {
3808         if (r.info->seq >= iter->next_seq)
3809             break;
3810 
3811         len += record_print_text(&r, syslog, time);
3812 
3813         /* Adjust record to store to remaining buffer space. */
3814         prb_rec_init_rd(&r, &info, buf + len, size - len);
3815     }
3816 
3817     iter->next_seq = next_seq;
3818     ret = true;
3819 out:
3820     if (len_out)
3821         *len_out = len;
3822     return ret;
3823 }
3824 EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
3825 
3826 /**
3827  * kmsg_dump_rewind - reset the iterator
3828  * @iter: kmsg dump iterator
3829  *
3830  * Reset the dumper's iterator so that kmsg_dump_get_line() and
3831  * kmsg_dump_get_buffer() can be called again and used multiple
3832  * times within the same dumper.dump() callback.
3833  */
3834 void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
3835 {
3836     iter->cur_seq = latched_seq_read_nolock(&clear_seq);
3837     iter->next_seq = prb_next_seq(prb);
3838 }
3839 EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
3840 
3841 #endif
3842 
3843 #ifdef CONFIG_SMP
3844 static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1);
3845 static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0);
3846 
3847 /**
3848  * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant
3849  *                            spinning lock is not owned by any CPU.
3850  *
3851  * Context: Any context.
3852  */
3853 void __printk_cpu_sync_wait(void)
3854 {
3855     do {
3856         cpu_relax();
3857     } while (atomic_read(&printk_cpu_sync_owner) != -1);
3858 }
3859 EXPORT_SYMBOL(__printk_cpu_sync_wait);
3860 
3861 /**
3862  * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant
3863  *                               spinning lock.
3864  *
3865  * If no processor has the lock, the calling processor takes the lock and
3866  * becomes the owner. If the calling processor is already the owner of the
3867  * lock, this function succeeds immediately.
3868  *
3869  * Context: Any context. Expects interrupts to be disabled.
3870  * Return: 1 on success, otherwise 0.
3871  */
3872 int __printk_cpu_sync_try_get(void)
3873 {
3874     int cpu;
3875     int old;
3876 
3877     cpu = smp_processor_id();
3878 
3879     /*
3880      * Guarantee loads and stores from this CPU when it is the lock owner
3881      * are _not_ visible to the previous lock owner. This pairs with
3882      * __printk_cpu_sync_put:B.
3883      *
3884      * Memory barrier involvement:
3885      *
3886      * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
3887      * then __printk_cpu_sync_put:A can never read from
3888      * __printk_cpu_sync_try_get:B.
3889      *
3890      * Relies on:
3891      *
3892      * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
3893      * of the previous CPU
3894      *    matching
3895      * ACQUIRE from __printk_cpu_sync_try_get:A to
3896      * __printk_cpu_sync_try_get:B of this CPU
3897      */
3898     old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1,
3899                      cpu); /* LMM(__printk_cpu_sync_try_get:A) */
3900     if (old == -1) {
3901         /*
3902          * This CPU is now the owner and begins loading/storing
3903          * data: LMM(__printk_cpu_sync_try_get:B)
3904          */
3905         return 1;
3906 
3907     } else if (old == cpu) {
3908         /* This CPU is already the owner. */
3909         atomic_inc(&printk_cpu_sync_nested);
3910         return 1;
3911     }
3912 
3913     return 0;
3914 }
3915 EXPORT_SYMBOL(__printk_cpu_sync_try_get);
3916 
3917 /**
3918  * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock.
3919  *
3920  * The calling processor must be the owner of the lock.
3921  *
3922  * Context: Any context. Expects interrupts to be disabled.
3923  */
3924 void __printk_cpu_sync_put(void)
3925 {
3926     if (atomic_read(&printk_cpu_sync_nested)) {
3927         atomic_dec(&printk_cpu_sync_nested);
3928         return;
3929     }
3930 
3931     /*
3932      * This CPU is finished loading/storing data:
3933      * LMM(__printk_cpu_sync_put:A)
3934      */
3935 
3936     /*
3937      * Guarantee loads and stores from this CPU when it was the
3938      * lock owner are visible to the next lock owner. This pairs
3939      * with __printk_cpu_sync_try_get:A.
3940      *
3941      * Memory barrier involvement:
3942      *
3943      * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B,
3944      * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A.
3945      *
3946      * Relies on:
3947      *
3948      * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B
3949      * of this CPU
3950      *    matching
3951      * ACQUIRE from __printk_cpu_sync_try_get:A to
3952      * __printk_cpu_sync_try_get:B of the next CPU
3953      */
3954     atomic_set_release(&printk_cpu_sync_owner,
3955                -1); /* LMM(__printk_cpu_sync_put:B) */
3956 }
3957 EXPORT_SYMBOL(__printk_cpu_sync_put);
3958 #endif /* CONFIG_SMP */