Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * sysctl.c: General linux system control interface
0004  *
0005  * Begun 24 March 1995, Stephen Tweedie
0006  * Added /proc support, Dec 1995
0007  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
0008  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
0009  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
0010  * Dynamic registration fixes, Stephen Tweedie.
0011  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
0012  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
0013  *  Horn.
0014  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
0015  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
0016  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
0017  *  Wendling.
0018  * The list_for_each() macro wasn't appropriate for the sysctl loop.
0019  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
0020  */
0021 
0022 #include <linux/module.h>
0023 #include <linux/mm.h>
0024 #include <linux/swap.h>
0025 #include <linux/slab.h>
0026 #include <linux/sysctl.h>
0027 #include <linux/bitmap.h>
0028 #include <linux/signal.h>
0029 #include <linux/panic.h>
0030 #include <linux/printk.h>
0031 #include <linux/proc_fs.h>
0032 #include <linux/security.h>
0033 #include <linux/ctype.h>
0034 #include <linux/kmemleak.h>
0035 #include <linux/filter.h>
0036 #include <linux/fs.h>
0037 #include <linux/init.h>
0038 #include <linux/kernel.h>
0039 #include <linux/kobject.h>
0040 #include <linux/net.h>
0041 #include <linux/sysrq.h>
0042 #include <linux/highuid.h>
0043 #include <linux/writeback.h>
0044 #include <linux/ratelimit.h>
0045 #include <linux/compaction.h>
0046 #include <linux/hugetlb.h>
0047 #include <linux/initrd.h>
0048 #include <linux/key.h>
0049 #include <linux/times.h>
0050 #include <linux/limits.h>
0051 #include <linux/dcache.h>
0052 #include <linux/syscalls.h>
0053 #include <linux/vmstat.h>
0054 #include <linux/nfs_fs.h>
0055 #include <linux/acpi.h>
0056 #include <linux/reboot.h>
0057 #include <linux/ftrace.h>
0058 #include <linux/perf_event.h>
0059 #include <linux/oom.h>
0060 #include <linux/kmod.h>
0061 #include <linux/capability.h>
0062 #include <linux/binfmts.h>
0063 #include <linux/sched/sysctl.h>
0064 #include <linux/mount.h>
0065 #include <linux/userfaultfd_k.h>
0066 #include <linux/pid.h>
0067 
0068 #include "../lib/kstrtox.h"
0069 
0070 #include <linux/uaccess.h>
0071 #include <asm/processor.h>
0072 
0073 #ifdef CONFIG_X86
0074 #include <asm/nmi.h>
0075 #include <asm/stacktrace.h>
0076 #include <asm/io.h>
0077 #endif
0078 #ifdef CONFIG_SPARC
0079 #include <asm/setup.h>
0080 #endif
0081 #ifdef CONFIG_RT_MUTEXES
0082 #include <linux/rtmutex.h>
0083 #endif
0084 
0085 #if defined(CONFIG_SYSCTL)
0086 
/* Constants used for minimum and maximum */

#ifdef CONFIG_PERF_EVENTS
/* 640 * 1024; only compiled in when CONFIG_PERF_EVENTS is set. */
static const int six_hundred_forty_kb = 640 * 1024;
#endif


/*
 * Header-provided limits copied into addressable const int objects.
 * NOTE(review): presumably referenced as range bounds by sysctl table
 * entries outside this section - confirm against the tables.
 */
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
0096 
0097 #ifdef CONFIG_PROC_SYSCTL
0098 
/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *  to be written, and multiple writes on the same sysctl file descriptor
 *  will rewrite the sysctl value, regardless of file position. No warning
 *  is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *  not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *  file position 0 and the value must be fully contained in the buffer
 *  sent to the write syscall. If dealing with strings respect the file
 *  position, but restrict this to the max length of the buffer; anything
 *  past the max length will be ignored. Multiple writes will append
 *  to the buffer.
 *
 * These write modes control how the current file position affects the
 * behavior of updating sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
    SYSCTL_WRITES_LEGACY        = -1,
    SYSCTL_WRITES_WARN      = 0,
    SYSCTL_WRITES_STRICT        = 1,
};

/* Active write mode consulted by the proc handlers below; starts out strict. */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
0125 #endif /* CONFIG_PROC_SYSCTL */
0126 
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
/*
 * Global (non-static) flag, zero-initialised, only defined when the
 * architecture picks its own mmap layout or wants the default top-down one.
 * NOTE(review): presumably non-zero selects the legacy address-space
 * layout - confirm against the mmap layout code that reads it.
 */
int sysctl_legacy_va_layout;
#endif
0131 
0132 #ifdef CONFIG_COMPACTION
/*
 * Upper bound paired with SYSCTL_ZERO as the lower bound
 * (min_extfrag_threshold is SYSCTL_ZERO).
 */
static const int max_extfrag_threshold = 1000;
0135 #endif
0136 
0137 #endif /* CONFIG_SYSCTL */
0138 
0139 /*
0140  * /proc/sys support
0141  */
0142 
0143 #ifdef CONFIG_PROC_SYSCTL
0144 
/*
 * _proc_do_string - copy a string between a sysctl data buffer and @buffer
 * @data: the sysctl string storage
 * @maxlen: capacity of @data in bytes
 * @write: non-zero to copy from @buffer into @data, zero for the reverse
 * @buffer: the caller-supplied buffer
 * @lenp: in: number of bytes available in @buffer; out (read only): number
 *  of bytes produced
 * @ppos: file position, advanced on both reads and writes
 *
 * Always returns 0. When @data is NULL, @maxlen is 0 or *@lenp is 0 the
 * call is a no-op that reports a length of 0.
 */
static int _proc_do_string(char *data, int maxlen, int write,
        char *buffer, size_t *lenp, loff_t *ppos)
{
    size_t len;
    char c, *p;

    if (!data || !maxlen || !*lenp) {
        *lenp = 0;
        return 0;
    }

    if (write) {
        if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
            /* Only continue writes not past the end of buffer. */
            len = strlen(data);
            if (len > maxlen - 1)
                len = maxlen - 1;

            /*
             * A position beyond the current string is silently
             * ignored: nothing is stored and neither *lenp nor
             * *ppos is modified.
             */
            if (*ppos > len)
                return 0;
            /* Append/overwrite starting at the file position. */
            len = *ppos;
        } else {
            /* Start writing from beginning of buffer. */
            len = 0;
        }

        /* The position advances by the full input, even if truncated. */
        *ppos += *lenp;
        p = buffer;
        /* Copy until input ends, a NUL/newline is seen, or data is full. */
        while ((p - buffer) < *lenp && len < maxlen - 1) {
            c = *(p++);
            if (c == 0 || c == '\n')
                break;
            data[len++] = c;
        }
        data[len] = 0;
    } else {
        len = strlen(data);
        if (len > maxlen)
            len = maxlen;

        /* Reading past the end of the string yields no data. */
        if (*ppos > len) {
            *lenp = 0;
            return 0;
        }

        data += *ppos;
        len  -= *ppos;

        if (len > *lenp)
            len = *lenp;
        if (len)
            memcpy(buffer, data, len);
        /* Terminate the output with a newline when there is room. */
        if (len < *lenp) {
            buffer[len] = '\n';
            len++;
        }
        *lenp = len;
        *ppos += len;
    }
    return 0;
}
0206 
0207 static void warn_sysctl_write(struct ctl_table *table)
0208 {
0209     pr_warn_once("%s wrote to %s when file position was not 0!\n"
0210         "This will not be supported in the future. To silence this\n"
0211         "warning, set kernel.sysctl_writes_strict = -1\n",
0212         current->comm, table->procname);
0213 }
0214 
0215 /**
0216  * proc_first_pos_non_zero_ignore - check if first position is allowed
0217  * @ppos: file position
0218  * @table: the sysctl table
0219  *
0220  * Returns true if the first position is non-zero and the sysctl_writes_strict
0221  * mode indicates this is not allowed for numeric input types. String proc
0222  * handlers can ignore the return value.
0223  */
0224 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
0225                        struct ctl_table *table)
0226 {
0227     if (!*ppos)
0228         return false;
0229 
0230     switch (sysctl_writes_strict) {
0231     case SYSCTL_WRITES_STRICT:
0232         return true;
0233     case SYSCTL_WRITES_WARN:
0234         warn_sysctl_write(table);
0235         return false;
0236     default:
0237         return false;
0238     }
0239 }
0240 
0241 /**
0242  * proc_dostring - read a string sysctl
0243  * @table: the sysctl table
0244  * @write: %TRUE if this is a write to the sysctl file
0245  * @buffer: the user buffer
0246  * @lenp: the size of the user buffer
0247  * @ppos: file position
0248  *
0249  * Reads/writes a string from/to the user buffer. If the kernel
0250  * buffer provided is not large enough to hold the string, the
0251  * string is truncated. The copied string is %NULL-terminated.
0252  * If the string is being read by the user process, it is copied
0253  * and a newline '\n' is added. It is truncated if the buffer is
0254  * not large enough.
0255  *
0256  * Returns 0 on success.
0257  */
0258 int proc_dostring(struct ctl_table *table, int write,
0259           void *buffer, size_t *lenp, loff_t *ppos)
0260 {
0261     if (write)
0262         proc_first_pos_non_zero_ignore(ppos, table);
0263 
0264     return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
0265             ppos);
0266 }
0267 
/*
 * Advance *buf past leading whitespace (as defined by skip_spaces()) and
 * return the number of bytes skipped.
 *
 * NOTE(review): no length is passed down, so the scan is presumably only
 * stopped by a non-space byte; callers subtracting the result from a
 * remaining-length counter should confirm it cannot exceed that counter.
 */
static size_t proc_skip_spaces(char **buf)
{
    char *start = *buf;

    *buf = skip_spaces(start);
    return *buf - start;
}
0276 
/*
 * Skip a run of the byte @v at the front of *buf, consuming at most *size
 * bytes. Both the cursor and the remaining size are updated in place.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
    char *cur = *buf;
    size_t remaining = *size;

    /* The size test comes first so nothing is read once it hits zero. */
    for (; remaining != 0 && *cur == v; cur++)
        remaining--;

    *buf = cur;
    *size = remaining;
}
0286 
0287 /**
0288  * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
0289  *                   fail on overflow
0290  *
0291  * @cp: kernel buffer containing the string to parse
0292  * @endp: pointer to store the trailing characters
0293  * @base: the base to use
0294  * @res: where the parsed integer will be stored
0295  *
0296  * In case of success 0 is returned and @res will contain the parsed integer,
0297  * @endp will hold any trailing characters.
0298  * This function will fail the parse on overflow. If there wasn't an overflow
0299  * the function will defer the decision what characters count as invalid to the
0300  * caller.
0301  */
0302 static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
0303                unsigned long *res)
0304 {
0305     unsigned long long result;
0306     unsigned int rv;
0307 
0308     cp = _parse_integer_fixup_radix(cp, &base);
0309     rv = _parse_integer(cp, base, &result);
0310     if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
0311         return -ERANGE;
0312 
0313     cp += rv;
0314 
0315     if (endp)
0316         *endp = (char *)cp;
0317 
0318     *res = (unsigned long)result;
0319     return 0;
0320 }
0321 
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL when @size is 0, when the text does not start with a
 * digit (after an optional '-'), when the number overflows, when it fills
 * the whole temporary buffer, or when it is followed by a character that
 * is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
              unsigned long *val, bool *neg,
              const char *perm_tr, unsigned perm_tr_len, char *tr)
{
    /*
     * Must be an unsigned size type: with a plain int a *size above
     * INT_MAX would turn negative, slip past the clamp below and reach
     * memcpy() as an enormous length.
     */
    size_t len;
    char *p, tmp[TMPBUFLEN];

    if (!*size)
        return -EINVAL;

    /* Work on a bounded, NUL-terminated private copy of the input. */
    len = *size;
    if (len > TMPBUFLEN - 1)
        len = TMPBUFLEN - 1;

    memcpy(tmp, *buf, len);

    tmp[len] = 0;
    p = tmp;
    /* A lone '-' is not a sign; it falls through to the digit check. */
    if (*p == '-' && *size > 1) {
        *neg = true;
        p++;
    } else
        *neg = false;
    if (!isdigit(*p))
        return -EINVAL;

    if (strtoul_lenient(p, &p, 0, val))
        return -EINVAL;

    /* Bytes consumed, including the optional sign. */
    len = p - tmp;

    /* We don't know if the next char is whitespace thus we may accept
     * invalid integers (e.g. 1234...a) or two integers instead of one
     * (e.g. 123...1). So lets not allow such large numbers. */
    if (len == TMPBUFLEN - 1)
        return -EINVAL;

    if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
        return -EINVAL;

    if (tr && (len < *size))
        *tr = *p;

    *buf += len;
    *size -= len;

    return 0;
}
0387 
0388 /**
0389  * proc_put_long - converts an integer to a decimal ASCII formatted string
0390  *
0391  * @buf: the user buffer
0392  * @size: the size of the user buffer
0393  * @val: the integer to be converted
0394  * @neg: sign of the number, %TRUE for negative
0395  *
0396  * In case of success @buf and @size are updated with the amount of bytes
0397  * written.
0398  */
0399 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
0400 {
0401     int len;
0402     char tmp[TMPBUFLEN], *p = tmp;
0403 
0404     sprintf(p, "%s%lu", neg ? "-" : "", val);
0405     len = strlen(tmp);
0406     if (len > *size)
0407         len = *size;
0408     memcpy(*buf, tmp, len);
0409     *size -= len;
0410     *buf += len;
0411 }
0412 #undef TMPBUFLEN
0413 
/*
 * Store the single byte @c at *buf and advance the cursor, provided at
 * least one byte of space remains; otherwise do nothing.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
    char *dst;

    if (*size == 0)
        return;

    dst = *buf;
    *dst = c;
    *buf = dst + 1;
    (*size)--;
}
0425 
/*
 * Conversion callback for bool-backed entries: although the slot arrives
 * as an int pointer, it is accessed as a bool. On write any non-zero
 * parsed value stores true (the sign in @negp is not consulted); on read
 * the bool is reported as 0 or 1 and never as negative. @data is unused.
 * Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
                int *valp,
                int write, void *data)
{
    bool *flag = (bool *)valp;

    if (write) {
        *flag = *lvalp;
        return 0;
    }

    *lvalp = (unsigned long)*flag;
    *negp = false;
    return 0;
}
0440 
/*
 * Default conversion between the parser's sign + magnitude pair and a
 * signed int slot.
 *
 * Write: rejects (-EINVAL) magnitudes that do not fit an int, allowing one
 * extra value on the negative side, then stores the result with
 * WRITE_ONCE(). Read: loads the slot with READ_ONCE() and splits it into
 * sign and magnitude. @data is unused.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                 int *valp,
                 int write, void *data)
{
    if (!write) {
        int cur = READ_ONCE(*valp);
        bool is_neg = cur < 0;

        *negp = is_neg;
        /* Negate in unsigned arithmetic so INT_MIN is handled too. */
        *lvalp = is_neg ? -(unsigned long)cur : (unsigned long)cur;
        return 0;
    }

    if (*negp) {
        if (*lvalp > (unsigned long) INT_MAX + 1)
            return -EINVAL;
        WRITE_ONCE(*valp, -*lvalp);
        return 0;
    }

    if (*lvalp > (unsigned long) INT_MAX)
        return -EINVAL;
    WRITE_ONCE(*valp, *lvalp);
    return 0;
}
0467 
/*
 * Default conversion between the parser's unsigned long and an unsigned
 * int slot. Writes larger than UINT_MAX are rejected with -EINVAL; reads
 * simply widen the stored value. @data is unused.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
                  unsigned int *valp,
                  int write, void *data)
{
    if (!write) {
        unsigned int cur = READ_ONCE(*valp);

        *lvalp = (unsigned long)cur;
        return 0;
    }

    if (*lvalp > UINT_MAX)
        return -EINVAL;

    WRITE_ONCE(*valp, *lvalp);
    return 0;
}
0482 
/*
 * Characters accepted directly after a number when parsing. This is a plain
 * character array without a terminating NUL; users pass it together with
 * sizeof(proc_wspace_sep).
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
0484 
0485 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
0486           int write, void *buffer,
0487           size_t *lenp, loff_t *ppos,
0488           int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
0489                   int write, void *data),
0490           void *data)
0491 {
0492     int *i, vleft, first = 1, err = 0;
0493     size_t left;
0494     char *p;
0495 
0496     if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
0497         *lenp = 0;
0498         return 0;
0499     }
0500 
0501     i = (int *) tbl_data;
0502     vleft = table->maxlen / sizeof(*i);
0503     left = *lenp;
0504 
0505     if (!conv)
0506         conv = do_proc_dointvec_conv;
0507 
0508     if (write) {
0509         if (proc_first_pos_non_zero_ignore(ppos, table))
0510             goto out;
0511 
0512         if (left > PAGE_SIZE - 1)
0513             left = PAGE_SIZE - 1;
0514         p = buffer;
0515     }
0516 
0517     for (; left && vleft--; i++, first=0) {
0518         unsigned long lval;
0519         bool neg;
0520 
0521         if (write) {
0522             left -= proc_skip_spaces(&p);
0523 
0524             if (!left)
0525                 break;
0526             err = proc_get_long(&p, &left, &lval, &neg,
0527                          proc_wspace_sep,
0528                          sizeof(proc_wspace_sep), NULL);
0529             if (err)
0530                 break;
0531             if (conv(&neg, &lval, i, 1, data)) {
0532                 err = -EINVAL;
0533                 break;
0534             }
0535         } else {
0536             if (conv(&neg, &lval, i, 0, data)) {
0537                 err = -EINVAL;
0538                 break;
0539             }
0540             if (!first)
0541                 proc_put_char(&buffer, &left, '\t');
0542             proc_put_long(&buffer, &left, lval, neg);
0543         }
0544     }
0545 
0546     if (!write && !first && left && !err)
0547         proc_put_char(&buffer, &left, '\n');
0548     if (write && !err && left)
0549         left -= proc_skip_spaces(&p);
0550     if (write && first)
0551         return err ? : -EINVAL;
0552     *lenp -= left;
0553 out:
0554     *ppos += *lenp;
0555     return err;
0556 }
0557 
0558 static int do_proc_dointvec(struct ctl_table *table, int write,
0559           void *buffer, size_t *lenp, loff_t *ppos,
0560           int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
0561                   int write, void *data),
0562           void *data)
0563 {
0564     return __do_proc_dointvec(table->data, table, write,
0565             buffer, lenp, ppos, conv, data);
0566 }
0567 
0568 static int do_proc_douintvec_w(unsigned int *tbl_data,
0569                    struct ctl_table *table,
0570                    void *buffer,
0571                    size_t *lenp, loff_t *ppos,
0572                    int (*conv)(unsigned long *lvalp,
0573                        unsigned int *valp,
0574                        int write, void *data),
0575                    void *data)
0576 {
0577     unsigned long lval;
0578     int err = 0;
0579     size_t left;
0580     bool neg;
0581     char *p = buffer;
0582 
0583     left = *lenp;
0584 
0585     if (proc_first_pos_non_zero_ignore(ppos, table))
0586         goto bail_early;
0587 
0588     if (left > PAGE_SIZE - 1)
0589         left = PAGE_SIZE - 1;
0590 
0591     left -= proc_skip_spaces(&p);
0592     if (!left) {
0593         err = -EINVAL;
0594         goto out_free;
0595     }
0596 
0597     err = proc_get_long(&p, &left, &lval, &neg,
0598                  proc_wspace_sep,
0599                  sizeof(proc_wspace_sep), NULL);
0600     if (err || neg) {
0601         err = -EINVAL;
0602         goto out_free;
0603     }
0604 
0605     if (conv(&lval, tbl_data, 1, data)) {
0606         err = -EINVAL;
0607         goto out_free;
0608     }
0609 
0610     if (!err && left)
0611         left -= proc_skip_spaces(&p);
0612 
0613 out_free:
0614     if (err)
0615         return -EINVAL;
0616 
0617     return 0;
0618 
0619     /* This is in keeping with old __do_proc_dointvec() */
0620 bail_early:
0621     *ppos += *lenp;
0622     return err;
0623 }
0624 
0625 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
0626                    size_t *lenp, loff_t *ppos,
0627                    int (*conv)(unsigned long *lvalp,
0628                        unsigned int *valp,
0629                        int write, void *data),
0630                    void *data)
0631 {
0632     unsigned long lval;
0633     int err = 0;
0634     size_t left;
0635 
0636     left = *lenp;
0637 
0638     if (conv(&lval, tbl_data, 0, data)) {
0639         err = -EINVAL;
0640         goto out;
0641     }
0642 
0643     proc_put_long(&buffer, &left, lval, false);
0644     if (!left)
0645         goto out;
0646 
0647     proc_put_char(&buffer, &left, '\n');
0648 
0649 out:
0650     *lenp -= left;
0651     *ppos += *lenp;
0652 
0653     return err;
0654 }
0655 
0656 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
0657                    int write, void *buffer,
0658                    size_t *lenp, loff_t *ppos,
0659                    int (*conv)(unsigned long *lvalp,
0660                        unsigned int *valp,
0661                        int write, void *data),
0662                    void *data)
0663 {
0664     unsigned int *i, vleft;
0665 
0666     if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
0667         *lenp = 0;
0668         return 0;
0669     }
0670 
0671     i = (unsigned int *) tbl_data;
0672     vleft = table->maxlen / sizeof(*i);
0673 
0674     /*
0675      * Arrays are not supported, keep this simple. *Do not* add
0676      * support for them.
0677      */
0678     if (vleft != 1) {
0679         *lenp = 0;
0680         return -EINVAL;
0681     }
0682 
0683     if (!conv)
0684         conv = do_proc_douintvec_conv;
0685 
0686     if (write)
0687         return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
0688                        conv, data);
0689     return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
0690 }
0691 
0692 int do_proc_douintvec(struct ctl_table *table, int write,
0693               void *buffer, size_t *lenp, loff_t *ppos,
0694               int (*conv)(unsigned long *lvalp,
0695                   unsigned int *valp,
0696                   int write, void *data),
0697               void *data)
0698 {
0699     return __do_proc_douintvec(table->data, table, write,
0700                    buffer, lenp, ppos, conv, data);
0701 }
0702 
0703 /**
0704  * proc_dobool - read/write a bool
0705  * @table: the sysctl table
0706  * @write: %TRUE if this is a write to the sysctl file
0707  * @buffer: the user buffer
0708  * @lenp: the size of the user buffer
0709  * @ppos: file position
0710  *
0711  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
0712  * values from/to the user buffer, treated as an ASCII string.
0713  *
0714  * Returns 0 on success.
0715  */
0716 int proc_dobool(struct ctl_table *table, int write, void *buffer,
0717         size_t *lenp, loff_t *ppos)
0718 {
0719     return do_proc_dointvec(table, write, buffer, lenp, ppos,
0720                 do_proc_dobool_conv, NULL);
0721 }
0722 
0723 /**
0724  * proc_dointvec - read a vector of integers
0725  * @table: the sysctl table
0726  * @write: %TRUE if this is a write to the sysctl file
0727  * @buffer: the user buffer
0728  * @lenp: the size of the user buffer
0729  * @ppos: file position
0730  *
0731  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
0732  * values from/to the user buffer, treated as an ASCII string.
0733  *
0734  * Returns 0 on success.
0735  */
0736 int proc_dointvec(struct ctl_table *table, int write, void *buffer,
0737           size_t *lenp, loff_t *ppos)
0738 {
0739     return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
0740 }
0741 
0742 #ifdef CONFIG_COMPACTION
0743 static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
0744         int write, void *buffer, size_t *lenp, loff_t *ppos)
0745 {
0746     int ret, old;
0747 
0748     if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
0749         return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
0750 
0751     old = *(int *)table->data;
0752     ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
0753     if (ret)
0754         return ret;
0755     if (old != *(int *)table->data)
0756         pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
0757                  table->procname, current->comm,
0758                  task_pid_nr(current));
0759     return ret;
0760 }
0761 #endif
0762 
0763 /**
0764  * proc_douintvec - read a vector of unsigned integers
0765  * @table: the sysctl table
0766  * @write: %TRUE if this is a write to the sysctl file
0767  * @buffer: the user buffer
0768  * @lenp: the size of the user buffer
0769  * @ppos: file position
0770  *
0771  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
0772  * values from/to the user buffer, treated as an ASCII string.
0773  *
0774  * Returns 0 on success.
0775  */
0776 int proc_douintvec(struct ctl_table *table, int write, void *buffer,
0777         size_t *lenp, loff_t *ppos)
0778 {
0779     return do_proc_douintvec(table, write, buffer, lenp, ppos,
0780                  do_proc_douintvec_conv, NULL);
0781 }
0782 
0783 /*
0784  * Taint values can only be increased
0785  * This means we can safely use a temporary.
0786  */
0787 static int proc_taint(struct ctl_table *table, int write,
0788                    void *buffer, size_t *lenp, loff_t *ppos)
0789 {
0790     struct ctl_table t;
0791     unsigned long tmptaint = get_taint();
0792     int err;
0793 
0794     if (write && !capable(CAP_SYS_ADMIN))
0795         return -EPERM;
0796 
0797     t = *table;
0798     t.data = &tmptaint;
0799     err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
0800     if (err < 0)
0801         return err;
0802 
0803     if (write) {
0804         int i;
0805 
0806         /*
0807          * If we are relying on panic_on_taint not producing
0808          * false positives due to userspace input, bail out
0809          * before setting the requested taint flags.
0810          */
0811         if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
0812             return -EINVAL;
0813 
0814         /*
0815          * Poor man's atomic or. Not worth adding a primitive
0816          * to everyone's atomic.h for this
0817          */
0818         for (i = 0; i < TAINT_FLAGS_COUNT; i++)
0819             if ((1UL << i) & tmptaint)
0820                 add_taint(i, LOCKDEP_STILL_OK);
0821     }
0822 
0823     return err;
0824 }
0825 
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
    int *min;
    int *max;
};
0839 
0840 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
0841                     int *valp,
0842                     int write, void *data)
0843 {
0844     int tmp, ret;
0845     struct do_proc_dointvec_minmax_conv_param *param = data;
0846     /*
0847      * If writing, first do so via a temporary local int so we can
0848      * bounds-check it before touching *valp.
0849      */
0850     int *ip = write ? &tmp : valp;
0851 
0852     ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
0853     if (ret)
0854         return ret;
0855 
0856     if (write) {
0857         if ((param->min && *param->min > tmp) ||
0858             (param->max && *param->max < tmp))
0859             return -EINVAL;
0860         WRITE_ONCE(*valp, tmp);
0861     }
0862 
0863     return 0;
0864 }
0865 
0866 /**
0867  * proc_dointvec_minmax - read a vector of integers with min/max values
0868  * @table: the sysctl table
0869  * @write: %TRUE if this is a write to the sysctl file
0870  * @buffer: the user buffer
0871  * @lenp: the size of the user buffer
0872  * @ppos: file position
0873  *
0874  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
0875  * values from/to the user buffer, treated as an ASCII string.
0876  *
0877  * This routine will ensure the values are within the range specified by
0878  * table->extra1 (min) and table->extra2 (max).
0879  *
0880  * Returns 0 on success or -EINVAL on write when the range check fails.
0881  */
0882 int proc_dointvec_minmax(struct ctl_table *table, int write,
0883           void *buffer, size_t *lenp, loff_t *ppos)
0884 {
0885     struct do_proc_dointvec_minmax_conv_param param = {
0886         .min = (int *) table->extra1,
0887         .max = (int *) table->extra2,
0888     };
0889     return do_proc_dointvec(table, write, buffer, lenp, ppos,
0890                 do_proc_dointvec_minmax_conv, &param);
0891 }
0892 
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
    unsigned int *min;
    unsigned int *max;
};
0906 
0907 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
0908                      unsigned int *valp,
0909                      int write, void *data)
0910 {
0911     int ret;
0912     unsigned int tmp;
0913     struct do_proc_douintvec_minmax_conv_param *param = data;
0914     /* write via temporary local uint for bounds-checking */
0915     unsigned int *up = write ? &tmp : valp;
0916 
0917     ret = do_proc_douintvec_conv(lvalp, up, write, data);
0918     if (ret)
0919         return ret;
0920 
0921     if (write) {
0922         if ((param->min && *param->min > tmp) ||
0923             (param->max && *param->max < tmp))
0924             return -ERANGE;
0925 
0926         WRITE_ONCE(*valp, tmp);
0927     }
0928 
0929     return 0;
0930 }
0931 
0932 /**
0933  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
0934  * @table: the sysctl table
0935  * @write: %TRUE if this is a write to the sysctl file
0936  * @buffer: the user buffer
0937  * @lenp: the size of the user buffer
0938  * @ppos: file position
0939  *
0940  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
0941  * values from/to the user buffer, treated as an ASCII string. Negative
0942  * strings are not allowed.
0943  *
0944  * This routine will ensure the values are within the range specified by
0945  * table->extra1 (min) and table->extra2 (max). There is a final sanity
0946  * check for UINT_MAX to avoid having to support wrap around uses from
0947  * userspace.
0948  *
0949  * Returns 0 on success or -ERANGE on write when the range check fails.
0950  */
0951 int proc_douintvec_minmax(struct ctl_table *table, int write,
0952               void *buffer, size_t *lenp, loff_t *ppos)
0953 {
0954     struct do_proc_douintvec_minmax_conv_param param = {
0955         .min = (unsigned int *) table->extra1,
0956         .max = (unsigned int *) table->extra2,
0957     };
0958     return do_proc_douintvec(table, write, buffer, lenp, ppos,
0959                  do_proc_douintvec_minmax_conv, &param);
0960 }
0961 
0962 /**
0963  * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
0964  * @table: the sysctl table
0965  * @write: %TRUE if this is a write to the sysctl file
0966  * @buffer: the user buffer
0967  * @lenp: the size of the user buffer
0968  * @ppos: file position
0969  *
0970  * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
0971  * values from/to the user buffer, treated as an ASCII string. Negative
0972  * strings are not allowed.
0973  *
0974  * This routine will ensure the values are within the range specified by
0975  * table->extra1 (min) and table->extra2 (max).
0976  *
0977  * Returns 0 on success or an error on write when the range check fails.
0978  */
0979 int proc_dou8vec_minmax(struct ctl_table *table, int write,
0980             void *buffer, size_t *lenp, loff_t *ppos)
0981 {
0982     struct ctl_table tmp;
0983     unsigned int min = 0, max = 255U, val;
0984     u8 *data = table->data;
0985     struct do_proc_douintvec_minmax_conv_param param = {
0986         .min = &min,
0987         .max = &max,
0988     };
0989     int res;
0990 
0991     /* Do not support arrays yet. */
0992     if (table->maxlen != sizeof(u8))
0993         return -EINVAL;
0994 
0995     if (table->extra1) {
0996         min = *(unsigned int *) table->extra1;
0997         if (min > 255U)
0998             return -EINVAL;
0999     }
1000     if (table->extra2) {
1001         max = *(unsigned int *) table->extra2;
1002         if (max > 255U)
1003             return -EINVAL;
1004     }
1005 
1006     tmp = *table;
1007 
1008     tmp.maxlen = sizeof(val);
1009     tmp.data = &val;
1010     val = READ_ONCE(*data);
1011     res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1012                 do_proc_douintvec_minmax_conv, &param);
1013     if (res)
1014         return res;
1015     if (write)
1016         WRITE_ONCE(*data, val);
1017     return 0;
1018 }
1019 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1020 
1021 #ifdef CONFIG_MAGIC_SYSRQ
1022 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
1023                 void *buffer, size_t *lenp, loff_t *ppos)
1024 {
1025     int tmp, ret;
1026 
1027     tmp = sysrq_mask();
1028 
1029     ret = __do_proc_dointvec(&tmp, table, write, buffer,
1030                    lenp, ppos, NULL, NULL);
1031     if (ret || !write)
1032         return ret;
1033 
1034     if (write)
1035         sysrq_toggle_support(tmp);
1036 
1037     return 0;
1038 }
1039 #endif
1040 
1041 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1042         int write, void *buffer, size_t *lenp, loff_t *ppos,
1043         unsigned long convmul, unsigned long convdiv)
1044 {
1045     unsigned long *i, *min, *max;
1046     int vleft, first = 1, err = 0;
1047     size_t left;
1048     char *p;
1049 
1050     if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1051         *lenp = 0;
1052         return 0;
1053     }
1054 
1055     i = (unsigned long *) data;
1056     min = (unsigned long *) table->extra1;
1057     max = (unsigned long *) table->extra2;
1058     vleft = table->maxlen / sizeof(unsigned long);
1059     left = *lenp;
1060 
1061     if (write) {
1062         if (proc_first_pos_non_zero_ignore(ppos, table))
1063             goto out;
1064 
1065         if (left > PAGE_SIZE - 1)
1066             left = PAGE_SIZE - 1;
1067         p = buffer;
1068     }
1069 
1070     for (; left && vleft--; i++, first = 0) {
1071         unsigned long val;
1072 
1073         if (write) {
1074             bool neg;
1075 
1076             left -= proc_skip_spaces(&p);
1077             if (!left)
1078                 break;
1079 
1080             err = proc_get_long(&p, &left, &val, &neg,
1081                          proc_wspace_sep,
1082                          sizeof(proc_wspace_sep), NULL);
1083             if (err || neg) {
1084                 err = -EINVAL;
1085                 break;
1086             }
1087 
1088             val = convmul * val / convdiv;
1089             if ((min && val < *min) || (max && val > *max)) {
1090                 err = -EINVAL;
1091                 break;
1092             }
1093             WRITE_ONCE(*i, val);
1094         } else {
1095             val = convdiv * READ_ONCE(*i) / convmul;
1096             if (!first)
1097                 proc_put_char(&buffer, &left, '\t');
1098             proc_put_long(&buffer, &left, val, false);
1099         }
1100     }
1101 
1102     if (!write && !first && left && !err)
1103         proc_put_char(&buffer, &left, '\n');
1104     if (write && !err)
1105         left -= proc_skip_spaces(&p);
1106     if (write && first)
1107         return err ? : -EINVAL;
1108     *lenp -= left;
1109 out:
1110     *ppos += *lenp;
1111     return err;
1112 }
1113 
1114 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1115         void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1116         unsigned long convdiv)
1117 {
1118     return __do_proc_doulongvec_minmax(table->data, table, write,
1119             buffer, lenp, ppos, convmul, convdiv);
1120 }
1121 
1122 /**
1123  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1124  * @table: the sysctl table
1125  * @write: %TRUE if this is a write to the sysctl file
1126  * @buffer: the user buffer
1127  * @lenp: the size of the user buffer
1128  * @ppos: file position
1129  *
1130  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1131  * values from/to the user buffer, treated as an ASCII string.
1132  *
1133  * This routine will ensure the values are within the range specified by
1134  * table->extra1 (min) and table->extra2 (max).
1135  *
1136  * Returns 0 on success.
1137  */
1138 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1139                void *buffer, size_t *lenp, loff_t *ppos)
1140 {
1141     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1142 }
1143 
1144 /**
1145  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1146  * @table: the sysctl table
1147  * @write: %TRUE if this is a write to the sysctl file
1148  * @buffer: the user buffer
1149  * @lenp: the size of the user buffer
1150  * @ppos: file position
1151  *
1152  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1153  * values from/to the user buffer, treated as an ASCII string. The values
1154  * are treated as milliseconds, and converted to jiffies when they are stored.
1155  *
1156  * This routine will ensure the values are within the range specified by
1157  * table->extra1 (min) and table->extra2 (max).
1158  *
1159  * Returns 0 on success.
1160  */
1161 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1162                       void *buffer, size_t *lenp, loff_t *ppos)
1163 {
1164     return do_proc_doulongvec_minmax(table, write, buffer,
1165                      lenp, ppos, HZ, 1000l);
1166 }
1167 
1168 
1169 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1170                      int *valp,
1171                      int write, void *data)
1172 {
1173     if (write) {
1174         if (*lvalp > INT_MAX / HZ)
1175             return 1;
1176         if (*negp)
1177             WRITE_ONCE(*valp, -*lvalp * HZ);
1178         else
1179             WRITE_ONCE(*valp, *lvalp * HZ);
1180     } else {
1181         int val = READ_ONCE(*valp);
1182         unsigned long lval;
1183         if (val < 0) {
1184             *negp = true;
1185             lval = -(unsigned long)val;
1186         } else {
1187             *negp = false;
1188             lval = (unsigned long)val;
1189         }
1190         *lvalp = lval / HZ;
1191     }
1192     return 0;
1193 }
1194 
1195 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1196                         int *valp,
1197                         int write, void *data)
1198 {
1199     if (write) {
1200         if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1201             return 1;
1202         *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1203     } else {
1204         int val = *valp;
1205         unsigned long lval;
1206         if (val < 0) {
1207             *negp = true;
1208             lval = -(unsigned long)val;
1209         } else {
1210             *negp = false;
1211             lval = (unsigned long)val;
1212         }
1213         *lvalp = jiffies_to_clock_t(lval);
1214     }
1215     return 0;
1216 }
1217 
/*
 * Converter between milliseconds (user side) and jiffies (stored int).
 *
 * Write: the signed user value is passed through msecs_to_jiffies(); a
 * result above INT_MAX is rejected by returning 1, otherwise it is stored
 * in *valp.
 *
 * Read: *valp is split into sign (*negp) and magnitude, and the magnitude
 * passed through jiffies_to_msecs() is returned through *lvalp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                                            int *valp,
                                            int write, void *data)
{
    unsigned long magnitude;
    int stored;

    if (write) {
        unsigned long ticks = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

        if (ticks > INT_MAX)
            return 1;
        WRITE_ONCE(*valp, (int)ticks);
        return 0;
    }

    stored = READ_ONCE(*valp);
    if (stored < 0) {
        *negp = true;
        magnitude = -(unsigned long)stored;
    } else {
        *negp = false;
        magnitude = (unsigned long)stored;
    }
    *lvalp = jiffies_to_msecs(magnitude);

    return 0;
}
1242 
1243 static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1244                         int *valp, int write, void *data)
1245 {
1246     int tmp, ret;
1247     struct do_proc_dointvec_minmax_conv_param *param = data;
1248     /*
1249      * If writing, first do so via a temporary local int so we can
1250      * bounds-check it before touching *valp.
1251      */
1252     int *ip = write ? &tmp : valp;
1253 
1254     ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1255     if (ret)
1256         return ret;
1257 
1258     if (write) {
1259         if ((param->min && *param->min > tmp) ||
1260                 (param->max && *param->max < tmp))
1261             return -EINVAL;
1262         *valp = tmp;
1263     }
1264     return 0;
1265 }
1266 
1267 /**
1268  * proc_dointvec_jiffies - read a vector of integers as seconds
1269  * @table: the sysctl table
1270  * @write: %TRUE if this is a write to the sysctl file
1271  * @buffer: the user buffer
1272  * @lenp: the size of the user buffer
1273  * @ppos: file position
1274  *
1275  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1276  * values from/to the user buffer, treated as an ASCII string.
1277  * The values read are assumed to be in seconds, and are converted into
1278  * jiffies.
1279  *
1280  * Returns 0 on success.
1281  */
1282 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1283               void *buffer, size_t *lenp, loff_t *ppos)
1284 {
1285     return do_proc_dointvec(table,write,buffer,lenp,ppos,
1286                     do_proc_dointvec_jiffies_conv,NULL);
1287 }
1288 
1289 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1290               void *buffer, size_t *lenp, loff_t *ppos)
1291 {
1292     struct do_proc_dointvec_minmax_conv_param param = {
1293         .min = (int *) table->extra1,
1294         .max = (int *) table->extra2,
1295     };
1296     return do_proc_dointvec(table, write, buffer, lenp, ppos,
1297             do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1298 }
1299 
1300 /**
1301  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1302  * @table: the sysctl table
1303  * @write: %TRUE if this is a write to the sysctl file
1304  * @buffer: the user buffer
1305  * @lenp: the size of the user buffer
1306  * @ppos: pointer to the file position
1307  *
1308  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1309  * values from/to the user buffer, treated as an ASCII string.
1310  * The values read are assumed to be in 1/USER_HZ seconds, and
1311  * are converted into jiffies.
1312  *
1313  * Returns 0 on success.
1314  */
1315 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1316                  void *buffer, size_t *lenp, loff_t *ppos)
1317 {
1318     return do_proc_dointvec(table, write, buffer, lenp, ppos,
1319                 do_proc_dointvec_userhz_jiffies_conv, NULL);
1320 }
1321 
1322 /**
1323  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1324  * @table: the sysctl table
1325  * @write: %TRUE if this is a write to the sysctl file
1326  * @buffer: the user buffer
1327  * @lenp: the size of the user buffer
1328  * @ppos: file position
1329  * @ppos: the current position in the file
1330  *
1331  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1332  * values from/to the user buffer, treated as an ASCII string.
1333  * The values read are assumed to be in 1/1000 seconds, and
1334  * are converted into jiffies.
1335  *
1336  * Returns 0 on success.
1337  */
1338 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1339         size_t *lenp, loff_t *ppos)
1340 {
1341     return do_proc_dointvec(table, write, buffer, lenp, ppos,
1342                 do_proc_dointvec_ms_jiffies_conv, NULL);
1343 }
1344 
1345 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1346         size_t *lenp, loff_t *ppos)
1347 {
1348     struct pid *new_pid;
1349     pid_t tmp;
1350     int r;
1351 
1352     tmp = pid_vnr(cad_pid);
1353 
1354     r = __do_proc_dointvec(&tmp, table, write, buffer,
1355                    lenp, ppos, NULL, NULL);
1356     if (r || !write)
1357         return r;
1358 
1359     new_pid = find_get_pid(tmp);
1360     if (!new_pid)
1361         return -ESRCH;
1362 
1363     put_pid(xchg(&cad_pid, new_pid));
1364     return 0;
1365 }
1366 
1367 /**
1368  * proc_do_large_bitmap - read/write from/to a large bitmap
1369  * @table: the sysctl table
1370  * @write: %TRUE if this is a write to the sysctl file
1371  * @buffer: the user buffer
1372  * @lenp: the size of the user buffer
1373  * @ppos: file position
1374  *
1375  * The bitmap is stored at table->data and the bitmap length (in bits)
1376  * in table->maxlen.
1377  *
1378  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
1379  * large bitmaps may be represented in a compact manner. Writing into
1380  * the file will clear the bitmap then update it with the given input.
1381  *
1382  * Returns 0 on success.
1383  */
1384 int proc_do_large_bitmap(struct ctl_table *table, int write,
1385              void *buffer, size_t *lenp, loff_t *ppos)
1386 {
1387     int err = 0;
1388     size_t left = *lenp;
1389     unsigned long bitmap_len = table->maxlen;
1390     unsigned long *bitmap = *(unsigned long **) table->data;
1391     unsigned long *tmp_bitmap = NULL;
1392     char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
1393 
1394     if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
1395         *lenp = 0;
1396         return 0;
1397     }
1398 
1399     if (write) {
1400         char *p = buffer;
1401         size_t skipped = 0;
1402 
1403         if (left > PAGE_SIZE - 1) {
1404             left = PAGE_SIZE - 1;
1405             /* How much of the buffer we'll skip this pass */
1406             skipped = *lenp - left;
1407         }
1408 
1409         tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
1410         if (!tmp_bitmap)
1411             return -ENOMEM;
1412         proc_skip_char(&p, &left, '\n');
1413         while (!err && left) {
1414             unsigned long val_a, val_b;
1415             bool neg;
1416             size_t saved_left;
1417 
1418             /* In case we stop parsing mid-number, we can reset */
1419             saved_left = left;
1420             err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
1421                          sizeof(tr_a), &c);
1422             /*
1423              * If we consumed the entirety of a truncated buffer or
1424              * only one char is left (may be a "-"), then stop here,
1425              * reset, & come back for more.
1426              */
1427             if ((left <= 1) && skipped) {
1428                 left = saved_left;
1429                 break;
1430             }
1431 
1432             if (err)
1433                 break;
1434             if (val_a >= bitmap_len || neg) {
1435                 err = -EINVAL;
1436                 break;
1437             }
1438 
1439             val_b = val_a;
1440             if (left) {
1441                 p++;
1442                 left--;
1443             }
1444 
1445             if (c == '-') {
1446                 err = proc_get_long(&p, &left, &val_b,
1447                              &neg, tr_b, sizeof(tr_b),
1448                              &c);
1449                 /*
1450                  * If we consumed all of a truncated buffer or
1451                  * then stop here, reset, & come back for more.
1452                  */
1453                 if (!left && skipped) {
1454                     left = saved_left;
1455                     break;
1456                 }
1457 
1458                 if (err)
1459                     break;
1460                 if (val_b >= bitmap_len || neg ||
1461                     val_a > val_b) {
1462                     err = -EINVAL;
1463                     break;
1464                 }
1465                 if (left) {
1466                     p++;
1467                     left--;
1468                 }
1469             }
1470 
1471             bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
1472             proc_skip_char(&p, &left, '\n');
1473         }
1474         left += skipped;
1475     } else {
1476         unsigned long bit_a, bit_b = 0;
1477         bool first = 1;
1478 
1479         while (left) {
1480             bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
1481             if (bit_a >= bitmap_len)
1482                 break;
1483             bit_b = find_next_zero_bit(bitmap, bitmap_len,
1484                            bit_a + 1) - 1;
1485 
1486             if (!first)
1487                 proc_put_char(&buffer, &left, ',');
1488             proc_put_long(&buffer, &left, bit_a, false);
1489             if (bit_a != bit_b) {
1490                 proc_put_char(&buffer, &left, '-');
1491                 proc_put_long(&buffer, &left, bit_b, false);
1492             }
1493 
1494             first = 0; bit_b++;
1495         }
1496         proc_put_char(&buffer, &left, '\n');
1497     }
1498 
1499     if (!err) {
1500         if (write) {
1501             if (*ppos)
1502                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
1503             else
1504                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
1505         }
1506         *lenp -= left;
1507         *ppos += *lenp;
1508     }
1509 
1510     bitmap_free(tmp_bitmap);
1511     return err;
1512 }
1513 
1514 #else /* CONFIG_PROC_SYSCTL */
1515 
1516 int proc_dostring(struct ctl_table *table, int write,
1517           void *buffer, size_t *lenp, loff_t *ppos)
1518 {
1519     return -ENOSYS;
1520 }
1521 
1522 int proc_dobool(struct ctl_table *table, int write,
1523         void *buffer, size_t *lenp, loff_t *ppos)
1524 {
1525     return -ENOSYS;
1526 }
1527 
1528 int proc_dointvec(struct ctl_table *table, int write,
1529           void *buffer, size_t *lenp, loff_t *ppos)
1530 {
1531     return -ENOSYS;
1532 }
1533 
1534 int proc_douintvec(struct ctl_table *table, int write,
1535           void *buffer, size_t *lenp, loff_t *ppos)
1536 {
1537     return -ENOSYS;
1538 }
1539 
1540 int proc_dointvec_minmax(struct ctl_table *table, int write,
1541             void *buffer, size_t *lenp, loff_t *ppos)
1542 {
1543     return -ENOSYS;
1544 }
1545 
1546 int proc_douintvec_minmax(struct ctl_table *table, int write,
1547               void *buffer, size_t *lenp, loff_t *ppos)
1548 {
1549     return -ENOSYS;
1550 }
1551 
1552 int proc_dou8vec_minmax(struct ctl_table *table, int write,
1553             void *buffer, size_t *lenp, loff_t *ppos)
1554 {
1555     return -ENOSYS;
1556 }
1557 
1558 int proc_dointvec_jiffies(struct ctl_table *table, int write,
1559             void *buffer, size_t *lenp, loff_t *ppos)
1560 {
1561     return -ENOSYS;
1562 }
1563 
1564 int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1565                     void *buffer, size_t *lenp, loff_t *ppos)
1566 {
1567     return -ENOSYS;
1568 }
1569 
1570 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1571             void *buffer, size_t *lenp, loff_t *ppos)
1572 {
1573     return -ENOSYS;
1574 }
1575 
1576 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1577                  void *buffer, size_t *lenp, loff_t *ppos)
1578 {
1579     return -ENOSYS;
1580 }
1581 
1582 int proc_doulongvec_minmax(struct ctl_table *table, int write,
1583             void *buffer, size_t *lenp, loff_t *ppos)
1584 {
1585     return -ENOSYS;
1586 }
1587 
1588 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1589                       void *buffer, size_t *lenp, loff_t *ppos)
1590 {
1591     return -ENOSYS;
1592 }
1593 
1594 int proc_do_large_bitmap(struct ctl_table *table, int write,
1595              void *buffer, size_t *lenp, loff_t *ppos)
1596 {
1597     return -ENOSYS;
1598 }
1599 
1600 #endif /* CONFIG_PROC_SYSCTL */
1601 
1602 #if defined(CONFIG_SYSCTL)
1603 int proc_do_static_key(struct ctl_table *table, int write,
1604                void *buffer, size_t *lenp, loff_t *ppos)
1605 {
1606     struct static_key *key = (struct static_key *)table->data;
1607     static DEFINE_MUTEX(static_key_mutex);
1608     int val, ret;
1609     struct ctl_table tmp = {
1610         .data   = &val,
1611         .maxlen = sizeof(val),
1612         .mode   = table->mode,
1613         .extra1 = SYSCTL_ZERO,
1614         .extra2 = SYSCTL_ONE,
1615     };
1616 
1617     if (write && !capable(CAP_SYS_ADMIN))
1618         return -EPERM;
1619 
1620     mutex_lock(&static_key_mutex);
1621     val = static_key_enabled(key);
1622     ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1623     if (write && !ret) {
1624         if (val)
1625             static_key_enable(key);
1626         else
1627             static_key_disable(key);
1628     }
1629     mutex_unlock(&static_key_mutex);
1630     return ret;
1631 }
1632 
1633 static struct ctl_table kern_table[] = {
1634 #ifdef CONFIG_NUMA_BALANCING
1635     {
1636         .procname   = "numa_balancing",
1637         .data       = NULL, /* filled in by handler */
1638         .maxlen     = sizeof(unsigned int),
1639         .mode       = 0644,
1640         .proc_handler   = sysctl_numa_balancing,
1641         .extra1     = SYSCTL_ZERO,
1642         .extra2     = SYSCTL_FOUR,
1643     },
1644 #endif /* CONFIG_NUMA_BALANCING */
1645     {
1646         .procname   = "panic",
1647         .data       = &panic_timeout,
1648         .maxlen     = sizeof(int),
1649         .mode       = 0644,
1650         .proc_handler   = proc_dointvec,
1651     },
1652 #ifdef CONFIG_PROC_SYSCTL
1653     {
1654         .procname   = "tainted",
1655         .maxlen     = sizeof(long),
1656         .mode       = 0644,
1657         .proc_handler   = proc_taint,
1658     },
1659     {
1660         .procname   = "sysctl_writes_strict",
1661         .data       = &sysctl_writes_strict,
1662         .maxlen     = sizeof(int),
1663         .mode       = 0644,
1664         .proc_handler   = proc_dointvec_minmax,
1665         .extra1     = SYSCTL_NEG_ONE,
1666         .extra2     = SYSCTL_ONE,
1667     },
1668 #endif
1669     {
1670         .procname   = "print-fatal-signals",
1671         .data       = &print_fatal_signals,
1672         .maxlen     = sizeof(int),
1673         .mode       = 0644,
1674         .proc_handler   = proc_dointvec,
1675     },
1676 #ifdef CONFIG_SPARC
1677     {
1678         .procname   = "reboot-cmd",
1679         .data       = reboot_command,
1680         .maxlen     = 256,
1681         .mode       = 0644,
1682         .proc_handler   = proc_dostring,
1683     },
1684     {
1685         .procname   = "stop-a",
1686         .data       = &stop_a_enabled,
1687         .maxlen     = sizeof (int),
1688         .mode       = 0644,
1689         .proc_handler   = proc_dointvec,
1690     },
1691     {
1692         .procname   = "scons-poweroff",
1693         .data       = &scons_pwroff,
1694         .maxlen     = sizeof (int),
1695         .mode       = 0644,
1696         .proc_handler   = proc_dointvec,
1697     },
1698 #endif
1699 #ifdef CONFIG_SPARC64
1700     {
1701         .procname   = "tsb-ratio",
1702         .data       = &sysctl_tsb_ratio,
1703         .maxlen     = sizeof (int),
1704         .mode       = 0644,
1705         .proc_handler   = proc_dointvec,
1706     },
1707 #endif
1708 #ifdef CONFIG_PARISC
1709     {
1710         .procname   = "soft-power",
1711         .data       = &pwrsw_enabled,
1712         .maxlen     = sizeof (int),
1713         .mode       = 0644,
1714         .proc_handler   = proc_dointvec,
1715     },
1716 #endif
1717 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
1718     {
1719         .procname   = "unaligned-trap",
1720         .data       = &unaligned_enabled,
1721         .maxlen     = sizeof (int),
1722         .mode       = 0644,
1723         .proc_handler   = proc_dointvec,
1724     },
1725 #endif
1726 #ifdef CONFIG_STACK_TRACER
1727     {
1728         .procname   = "stack_tracer_enabled",
1729         .data       = &stack_tracer_enabled,
1730         .maxlen     = sizeof(int),
1731         .mode       = 0644,
1732         .proc_handler   = stack_trace_sysctl,
1733     },
1734 #endif
1735 #ifdef CONFIG_TRACING
1736     {
1737         .procname   = "ftrace_dump_on_oops",
1738         .data       = &ftrace_dump_on_oops,
1739         .maxlen     = sizeof(int),
1740         .mode       = 0644,
1741         .proc_handler   = proc_dointvec,
1742     },
1743     {
1744         .procname   = "traceoff_on_warning",
1745         .data       = &__disable_trace_on_warning,
1746         .maxlen     = sizeof(__disable_trace_on_warning),
1747         .mode       = 0644,
1748         .proc_handler   = proc_dointvec,
1749     },
1750     {
1751         .procname   = "tracepoint_printk",
1752         .data       = &tracepoint_printk,
1753         .maxlen     = sizeof(tracepoint_printk),
1754         .mode       = 0644,
1755         .proc_handler   = tracepoint_printk_sysctl,
1756     },
1757 #endif
1758 #ifdef CONFIG_MODULES
1759     {
1760         .procname   = "modprobe",
1761         .data       = &modprobe_path,
1762         .maxlen     = KMOD_PATH_LEN,
1763         .mode       = 0644,
1764         .proc_handler   = proc_dostring,
1765     },
1766     {
1767         .procname   = "modules_disabled",
1768         .data       = &modules_disabled,
1769         .maxlen     = sizeof(int),
1770         .mode       = 0644,
1771         /* only handle a transition from default "0" to "1" */
1772         .proc_handler   = proc_dointvec_minmax,
1773         .extra1     = SYSCTL_ONE,
1774         .extra2     = SYSCTL_ONE,
1775     },
1776 #endif
1777 #ifdef CONFIG_UEVENT_HELPER
1778     {
1779         .procname   = "hotplug",
1780         .data       = &uevent_helper,
1781         .maxlen     = UEVENT_HELPER_PATH_LEN,
1782         .mode       = 0644,
1783         .proc_handler   = proc_dostring,
1784     },
1785 #endif
1786 #ifdef CONFIG_MAGIC_SYSRQ
1787     {
1788         .procname   = "sysrq",
1789         .data       = NULL,
1790         .maxlen     = sizeof (int),
1791         .mode       = 0644,
1792         .proc_handler   = sysrq_sysctl_handler,
1793     },
1794 #endif
1795 #ifdef CONFIG_PROC_SYSCTL
1796     {
1797         .procname   = "cad_pid",
1798         .data       = NULL,
1799         .maxlen     = sizeof (int),
1800         .mode       = 0600,
1801         .proc_handler   = proc_do_cad_pid,
1802     },
1803 #endif
1804     {
1805         .procname   = "threads-max",
1806         .data       = NULL,
1807         .maxlen     = sizeof(int),
1808         .mode       = 0644,
1809         .proc_handler   = sysctl_max_threads,
1810     },
1811     {
1812         .procname   = "usermodehelper",
1813         .mode       = 0555,
1814         .child      = usermodehelper_table,
1815     },
1816     {
1817         .procname   = "overflowuid",
1818         .data       = &overflowuid,
1819         .maxlen     = sizeof(int),
1820         .mode       = 0644,
1821         .proc_handler   = proc_dointvec_minmax,
1822         .extra1     = SYSCTL_ZERO,
1823         .extra2     = SYSCTL_MAXOLDUID,
1824     },
1825     {
1826         .procname   = "overflowgid",
1827         .data       = &overflowgid,
1828         .maxlen     = sizeof(int),
1829         .mode       = 0644,
1830         .proc_handler   = proc_dointvec_minmax,
1831         .extra1     = SYSCTL_ZERO,
1832         .extra2     = SYSCTL_MAXOLDUID,
1833     },
1834 #ifdef CONFIG_S390
1835     {
1836         .procname   = "userprocess_debug",
1837         .data       = &show_unhandled_signals,
1838         .maxlen     = sizeof(int),
1839         .mode       = 0644,
1840         .proc_handler   = proc_dointvec,
1841     },
1842 #endif
1843     {
1844         .procname   = "pid_max",
1845         .data       = &pid_max,
1846         .maxlen     = sizeof (int),
1847         .mode       = 0644,
1848         .proc_handler   = proc_dointvec_minmax,
1849         .extra1     = &pid_max_min,
1850         .extra2     = &pid_max_max,
1851     },
1852     {
1853         .procname   = "panic_on_oops",
1854         .data       = &panic_on_oops,
1855         .maxlen     = sizeof(int),
1856         .mode       = 0644,
1857         .proc_handler   = proc_dointvec,
1858     },
1859     {
1860         .procname   = "panic_print",
1861         .data       = &panic_print,
1862         .maxlen     = sizeof(unsigned long),
1863         .mode       = 0644,
1864         .proc_handler   = proc_doulongvec_minmax,
1865     },
1866     {
1867         .procname   = "ngroups_max",
1868         .data       = (void *)&ngroups_max,
1869         .maxlen     = sizeof (int),
1870         .mode       = 0444,
1871         .proc_handler   = proc_dointvec,
1872     },
1873     {
1874         .procname   = "cap_last_cap",
1875         .data       = (void *)&cap_last_cap,
1876         .maxlen     = sizeof(int),
1877         .mode       = 0444,
1878         .proc_handler   = proc_dointvec,
1879     },
1880 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
1881     {
1882         .procname       = "unknown_nmi_panic",
1883         .data           = &unknown_nmi_panic,
1884         .maxlen         = sizeof (int),
1885         .mode           = 0644,
1886         .proc_handler   = proc_dointvec,
1887     },
1888 #endif
1889 
1890 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
1891     defined(CONFIG_DEBUG_STACKOVERFLOW)
1892     {
1893         .procname   = "panic_on_stackoverflow",
1894         .data       = &sysctl_panic_on_stackoverflow,
1895         .maxlen     = sizeof(int),
1896         .mode       = 0644,
1897         .proc_handler   = proc_dointvec,
1898     },
1899 #endif
1900 #if defined(CONFIG_X86)
1901     {
1902         .procname   = "panic_on_unrecovered_nmi",
1903         .data       = &panic_on_unrecovered_nmi,
1904         .maxlen     = sizeof(int),
1905         .mode       = 0644,
1906         .proc_handler   = proc_dointvec,
1907     },
1908     {
1909         .procname   = "panic_on_io_nmi",
1910         .data       = &panic_on_io_nmi,
1911         .maxlen     = sizeof(int),
1912         .mode       = 0644,
1913         .proc_handler   = proc_dointvec,
1914     },
1915     {
1916         .procname   = "bootloader_type",
1917         .data       = &bootloader_type,
1918         .maxlen     = sizeof (int),
1919         .mode       = 0444,
1920         .proc_handler   = proc_dointvec,
1921     },
1922     {
1923         .procname   = "bootloader_version",
1924         .data       = &bootloader_version,
1925         .maxlen     = sizeof (int),
1926         .mode       = 0444,
1927         .proc_handler   = proc_dointvec,
1928     },
1929     {
1930         .procname   = "io_delay_type",
1931         .data       = &io_delay_type,
1932         .maxlen     = sizeof(int),
1933         .mode       = 0644,
1934         .proc_handler   = proc_dointvec,
1935     },
1936 #endif
1937 #if defined(CONFIG_MMU)
1938     {
1939         .procname   = "randomize_va_space",
1940         .data       = &randomize_va_space,
1941         .maxlen     = sizeof(int),
1942         .mode       = 0644,
1943         .proc_handler   = proc_dointvec,
1944     },
1945 #endif
1946 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1947     {
1948         .procname   = "spin_retry",
1949         .data       = &spin_retry,
1950         .maxlen     = sizeof (int),
1951         .mode       = 0644,
1952         .proc_handler   = proc_dointvec,
1953     },
1954 #endif
1955 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1956     {
1957         .procname   = "acpi_video_flags",
1958         .data       = &acpi_realmode_flags,
1959         .maxlen     = sizeof (unsigned long),
1960         .mode       = 0644,
1961         .proc_handler   = proc_doulongvec_minmax,
1962     },
1963 #endif
1964 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1965     {
1966         .procname   = "ignore-unaligned-usertrap",
1967         .data       = &no_unaligned_warning,
1968         .maxlen     = sizeof (int),
1969         .mode       = 0644,
1970         .proc_handler   = proc_dointvec,
1971     },
1972 #endif
1973 #ifdef CONFIG_IA64
1974     {
1975         .procname   = "unaligned-dump-stack",
1976         .data       = &unaligned_dump_stack,
1977         .maxlen     = sizeof (int),
1978         .mode       = 0644,
1979         .proc_handler   = proc_dointvec,
1980     },
1981 #endif
1982 #ifdef CONFIG_RT_MUTEXES
1983     {
1984         .procname   = "max_lock_depth",
1985         .data       = &max_lock_depth,
1986         .maxlen     = sizeof(int),
1987         .mode       = 0644,
1988         .proc_handler   = proc_dointvec,
1989     },
1990 #endif
1991 #ifdef CONFIG_KEYS
1992     {
1993         .procname   = "keys",
1994         .mode       = 0555,
1995         .child      = key_sysctls,
1996     },
1997 #endif
1998 #ifdef CONFIG_PERF_EVENTS
1999     /*
2000      * User-space scripts rely on the existence of this file
2001      * as a feature check for perf_events being enabled.
2002      *
2003      * So it's an ABI, do not remove!
2004      */
2005     {
2006         .procname   = "perf_event_paranoid",
2007         .data       = &sysctl_perf_event_paranoid,
2008         .maxlen     = sizeof(sysctl_perf_event_paranoid),
2009         .mode       = 0644,
2010         .proc_handler   = proc_dointvec,
2011     },
2012     {
2013         .procname   = "perf_event_mlock_kb",
2014         .data       = &sysctl_perf_event_mlock,
2015         .maxlen     = sizeof(sysctl_perf_event_mlock),
2016         .mode       = 0644,
2017         .proc_handler   = proc_dointvec,
2018     },
2019     {
2020         .procname   = "perf_event_max_sample_rate",
2021         .data       = &sysctl_perf_event_sample_rate,
2022         .maxlen     = sizeof(sysctl_perf_event_sample_rate),
2023         .mode       = 0644,
2024         .proc_handler   = perf_proc_update_handler,
2025         .extra1     = SYSCTL_ONE,
2026     },
2027     {
2028         .procname   = "perf_cpu_time_max_percent",
2029         .data       = &sysctl_perf_cpu_time_max_percent,
2030         .maxlen     = sizeof(sysctl_perf_cpu_time_max_percent),
2031         .mode       = 0644,
2032         .proc_handler   = perf_cpu_time_max_percent_handler,
2033         .extra1     = SYSCTL_ZERO,
2034         .extra2     = SYSCTL_ONE_HUNDRED,
2035     },
2036     {
2037         .procname   = "perf_event_max_stack",
2038         .data       = &sysctl_perf_event_max_stack,
2039         .maxlen     = sizeof(sysctl_perf_event_max_stack),
2040         .mode       = 0644,
2041         .proc_handler   = perf_event_max_stack_handler,
2042         .extra1     = SYSCTL_ZERO,
2043         .extra2     = (void *)&six_hundred_forty_kb,
2044     },
2045     {
2046         .procname   = "perf_event_max_contexts_per_stack",
2047         .data       = &sysctl_perf_event_max_contexts_per_stack,
2048         .maxlen     = sizeof(sysctl_perf_event_max_contexts_per_stack),
2049         .mode       = 0644,
2050         .proc_handler   = perf_event_max_stack_handler,
2051         .extra1     = SYSCTL_ZERO,
2052         .extra2     = SYSCTL_ONE_THOUSAND,
2053     },
2054 #endif
2055     {
2056         .procname   = "panic_on_warn",
2057         .data       = &panic_on_warn,
2058         .maxlen     = sizeof(int),
2059         .mode       = 0644,
2060         .proc_handler   = proc_dointvec_minmax,
2061         .extra1     = SYSCTL_ZERO,
2062         .extra2     = SYSCTL_ONE,
2063     },
2064 #ifdef CONFIG_TREE_RCU
2065     {
2066         .procname   = "panic_on_rcu_stall",
2067         .data       = &sysctl_panic_on_rcu_stall,
2068         .maxlen     = sizeof(sysctl_panic_on_rcu_stall),
2069         .mode       = 0644,
2070         .proc_handler   = proc_dointvec_minmax,
2071         .extra1     = SYSCTL_ZERO,
2072         .extra2     = SYSCTL_ONE,
2073     },
2074     {
2075         .procname   = "max_rcu_stall_to_panic",
2076         .data       = &sysctl_max_rcu_stall_to_panic,
2077         .maxlen     = sizeof(sysctl_max_rcu_stall_to_panic),
2078         .mode       = 0644,
2079         .proc_handler   = proc_dointvec_minmax,
2080         .extra1     = SYSCTL_ONE,
2081         .extra2     = SYSCTL_INT_MAX,
2082     },
2083 #endif
2084     { }
2085 };
2086 
/*
 * Upper bound for the "page-cluster" entry below.  The value is documented
 * as a logarithmic (power-of-two) page count, so it must stay below the
 * width of a 32-bit int to remain usable as a shift exponent.
 */
static const int max_page_cluster = 31;

/*
 * vm_table - sysctl entries that make up the "vm" base.
 *
 * The table is bound to the "vm" base by DECLARE_SYSCTL_BASE(vm, vm_table)
 * and registered from sysctl_init_bases().  Each entry gives the file name
 * (.procname), the backing object and its size (.data/.maxlen), the octal
 * access mode (.mode) and the handler invoked for the file (.proc_handler).
 * Where present, .extra1/.extra2 are extra arguments for the handler; for
 * the *_minmax handlers they appear to be the inclusive lower/upper bounds
 * (confirm against the handler definitions, which live outside this table).
 *
 * Entries wrapped in #if/#ifdef only exist when that configuration is
 * enabled.  The final empty "{ }" entry terminates the table.
 */
2087 static struct ctl_table vm_table[] = {
2088     {
2089         .procname   = "overcommit_memory",
2090         .data       = &sysctl_overcommit_memory,
2091         .maxlen     = sizeof(sysctl_overcommit_memory),
2092         .mode       = 0644,
2093         .proc_handler   = overcommit_policy_handler,
2094         .extra1     = SYSCTL_ZERO,
2095         .extra2     = SYSCTL_TWO,
2096     },
2097     {
2098         .procname   = "overcommit_ratio",
2099         .data       = &sysctl_overcommit_ratio,
2100         .maxlen     = sizeof(sysctl_overcommit_ratio),
2101         .mode       = 0644,
2102         .proc_handler   = overcommit_ratio_handler,
2103     },
2104     {
2105         .procname   = "overcommit_kbytes",
2106         .data       = &sysctl_overcommit_kbytes,
2107         .maxlen     = sizeof(sysctl_overcommit_kbytes),
2108         .mode       = 0644,
2109         .proc_handler   = overcommit_kbytes_handler,
2110     },
2111     {
2112         .procname   = "page-cluster",
2113         .data       = &page_cluster,
2114         .maxlen     = sizeof(int),
2115         .mode       = 0644,
2116         .proc_handler   = proc_dointvec_minmax,
2117         .extra1     = SYSCTL_ZERO,
             /* Cap the log2 value so it can never exceed a valid shift. */
             .extra2     = (void *)&max_page_cluster,
2118     },
2119     {
2120         .procname   = "dirtytime_expire_seconds",
2121         .data       = &dirtytime_expire_interval,
2122         .maxlen     = sizeof(dirtytime_expire_interval),
2123         .mode       = 0644,
2124         .proc_handler   = dirtytime_interval_handler,
2125         .extra1     = SYSCTL_ZERO,
2126     },
2127     {
2128         .procname   = "swappiness",
2129         .data       = &vm_swappiness,
2130         .maxlen     = sizeof(vm_swappiness),
2131         .mode       = 0644,
2132         .proc_handler   = proc_dointvec_minmax,
2133         .extra1     = SYSCTL_ZERO,
2134         .extra2     = SYSCTL_TWO_HUNDRED,
2135     },
2136 #ifdef CONFIG_NUMA
2137     {
2138         .procname   = "numa_stat",
2139         .data       = &sysctl_vm_numa_stat,
2140         .maxlen     = sizeof(int),
2141         .mode       = 0644,
2142         .proc_handler   = sysctl_vm_numa_stat_handler,
2143         .extra1     = SYSCTL_ZERO,
2144         .extra2     = SYSCTL_ONE,
2145     },
2146 #endif
2147 #ifdef CONFIG_HUGETLB_PAGE
         /*
          * The nr_*hugepages entries pass .data = NULL: this table gives
          * their handlers no backing variable.
          */
2148     {
2149         .procname   = "nr_hugepages",
2150         .data       = NULL,
2151         .maxlen     = sizeof(unsigned long),
2152         .mode       = 0644,
2153         .proc_handler   = hugetlb_sysctl_handler,
2154     },
2155 #ifdef CONFIG_NUMA
2156     {
2157         .procname   = "nr_hugepages_mempolicy",
2158         .data       = NULL,
2159         .maxlen     = sizeof(unsigned long),
2160         .mode       = 0644,
2161         .proc_handler   = hugetlb_mempolicy_sysctl_handler,
2162     },
2163 #endif
2164     {
2165         .procname   = "hugetlb_shm_group",
2166         .data       = &sysctl_hugetlb_shm_group,
2167         .maxlen     = sizeof(gid_t),
2168         .mode       = 0644,
2169         .proc_handler   = proc_dointvec,
2170     },
2171     {
2172         .procname   = "nr_overcommit_hugepages",
2173         .data       = NULL,
2174         .maxlen     = sizeof(unsigned long),
2175         .mode       = 0644,
2176         .proc_handler   = hugetlb_overcommit_handler,
2177     },
2178 #endif
2179     {
2180         .procname   = "lowmem_reserve_ratio",
2181         .data       = &sysctl_lowmem_reserve_ratio,
2182         .maxlen     = sizeof(sysctl_lowmem_reserve_ratio),
2183         .mode       = 0644,
2184         .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
2185     },
         /* Mode 0200: the file is writable by its owner and not readable. */
2186     {
2187         .procname   = "drop_caches",
2188         .data       = &sysctl_drop_caches,
2189         .maxlen     = sizeof(int),
2190         .mode       = 0200,
2191         .proc_handler   = drop_caches_sysctl_handler,
2192         .extra1     = SYSCTL_ONE,
2193         .extra2     = SYSCTL_FOUR,
2194     },
2195 #ifdef CONFIG_COMPACTION
2196     {
2197         .procname   = "compact_memory",
2198         .data       = NULL,
2199         .maxlen     = sizeof(int),
2200         .mode       = 0200,
2201         .proc_handler   = sysctl_compaction_handler,
2202     },
2203     {
2204         .procname   = "compaction_proactiveness",
2205         .data       = &sysctl_compaction_proactiveness,
2206         .maxlen     = sizeof(sysctl_compaction_proactiveness),
2207         .mode       = 0644,
2208         .proc_handler   = compaction_proactiveness_sysctl_handler,
2209         .extra1     = SYSCTL_ZERO,
2210         .extra2     = SYSCTL_ONE_HUNDRED,
2211     },
2212     {
2213         .procname   = "extfrag_threshold",
2214         .data       = &sysctl_extfrag_threshold,
2215         .maxlen     = sizeof(int),
2216         .mode       = 0644,
2217         .proc_handler   = proc_dointvec_minmax,
2218         .extra1     = SYSCTL_ZERO,
2219         .extra2     = (void *)&max_extfrag_threshold,
2220     },
2221     {
2222         .procname   = "compact_unevictable_allowed",
2223         .data       = &sysctl_compact_unevictable_allowed,
2224         .maxlen     = sizeof(int),
2225         .mode       = 0644,
2226         .proc_handler   = proc_dointvec_minmax_warn_RT_change,
2227         .extra1     = SYSCTL_ZERO,
2228         .extra2     = SYSCTL_ONE,
2229     },
2230
2231 #endif /* CONFIG_COMPACTION */
2232     {
2233         .procname   = "min_free_kbytes",
2234         .data       = &min_free_kbytes,
2235         .maxlen     = sizeof(min_free_kbytes),
2236         .mode       = 0644,
2237         .proc_handler   = min_free_kbytes_sysctl_handler,
2238         .extra1     = SYSCTL_ZERO,
2239     },
2240     {
2241         .procname   = "watermark_boost_factor",
2242         .data       = &watermark_boost_factor,
2243         .maxlen     = sizeof(watermark_boost_factor),
2244         .mode       = 0644,
2245         .proc_handler   = proc_dointvec_minmax,
2246         .extra1     = SYSCTL_ZERO,
2247     },
2248     {
2249         .procname   = "watermark_scale_factor",
2250         .data       = &watermark_scale_factor,
2251         .maxlen     = sizeof(watermark_scale_factor),
2252         .mode       = 0644,
2253         .proc_handler   = watermark_scale_factor_sysctl_handler,
2254         .extra1     = SYSCTL_ONE,
2255         .extra2     = SYSCTL_THREE_THOUSAND,
2256     },
2257     {
2258         .procname   = "percpu_pagelist_high_fraction",
2259         .data       = &percpu_pagelist_high_fraction,
2260         .maxlen     = sizeof(percpu_pagelist_high_fraction),
2261         .mode       = 0644,
2262         .proc_handler   = percpu_pagelist_high_fraction_sysctl_handler,
2263         .extra1     = SYSCTL_ZERO,
2264     },
2265     {
2266         .procname   = "page_lock_unfairness",
2267         .data       = &sysctl_page_lock_unfairness,
2268         .maxlen     = sizeof(sysctl_page_lock_unfairness),
2269         .mode       = 0644,
2270         .proc_handler   = proc_dointvec_minmax,
2271         .extra1     = SYSCTL_ZERO,
2272     },
         /* Exactly one of max_map_count / nr_trim_pages is built. */
2273 #ifdef CONFIG_MMU
2274     {
2275         .procname   = "max_map_count",
2276         .data       = &sysctl_max_map_count,
2277         .maxlen     = sizeof(sysctl_max_map_count),
2278         .mode       = 0644,
2279         .proc_handler   = proc_dointvec_minmax,
2280         .extra1     = SYSCTL_ZERO,
2281     },
2282 #else
2283     {
2284         .procname   = "nr_trim_pages",
2285         .data       = &sysctl_nr_trim_pages,
2286         .maxlen     = sizeof(sysctl_nr_trim_pages),
2287         .mode       = 0644,
2288         .proc_handler   = proc_dointvec_minmax,
2289         .extra1     = SYSCTL_ZERO,
2290     },
2291 #endif
2292     {
2293         .procname   = "vfs_cache_pressure",
2294         .data       = &sysctl_vfs_cache_pressure,
2295         .maxlen     = sizeof(sysctl_vfs_cache_pressure),
2296         .mode       = 0644,
2297         .proc_handler   = proc_dointvec_minmax,
2298         .extra1     = SYSCTL_ZERO,
2299     },
2300 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2301     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2302     {
2303         .procname   = "legacy_va_layout",
2304         .data       = &sysctl_legacy_va_layout,
2305         .maxlen     = sizeof(sysctl_legacy_va_layout),
2306         .mode       = 0644,
2307         .proc_handler   = proc_dointvec_minmax,
2308         .extra1     = SYSCTL_ZERO,
2309     },
2310 #endif
2311 #ifdef CONFIG_NUMA
2312     {
2313         .procname   = "zone_reclaim_mode",
2314         .data       = &node_reclaim_mode,
2315         .maxlen     = sizeof(node_reclaim_mode),
2316         .mode       = 0644,
2317         .proc_handler   = proc_dointvec_minmax,
2318         .extra1     = SYSCTL_ZERO,
2319     },
2320     {
2321         .procname   = "min_unmapped_ratio",
2322         .data       = &sysctl_min_unmapped_ratio,
2323         .maxlen     = sizeof(sysctl_min_unmapped_ratio),
2324         .mode       = 0644,
2325         .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
2326         .extra1     = SYSCTL_ZERO,
2327         .extra2     = SYSCTL_ONE_HUNDRED,
2328     },
2329     {
2330         .procname   = "min_slab_ratio",
2331         .data       = &sysctl_min_slab_ratio,
2332         .maxlen     = sizeof(sysctl_min_slab_ratio),
2333         .mode       = 0644,
2334         .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
2335         .extra1     = SYSCTL_ZERO,
2336         .extra2     = SYSCTL_ONE_HUNDRED,
2337     },
2338 #endif
2339 #ifdef CONFIG_SMP
2340     {
2341         .procname   = "stat_interval",
2342         .data       = &sysctl_stat_interval,
2343         .maxlen     = sizeof(sysctl_stat_interval),
2344         .mode       = 0644,
2345         .proc_handler   = proc_dointvec_jiffies,
2346     },
         /* No backing data at all: .data is NULL and .maxlen is 0. */
2347     {
2348         .procname   = "stat_refresh",
2349         .data       = NULL,
2350         .maxlen     = 0,
2351         .mode       = 0600,
2352         .proc_handler   = vmstat_refresh,
2353     },
2354 #endif
2355 #ifdef CONFIG_MMU
2356     {
2357         .procname   = "mmap_min_addr",
2358         .data       = &dac_mmap_min_addr,
2359         .maxlen     = sizeof(unsigned long),
2360         .mode       = 0644,
2361         .proc_handler   = mmap_min_addr_handler,
2362     },
2363 #endif
2364 #ifdef CONFIG_NUMA
2365     {
2366         .procname   = "numa_zonelist_order",
2367         .data       = &numa_zonelist_order,
2368         .maxlen     = NUMA_ZONELIST_ORDER_LEN,
2369         .mode       = 0644,
2370         .proc_handler   = numa_zonelist_order_handler,
2371     },
2372 #endif
2373 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2374    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
         /*
          * The backing variable differs per architecture: vdso32_enabled
          * when CONFIG_X86_32 is set, vdso_enabled otherwise.
          *
          * NOTE(review): this entry sets .extra1 but uses proc_dointvec
          * rather than the _minmax variant used by the other bounded
          * entries; confirm whether the lower bound is actually applied.
          */
2375     {
2376         .procname   = "vdso_enabled",
2377 #ifdef CONFIG_X86_32
2378         .data       = &vdso32_enabled,
2379         .maxlen     = sizeof(vdso32_enabled),
2380 #else
2381         .data       = &vdso_enabled,
2382         .maxlen     = sizeof(vdso_enabled),
2383 #endif
2384         .mode       = 0644,
2385         .proc_handler   = proc_dointvec,
2386         .extra1     = SYSCTL_ZERO,
2387     },
2388 #endif
2389 #ifdef CONFIG_MEMORY_FAILURE
2390     {
2391         .procname   = "memory_failure_early_kill",
2392         .data       = &sysctl_memory_failure_early_kill,
2393         .maxlen     = sizeof(sysctl_memory_failure_early_kill),
2394         .mode       = 0644,
2395         .proc_handler   = proc_dointvec_minmax,
2396         .extra1     = SYSCTL_ZERO,
2397         .extra2     = SYSCTL_ONE,
2398     },
2399     {
2400         .procname   = "memory_failure_recovery",
2401         .data       = &sysctl_memory_failure_recovery,
2402         .maxlen     = sizeof(sysctl_memory_failure_recovery),
2403         .mode       = 0644,
2404         .proc_handler   = proc_dointvec_minmax,
2405         .extra1     = SYSCTL_ZERO,
2406         .extra2     = SYSCTL_ONE,
2407     },
2408 #endif
2409     {
2410         .procname   = "user_reserve_kbytes",
2411         .data       = &sysctl_user_reserve_kbytes,
2412         .maxlen     = sizeof(sysctl_user_reserve_kbytes),
2413         .mode       = 0644,
2414         .proc_handler   = proc_doulongvec_minmax,
2415     },
2416     {
2417         .procname   = "admin_reserve_kbytes",
2418         .data       = &sysctl_admin_reserve_kbytes,
2419         .maxlen     = sizeof(sysctl_admin_reserve_kbytes),
2420         .mode       = 0644,
2421         .proc_handler   = proc_doulongvec_minmax,
2422     },
         /*
          * For the two mmap_rnd_* entries the bounds are taken from the
          * corresponding *_min / *_max variables rather than from SYSCTL_*
          * constants.
          */
2423 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2424     {
2425         .procname   = "mmap_rnd_bits",
2426         .data       = &mmap_rnd_bits,
2427         .maxlen     = sizeof(mmap_rnd_bits),
2428         .mode       = 0600,
2429         .proc_handler   = proc_dointvec_minmax,
2430         .extra1     = (void *)&mmap_rnd_bits_min,
2431         .extra2     = (void *)&mmap_rnd_bits_max,
2432     },
2433 #endif
2434 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2435     {
2436         .procname   = "mmap_rnd_compat_bits",
2437         .data       = &mmap_rnd_compat_bits,
2438         .maxlen     = sizeof(mmap_rnd_compat_bits),
2439         .mode       = 0600,
2440         .proc_handler   = proc_dointvec_minmax,
2441         .extra1     = (void *)&mmap_rnd_compat_bits_min,
2442         .extra2     = (void *)&mmap_rnd_compat_bits_max,
2443     },
2444 #endif
2445 #ifdef CONFIG_USERFAULTFD
2446     {
2447         .procname   = "unprivileged_userfaultfd",
2448         .data       = &sysctl_unprivileged_userfaultfd,
2449         .maxlen     = sizeof(sysctl_unprivileged_userfaultfd),
2450         .mode       = 0644,
2451         .proc_handler   = proc_dointvec_minmax,
2452         .extra1     = SYSCTL_ZERO,
2453         .extra2     = SYSCTL_ONE,
2454     },
2455 #endif
2456     { }
2457 };
2458 
/*
 * debug_table - sysctl entries that make up the "debug" base.
 *
 * Bound to the "debug" base by DECLARE_SYSCTL_BASE(debug, debug_table) and
 * registered from sysctl_init_bases().  Its only entry, "exception-trace",
 * is compiled in when CONFIG_SYSCTL_EXCEPTION_TRACE is defined; otherwise
 * the table holds nothing but the terminating empty entry.
 */
2459 static struct ctl_table debug_table[] = {
2460 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
         /* Backed by the int show_unhandled_signals, mode 0644. */
2461     {
2462         .procname   = "exception-trace",
2463         .data       = &show_unhandled_signals,
2464         .maxlen     = sizeof(int),
2465         .mode       = 0644,
2466         .proc_handler   = proc_dointvec
2467     },
2468 #endif
2469     { }
2470 };
2471 
/*
 * dev_table - table for the "dev" base.
 *
 * Contains only the terminating empty entry, so this file contributes no
 * files of its own to the "dev" base.
 * NOTE(review): presumably the base is registered so that other code can
 * attach entries beneath it - confirm against the registration helpers.
 */
2472 static struct ctl_table dev_table[] = {
2473     { }
2474 };
2475 
/*
 * Associate each base name with its table.  The first argument is the same
 * identifier that sysctl_init_bases() later hands to register_sysctl_base().
 * NOTE(review): DECLARE_SYSCTL_BASE is defined outside this file section;
 * what it expands to should be checked in its header.
 */
2476 DECLARE_SYSCTL_BASE(kernel, kern_table);
2477 DECLARE_SYSCTL_BASE(vm, vm_table);
2478 DECLARE_SYSCTL_BASE(debug, debug_table);
2479 DECLARE_SYSCTL_BASE(dev, dev_table);
2480 
/*
 * sysctl_init_bases - register the four base sysctl tables of this file.
 *
 * Registers the kernel, vm, debug and dev bases, in that order, using the
 * identifiers set up by the DECLARE_SYSCTL_BASE() lines above.  Whatever
 * register_sysctl_base() yields is not examined here.
 *
 * Return: always 0.
 */
2481 int __init sysctl_init_bases(void)
2482 {
2483     register_sysctl_base(kernel);
2484     register_sysctl_base(vm);
2485     register_sysctl_base(debug);
2486     register_sysctl_base(dev);
2487 
2488     return 0;
2489 }
2490 #endif /* CONFIG_SYSCTL */
2491 /*
2492  * No sense putting this after each symbol definition, twice,
2493  * exception granted :-)
2494  */
/*
 * Exports of the proc_do* handlers, grouped in one place instead of
 * following each definition.  They sit after the
 * "#endif / * CONFIG_SYSCTL * /" line, i.e. outside that conditional block.
 * proc_douintvec_minmax is the only one exported with EXPORT_SYMBOL_GPL;
 * all others use EXPORT_SYMBOL.
 * NOTE(review): "twice" above presumably refers to the handlers having two
 * alternative definitions earlier in the file - confirm there.
 */
2495 EXPORT_SYMBOL(proc_dobool);
2496 EXPORT_SYMBOL(proc_dointvec);
2497 EXPORT_SYMBOL(proc_douintvec);
2498 EXPORT_SYMBOL(proc_dointvec_jiffies);
2499 EXPORT_SYMBOL(proc_dointvec_minmax);
2500 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
2501 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2502 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2503 EXPORT_SYMBOL(proc_dostring);
2504 EXPORT_SYMBOL(proc_doulongvec_minmax);
2505 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2506 EXPORT_SYMBOL(proc_do_large_bitmap);