Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 /* -*- linux-c -*-
0003  * sysctl_net_core.c: sysctl interface to net core subsystem.
0004  *
0005  * Begun April 1, 1996, Mike Shaver.
0006  * Added /proc/sys/net/core directory entry (empty =) ). [MS]
0007  */
0008 
0009 #include <linux/filter.h>
0010 #include <linux/mm.h>
0011 #include <linux/sysctl.h>
0012 #include <linux/module.h>
0013 #include <linux/socket.h>
0014 #include <linux/netdevice.h>
0015 #include <linux/ratelimit.h>
0016 #include <linux/vmalloc.h>
0017 #include <linux/init.h>
0018 #include <linux/slab.h>
0019 
0020 #include <net/ip.h>
0021 #include <net/sock.h>
0022 #include <net/net_ratelimit.h>
0023 #include <net/busy_poll.h>
0024 #include <net/pkt_sched.h>
0025 
0026 #include "dev.h"
0027 
0028 static int int_3600 = 3600;
0029 static int min_sndbuf = SOCK_MIN_SNDBUF;
0030 static int min_rcvbuf = SOCK_MIN_RCVBUF;
0031 static int max_skb_frags = MAX_SKB_FRAGS;
0032 static long long_max __maybe_unused = LONG_MAX;
0033 
0034 static int net_msg_warn;    /* Unused, but still a sysctl */
0035 
0036 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
0037 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
0038 
0039 /* 0 - Keep current behavior:
0040  *     IPv4: inherit all current settings from init_net
0041  *     IPv6: reset all settings to default
0042  * 1 - Both inherit all current settings from init_net
0043  * 2 - Both reset all settings to default
0044  * 3 - Both inherit all settings from current netns
0045  */
0046 int sysctl_devconf_inherit_init_net __read_mostly;
0047 EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
0048 
0049 #ifdef CONFIG_RPS
0050 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
0051                 void *buffer, size_t *lenp, loff_t *ppos)
0052 {
0053     unsigned int orig_size, size;
0054     int ret, i;
0055     struct ctl_table tmp = {
0056         .data = &size,
0057         .maxlen = sizeof(size),
0058         .mode = table->mode
0059     };
0060     struct rps_sock_flow_table *orig_sock_table, *sock_table;
0061     static DEFINE_MUTEX(sock_flow_mutex);
0062 
0063     mutex_lock(&sock_flow_mutex);
0064 
0065     orig_sock_table = rcu_dereference_protected(rps_sock_flow_table,
0066                     lockdep_is_held(&sock_flow_mutex));
0067     size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
0068 
0069     ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
0070 
0071     if (write) {
0072         if (size) {
0073             if (size > 1<<29) {
0074                 /* Enforce limit to prevent overflow */
0075                 mutex_unlock(&sock_flow_mutex);
0076                 return -EINVAL;
0077             }
0078             size = roundup_pow_of_two(size);
0079             if (size != orig_size) {
0080                 sock_table =
0081                     vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
0082                 if (!sock_table) {
0083                     mutex_unlock(&sock_flow_mutex);
0084                     return -ENOMEM;
0085                 }
0086                 rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
0087                 sock_table->mask = size - 1;
0088             } else
0089                 sock_table = orig_sock_table;
0090 
0091             for (i = 0; i < size; i++)
0092                 sock_table->ents[i] = RPS_NO_CPU;
0093         } else
0094             sock_table = NULL;
0095 
0096         if (sock_table != orig_sock_table) {
0097             rcu_assign_pointer(rps_sock_flow_table, sock_table);
0098             if (sock_table) {
0099                 static_branch_inc(&rps_needed);
0100                 static_branch_inc(&rfs_needed);
0101             }
0102             if (orig_sock_table) {
0103                 static_branch_dec(&rps_needed);
0104                 static_branch_dec(&rfs_needed);
0105                 kvfree_rcu(orig_sock_table);
0106             }
0107         }
0108     }
0109 
0110     mutex_unlock(&sock_flow_mutex);
0111 
0112     return ret;
0113 }
0114 #endif /* CONFIG_RPS */
0115 
0116 #ifdef CONFIG_NET_FLOW_LIMIT
0117 static DEFINE_MUTEX(flow_limit_update_mutex);
0118 
0119 static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
0120                  void *buffer, size_t *lenp, loff_t *ppos)
0121 {
0122     struct sd_flow_limit *cur;
0123     struct softnet_data *sd;
0124     cpumask_var_t mask;
0125     int i, len, ret = 0;
0126 
0127     if (!alloc_cpumask_var(&mask, GFP_KERNEL))
0128         return -ENOMEM;
0129 
0130     if (write) {
0131         ret = cpumask_parse(buffer, mask);
0132         if (ret)
0133             goto done;
0134 
0135         mutex_lock(&flow_limit_update_mutex);
0136         len = sizeof(*cur) + netdev_flow_limit_table_len;
0137         for_each_possible_cpu(i) {
0138             sd = &per_cpu(softnet_data, i);
0139             cur = rcu_dereference_protected(sd->flow_limit,
0140                      lockdep_is_held(&flow_limit_update_mutex));
0141             if (cur && !cpumask_test_cpu(i, mask)) {
0142                 RCU_INIT_POINTER(sd->flow_limit, NULL);
0143                 kfree_rcu(cur);
0144             } else if (!cur && cpumask_test_cpu(i, mask)) {
0145                 cur = kzalloc_node(len, GFP_KERNEL,
0146                            cpu_to_node(i));
0147                 if (!cur) {
0148                     /* not unwinding previous changes */
0149                     ret = -ENOMEM;
0150                     goto write_unlock;
0151                 }
0152                 cur->num_buckets = netdev_flow_limit_table_len;
0153                 rcu_assign_pointer(sd->flow_limit, cur);
0154             }
0155         }
0156 write_unlock:
0157         mutex_unlock(&flow_limit_update_mutex);
0158     } else {
0159         char kbuf[128];
0160 
0161         if (*ppos || !*lenp) {
0162             *lenp = 0;
0163             goto done;
0164         }
0165 
0166         cpumask_clear(mask);
0167         rcu_read_lock();
0168         for_each_possible_cpu(i) {
0169             sd = &per_cpu(softnet_data, i);
0170             if (rcu_dereference(sd->flow_limit))
0171                 cpumask_set_cpu(i, mask);
0172         }
0173         rcu_read_unlock();
0174 
0175         len = min(sizeof(kbuf) - 1, *lenp);
0176         len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
0177         if (!len) {
0178             *lenp = 0;
0179             goto done;
0180         }
0181         if (len < *lenp)
0182             kbuf[len++] = '\n';
0183         memcpy(buffer, kbuf, len);
0184         *lenp = len;
0185         *ppos += len;
0186     }
0187 
0188 done:
0189     free_cpumask_var(mask);
0190     return ret;
0191 }
0192 
0193 static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
0194                        void *buffer, size_t *lenp, loff_t *ppos)
0195 {
0196     unsigned int old, *ptr;
0197     int ret;
0198 
0199     mutex_lock(&flow_limit_update_mutex);
0200 
0201     ptr = table->data;
0202     old = *ptr;
0203     ret = proc_dointvec(table, write, buffer, lenp, ppos);
0204     if (!ret && write && !is_power_of_2(*ptr)) {
0205         *ptr = old;
0206         ret = -EINVAL;
0207     }
0208 
0209     mutex_unlock(&flow_limit_update_mutex);
0210     return ret;
0211 }
0212 #endif /* CONFIG_NET_FLOW_LIMIT */
0213 
0214 #ifdef CONFIG_NET_SCHED
0215 static int set_default_qdisc(struct ctl_table *table, int write,
0216                  void *buffer, size_t *lenp, loff_t *ppos)
0217 {
0218     char id[IFNAMSIZ];
0219     struct ctl_table tbl = {
0220         .data = id,
0221         .maxlen = IFNAMSIZ,
0222     };
0223     int ret;
0224 
0225     qdisc_get_default(id, IFNAMSIZ);
0226 
0227     ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
0228     if (write && ret == 0)
0229         ret = qdisc_set_default(id);
0230     return ret;
0231 }
0232 #endif
0233 
0234 static int proc_do_dev_weight(struct ctl_table *table, int write,
0235                void *buffer, size_t *lenp, loff_t *ppos)
0236 {
0237     static DEFINE_MUTEX(dev_weight_mutex);
0238     int ret, weight;
0239 
0240     mutex_lock(&dev_weight_mutex);
0241     ret = proc_dointvec(table, write, buffer, lenp, ppos);
0242     if (!ret && write) {
0243         weight = READ_ONCE(weight_p);
0244         WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
0245         WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
0246     }
0247     mutex_unlock(&dev_weight_mutex);
0248 
0249     return ret;
0250 }
0251 
0252 static int proc_do_rss_key(struct ctl_table *table, int write,
0253                void *buffer, size_t *lenp, loff_t *ppos)
0254 {
0255     struct ctl_table fake_table;
0256     char buf[NETDEV_RSS_KEY_LEN * 3];
0257 
0258     snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
0259     fake_table.data = buf;
0260     fake_table.maxlen = sizeof(buf);
0261     return proc_dostring(&fake_table, write, buffer, lenp, ppos);
0262 }
0263 
0264 #ifdef CONFIG_BPF_JIT
0265 static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
0266                        void *buffer, size_t *lenp,
0267                        loff_t *ppos)
0268 {
0269     int ret, jit_enable = *(int *)table->data;
0270     int min = *(int *)table->extra1;
0271     int max = *(int *)table->extra2;
0272     struct ctl_table tmp = *table;
0273 
0274     if (write && !capable(CAP_SYS_ADMIN))
0275         return -EPERM;
0276 
0277     tmp.data = &jit_enable;
0278     ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
0279     if (write && !ret) {
0280         if (jit_enable < 2 ||
0281             (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
0282             *(int *)table->data = jit_enable;
0283             if (jit_enable == 2)
0284                 pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
0285         } else {
0286             ret = -EPERM;
0287         }
0288     }
0289 
0290     if (write && ret && min == max)
0291         pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");
0292 
0293     return ret;
0294 }
0295 
0296 # ifdef CONFIG_HAVE_EBPF_JIT
0297 static int
0298 proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
0299                     void *buffer, size_t *lenp, loff_t *ppos)
0300 {
0301     if (!capable(CAP_SYS_ADMIN))
0302         return -EPERM;
0303 
0304     return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
0305 }
0306 # endif /* CONFIG_HAVE_EBPF_JIT */
0307 
0308 static int
0309 proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
0310                      void *buffer, size_t *lenp, loff_t *ppos)
0311 {
0312     if (!capable(CAP_SYS_ADMIN))
0313         return -EPERM;
0314 
0315     return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
0316 }
0317 #endif
0318 
0319 static struct ctl_table net_core_table[] = {
0320     {
0321         .procname   = "wmem_max",
0322         .data       = &sysctl_wmem_max,
0323         .maxlen     = sizeof(int),
0324         .mode       = 0644,
0325         .proc_handler   = proc_dointvec_minmax,
0326         .extra1     = &min_sndbuf,
0327     },
0328     {
0329         .procname   = "rmem_max",
0330         .data       = &sysctl_rmem_max,
0331         .maxlen     = sizeof(int),
0332         .mode       = 0644,
0333         .proc_handler   = proc_dointvec_minmax,
0334         .extra1     = &min_rcvbuf,
0335     },
0336     {
0337         .procname   = "wmem_default",
0338         .data       = &sysctl_wmem_default,
0339         .maxlen     = sizeof(int),
0340         .mode       = 0644,
0341         .proc_handler   = proc_dointvec_minmax,
0342         .extra1     = &min_sndbuf,
0343     },
0344     {
0345         .procname   = "rmem_default",
0346         .data       = &sysctl_rmem_default,
0347         .maxlen     = sizeof(int),
0348         .mode       = 0644,
0349         .proc_handler   = proc_dointvec_minmax,
0350         .extra1     = &min_rcvbuf,
0351     },
0352     {
0353         .procname   = "dev_weight",
0354         .data       = &weight_p,
0355         .maxlen     = sizeof(int),
0356         .mode       = 0644,
0357         .proc_handler   = proc_do_dev_weight,
0358     },
0359     {
0360         .procname   = "dev_weight_rx_bias",
0361         .data       = &dev_weight_rx_bias,
0362         .maxlen     = sizeof(int),
0363         .mode       = 0644,
0364         .proc_handler   = proc_do_dev_weight,
0365     },
0366     {
0367         .procname   = "dev_weight_tx_bias",
0368         .data       = &dev_weight_tx_bias,
0369         .maxlen     = sizeof(int),
0370         .mode       = 0644,
0371         .proc_handler   = proc_do_dev_weight,
0372     },
0373     {
0374         .procname   = "netdev_max_backlog",
0375         .data       = &netdev_max_backlog,
0376         .maxlen     = sizeof(int),
0377         .mode       = 0644,
0378         .proc_handler   = proc_dointvec
0379     },
0380     {
0381         .procname   = "netdev_rss_key",
0382         .data       = &netdev_rss_key,
0383         .maxlen     = sizeof(int),
0384         .mode       = 0444,
0385         .proc_handler   = proc_do_rss_key,
0386     },
0387 #ifdef CONFIG_BPF_JIT
0388     {
0389         .procname   = "bpf_jit_enable",
0390         .data       = &bpf_jit_enable,
0391         .maxlen     = sizeof(int),
0392         .mode       = 0644,
0393         .proc_handler   = proc_dointvec_minmax_bpf_enable,
0394 # ifdef CONFIG_BPF_JIT_ALWAYS_ON
0395         .extra1     = SYSCTL_ONE,
0396         .extra2     = SYSCTL_ONE,
0397 # else
0398         .extra1     = SYSCTL_ZERO,
0399         .extra2     = SYSCTL_TWO,
0400 # endif
0401     },
0402 # ifdef CONFIG_HAVE_EBPF_JIT
0403     {
0404         .procname   = "bpf_jit_harden",
0405         .data       = &bpf_jit_harden,
0406         .maxlen     = sizeof(int),
0407         .mode       = 0600,
0408         .proc_handler   = proc_dointvec_minmax_bpf_restricted,
0409         .extra1     = SYSCTL_ZERO,
0410         .extra2     = SYSCTL_TWO,
0411     },
0412     {
0413         .procname   = "bpf_jit_kallsyms",
0414         .data       = &bpf_jit_kallsyms,
0415         .maxlen     = sizeof(int),
0416         .mode       = 0600,
0417         .proc_handler   = proc_dointvec_minmax_bpf_restricted,
0418         .extra1     = SYSCTL_ZERO,
0419         .extra2     = SYSCTL_ONE,
0420     },
0421 # endif
0422     {
0423         .procname   = "bpf_jit_limit",
0424         .data       = &bpf_jit_limit,
0425         .maxlen     = sizeof(long),
0426         .mode       = 0600,
0427         .proc_handler   = proc_dolongvec_minmax_bpf_restricted,
0428         .extra1     = SYSCTL_LONG_ONE,
0429         .extra2     = &bpf_jit_limit_max,
0430     },
0431 #endif
0432     {
0433         .procname   = "netdev_tstamp_prequeue",
0434         .data       = &netdev_tstamp_prequeue,
0435         .maxlen     = sizeof(int),
0436         .mode       = 0644,
0437         .proc_handler   = proc_dointvec
0438     },
0439     {
0440         .procname   = "message_cost",
0441         .data       = &net_ratelimit_state.interval,
0442         .maxlen     = sizeof(int),
0443         .mode       = 0644,
0444         .proc_handler   = proc_dointvec_jiffies,
0445     },
0446     {
0447         .procname   = "message_burst",
0448         .data       = &net_ratelimit_state.burst,
0449         .maxlen     = sizeof(int),
0450         .mode       = 0644,
0451         .proc_handler   = proc_dointvec,
0452     },
0453     {
0454         .procname   = "optmem_max",
0455         .data       = &sysctl_optmem_max,
0456         .maxlen     = sizeof(int),
0457         .mode       = 0644,
0458         .proc_handler   = proc_dointvec
0459     },
0460     {
0461         .procname   = "tstamp_allow_data",
0462         .data       = &sysctl_tstamp_allow_data,
0463         .maxlen     = sizeof(int),
0464         .mode       = 0644,
0465         .proc_handler   = proc_dointvec_minmax,
0466         .extra1     = SYSCTL_ZERO,
0467         .extra2     = SYSCTL_ONE
0468     },
0469 #ifdef CONFIG_RPS
0470     {
0471         .procname   = "rps_sock_flow_entries",
0472         .maxlen     = sizeof(int),
0473         .mode       = 0644,
0474         .proc_handler   = rps_sock_flow_sysctl
0475     },
0476 #endif
0477 #ifdef CONFIG_NET_FLOW_LIMIT
0478     {
0479         .procname   = "flow_limit_cpu_bitmap",
0480         .mode       = 0644,
0481         .proc_handler   = flow_limit_cpu_sysctl
0482     },
0483     {
0484         .procname   = "flow_limit_table_len",
0485         .data       = &netdev_flow_limit_table_len,
0486         .maxlen     = sizeof(int),
0487         .mode       = 0644,
0488         .proc_handler   = flow_limit_table_len_sysctl
0489     },
0490 #endif /* CONFIG_NET_FLOW_LIMIT */
0491 #ifdef CONFIG_NET_RX_BUSY_POLL
0492     {
0493         .procname   = "busy_poll",
0494         .data       = &sysctl_net_busy_poll,
0495         .maxlen     = sizeof(unsigned int),
0496         .mode       = 0644,
0497         .proc_handler   = proc_dointvec_minmax,
0498         .extra1     = SYSCTL_ZERO,
0499     },
0500     {
0501         .procname   = "busy_read",
0502         .data       = &sysctl_net_busy_read,
0503         .maxlen     = sizeof(unsigned int),
0504         .mode       = 0644,
0505         .proc_handler   = proc_dointvec_minmax,
0506         .extra1     = SYSCTL_ZERO,
0507     },
0508 #endif
0509 #ifdef CONFIG_NET_SCHED
0510     {
0511         .procname   = "default_qdisc",
0512         .mode       = 0644,
0513         .maxlen     = IFNAMSIZ,
0514         .proc_handler   = set_default_qdisc
0515     },
0516 #endif
0517     {
0518         .procname   = "netdev_budget",
0519         .data       = &netdev_budget,
0520         .maxlen     = sizeof(int),
0521         .mode       = 0644,
0522         .proc_handler   = proc_dointvec
0523     },
0524     {
0525         .procname   = "warnings",
0526         .data       = &net_msg_warn,
0527         .maxlen     = sizeof(int),
0528         .mode       = 0644,
0529         .proc_handler   = proc_dointvec
0530     },
0531     {
0532         .procname   = "max_skb_frags",
0533         .data       = &sysctl_max_skb_frags,
0534         .maxlen     = sizeof(int),
0535         .mode       = 0644,
0536         .proc_handler   = proc_dointvec_minmax,
0537         .extra1     = SYSCTL_ONE,
0538         .extra2     = &max_skb_frags,
0539     },
0540     {
0541         .procname   = "netdev_budget_usecs",
0542         .data       = &netdev_budget_usecs,
0543         .maxlen     = sizeof(unsigned int),
0544         .mode       = 0644,
0545         .proc_handler   = proc_dointvec_minmax,
0546         .extra1     = SYSCTL_ZERO,
0547     },
0548     {
0549         .procname   = "fb_tunnels_only_for_init_net",
0550         .data       = &sysctl_fb_tunnels_only_for_init_net,
0551         .maxlen     = sizeof(int),
0552         .mode       = 0644,
0553         .proc_handler   = proc_dointvec_minmax,
0554         .extra1     = SYSCTL_ZERO,
0555         .extra2     = SYSCTL_TWO,
0556     },
0557     {
0558         .procname   = "devconf_inherit_init_net",
0559         .data       = &sysctl_devconf_inherit_init_net,
0560         .maxlen     = sizeof(int),
0561         .mode       = 0644,
0562         .proc_handler   = proc_dointvec_minmax,
0563         .extra1     = SYSCTL_ZERO,
0564         .extra2     = SYSCTL_THREE,
0565     },
0566     {
0567         .procname   = "high_order_alloc_disable",
0568         .data       = &net_high_order_alloc_disable_key.key,
0569         .maxlen         = sizeof(net_high_order_alloc_disable_key),
0570         .mode       = 0644,
0571         .proc_handler   = proc_do_static_key,
0572     },
0573     {
0574         .procname   = "gro_normal_batch",
0575         .data       = &gro_normal_batch,
0576         .maxlen     = sizeof(unsigned int),
0577         .mode       = 0644,
0578         .proc_handler   = proc_dointvec_minmax,
0579         .extra1     = SYSCTL_ONE,
0580     },
0581     {
0582         .procname   = "netdev_unregister_timeout_secs",
0583         .data       = &netdev_unregister_timeout_secs,
0584         .maxlen     = sizeof(unsigned int),
0585         .mode       = 0644,
0586         .proc_handler   = proc_dointvec_minmax,
0587         .extra1     = SYSCTL_ONE,
0588         .extra2     = &int_3600,
0589     },
0590     {
0591         .procname   = "skb_defer_max",
0592         .data       = &sysctl_skb_defer_max,
0593         .maxlen     = sizeof(unsigned int),
0594         .mode       = 0644,
0595         .proc_handler   = proc_dointvec_minmax,
0596         .extra1     = SYSCTL_ZERO,
0597     },
0598     { }
0599 };
0600 
0601 static struct ctl_table netns_core_table[] = {
0602     {
0603         .procname   = "somaxconn",
0604         .data       = &init_net.core.sysctl_somaxconn,
0605         .maxlen     = sizeof(int),
0606         .mode       = 0644,
0607         .extra1     = SYSCTL_ZERO,
0608         .proc_handler   = proc_dointvec_minmax
0609     },
0610     {
0611         .procname   = "txrehash",
0612         .data       = &init_net.core.sysctl_txrehash,
0613         .maxlen     = sizeof(u8),
0614         .mode       = 0644,
0615         .extra1     = SYSCTL_ZERO,
0616         .extra2     = SYSCTL_ONE,
0617         .proc_handler   = proc_dou8vec_minmax,
0618     },
0619     { }
0620 };
0621 
0622 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
0623 {
0624     /* fallback tunnels for initns only */
0625     if (!strncmp(str, "initns", 6))
0626         sysctl_fb_tunnels_only_for_init_net = 1;
0627     /* no fallback tunnels anywhere */
0628     else if (!strncmp(str, "none", 4))
0629         sysctl_fb_tunnels_only_for_init_net = 2;
0630 
0631     return 1;
0632 }
0633 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
0634 
0635 static __net_init int sysctl_core_net_init(struct net *net)
0636 {
0637     struct ctl_table *tbl, *tmp;
0638 
0639     tbl = netns_core_table;
0640     if (!net_eq(net, &init_net)) {
0641         tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
0642         if (tbl == NULL)
0643             goto err_dup;
0644 
0645         for (tmp = tbl; tmp->procname; tmp++)
0646             tmp->data += (char *)net - (char *)&init_net;
0647 
0648         /* Don't export any sysctls to unprivileged users */
0649         if (net->user_ns != &init_user_ns) {
0650             tbl[0].procname = NULL;
0651         }
0652     }
0653 
0654     net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
0655     if (net->core.sysctl_hdr == NULL)
0656         goto err_reg;
0657 
0658     return 0;
0659 
0660 err_reg:
0661     if (tbl != netns_core_table)
0662         kfree(tbl);
0663 err_dup:
0664     return -ENOMEM;
0665 }
0666 
0667 static __net_exit void sysctl_core_net_exit(struct net *net)
0668 {
0669     struct ctl_table *tbl;
0670 
0671     tbl = net->core.sysctl_hdr->ctl_table_arg;
0672     unregister_net_sysctl_table(net->core.sysctl_hdr);
0673     BUG_ON(tbl == netns_core_table);
0674     kfree(tbl);
0675 }
0676 
0677 static __net_initdata struct pernet_operations sysctl_core_ops = {
0678     .init = sysctl_core_net_init,
0679     .exit = sysctl_core_net_exit,
0680 };
0681 
0682 static __init int sysctl_core_init(void)
0683 {
0684     register_net_sysctl(&init_net, "net/core", net_core_table);
0685     return register_pernet_subsys(&sysctl_core_ops);
0686 }
0687 
0688 fs_initcall(sysctl_core_init);