Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-or-later
0002 /*
0003  *  ip6_flowlabel.c     IPv6 flowlabel manager.
0004  *
0005  *  Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
0006  */
0007 
0008 #include <linux/capability.h>
0009 #include <linux/errno.h>
0010 #include <linux/types.h>
0011 #include <linux/socket.h>
0012 #include <linux/net.h>
0013 #include <linux/netdevice.h>
0014 #include <linux/in6.h>
0015 #include <linux/proc_fs.h>
0016 #include <linux/seq_file.h>
0017 #include <linux/slab.h>
0018 #include <linux/export.h>
0019 #include <linux/pid_namespace.h>
0020 #include <linux/jump_label_ratelimit.h>
0021 
0022 #include <net/net_namespace.h>
0023 #include <net/sock.h>
0024 
0025 #include <net/ipv6.h>
0026 #include <net/rawv6.h>
0027 #include <net/transp_v6.h>
0028 
0029 #include <linux/uaccess.h>
0030 
0031 #define FL_MIN_LINGER   6   /* Minimal linger. It is set to 6sec specified
0032                    in old IPv6 RFC. Well, it was reasonable value.
0033                  */
0034 #define FL_MAX_LINGER   150 /* Maximal linger timeout */
0035 
0036 /* FL hash table */
0037 
0038 #define FL_MAX_PER_SOCK 32
0039 #define FL_MAX_SIZE 4096
0040 #define FL_HASH_MASK    255
0041 #define FL_HASH(l)  (ntohl(l)&FL_HASH_MASK)
0042 
0043 static atomic_t fl_size = ATOMIC_INIT(0);
0044 static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
0045 
0046 static void ip6_fl_gc(struct timer_list *unused);
0047 static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);
0048 
0049 /* FL hash table lock: it protects only of GC */
0050 
0051 static DEFINE_SPINLOCK(ip6_fl_lock);
0052 
0053 /* Big socket sock */
0054 
0055 static DEFINE_SPINLOCK(ip6_sk_fl_lock);
0056 
0057 DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
0058 EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
0059 
0060 #define for_each_fl_rcu(hash, fl)               \
0061     for (fl = rcu_dereference_bh(fl_ht[(hash)]);        \
0062          fl != NULL;                    \
0063          fl = rcu_dereference_bh(fl->next))
0064 #define for_each_fl_continue_rcu(fl)                \
0065     for (fl = rcu_dereference_bh(fl->next);         \
0066          fl != NULL;                    \
0067          fl = rcu_dereference_bh(fl->next))
0068 
0069 #define for_each_sk_fl_rcu(np, sfl)             \
0070     for (sfl = rcu_dereference_bh(np->ipv6_fl_list);    \
0071          sfl != NULL;                   \
0072          sfl = rcu_dereference_bh(sfl->next))
0073 
0074 static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
0075 {
0076     struct ip6_flowlabel *fl;
0077 
0078     for_each_fl_rcu(FL_HASH(label), fl) {
0079         if (fl->label == label && net_eq(fl->fl_net, net))
0080             return fl;
0081     }
0082     return NULL;
0083 }
0084 
0085 static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
0086 {
0087     struct ip6_flowlabel *fl;
0088 
0089     rcu_read_lock_bh();
0090     fl = __fl_lookup(net, label);
0091     if (fl && !atomic_inc_not_zero(&fl->users))
0092         fl = NULL;
0093     rcu_read_unlock_bh();
0094     return fl;
0095 }
0096 
0097 static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
0098 {
0099     return fl->share == IPV6_FL_S_EXCL ||
0100            fl->share == IPV6_FL_S_PROCESS ||
0101            fl->share == IPV6_FL_S_USER;
0102 }
0103 
0104 static void fl_free_rcu(struct rcu_head *head)
0105 {
0106     struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);
0107 
0108     if (fl->share == IPV6_FL_S_PROCESS)
0109         put_pid(fl->owner.pid);
0110     kfree(fl->opt);
0111     kfree(fl);
0112 }
0113 
0114 
0115 static void fl_free(struct ip6_flowlabel *fl)
0116 {
0117     if (!fl)
0118         return;
0119 
0120     if (fl_shared_exclusive(fl) || fl->opt)
0121         static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);
0122 
0123     call_rcu(&fl->rcu, fl_free_rcu);
0124 }
0125 
/* Drop one reference on @fl.  On the last put the entry is not freed
 * immediately: its expiry is pushed out to at least lastuse + linger,
 * exclusive-mode options are released right away, and the GC timer is
 * (re)armed to fire no later than the entry's deadline.
 */
static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;
		/* Keep the entry around at least 'linger' past last use. */
		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			/* Exclusive labels cannot be reused by others, so
			 * their private options can be freed immediately. */
			struct ipv6_txoptions *opt = fl->opt;
			fl->opt = NULL;
			kfree(opt);
		}
		/* Pull the GC timer forward if it would fire too late. */
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}
0147 
/* Garbage-collector timer callback: walk every hash chain, unlink and
 * free expired entries with no remaining users, and re-arm the timer
 * for the earliest surviving deadline (or FL_MAX_LINGER from now if
 * entries exist but none gave a deadline).
 */
static void ip6_fl_gc(struct timer_list *unused)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;	/* earliest next expiry seen so far */

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;
				/* Honour the linger period past last use. */
				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					/* Expired: unlink, free via RCU. */
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched) {
		mod_timer(&ip6_fl_gc_timer, sched);
	}
	spin_unlock(&ip6_fl_lock);
}
0187 
/* Netns teardown: remove every unused flowlabel belonging to @net from
 * the global hash.  Entries that still have users are left in place
 * (their owners hold references and will release them).
 */
static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				/* Unlink and schedule RCU free. */
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}
0212 
/* Insert @fl into the global hash under @label, or under a random
 * unused label when @label is 0.  Returns NULL on success.  If another
 * entry with the requested label appeared in the meantime, returns that
 * entry with a reference taken and leaves @fl unlinked (caller frees it).
 */
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		/* Pick a random non-zero label not yet in use. */
		for (;;) {
			fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * we dropped the ip6_fl_lock, so this entry could reappear
		 * and we need to recheck with it.
		 *
		 * OTOH no need to search the active socket first, like it is
		 * done in ipv6_flowlabel_opt - sock is locked, so new entry
		 * with the same label can only appear on another sock
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	return NULL;
}
0254 
0255 
0256 
0257 /* Socket flowlabel lists */
0258 
/* Look up @label in @sk's per-socket flowlabel list.  On success takes
 * a reference, refreshes lastuse, and returns the entry; returns NULL
 * if the label is absent or the entry is already dying (refcount 0).
 */
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;
	struct ipv6_pinfo *np = inet6_sk(sk);

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock_bh();
			return fl;
		}
	}
	rcu_read_unlock_bh();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
0280 
/* Detach and release every flowlabel attached to @sk (socket teardown).
 * The list lock is dropped around each fl_release()/kfree_rcu() pair —
 * presumably to avoid holding ip6_sk_fl_lock while fl_release() takes
 * ip6_fl_lock; TODO confirm the exact lock-ordering rationale.
 */
void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	/* Fast path: nothing attached. */
	if (!rcu_access_pointer(np->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}
0302 
0303 /* Service routines */
0304 
0305 
0306 /*
0307    It is the only difficult place. flowlabel enforces equal headers
0308    before and including routing header, however user may supply options
0309    following rthdr.
0310  */
0311 
0312 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
0313                      struct ip6_flowlabel *fl,
0314                      struct ipv6_txoptions *fopt)
0315 {
0316     struct ipv6_txoptions *fl_opt = fl->opt;
0317 
0318     if (!fopt || fopt->opt_flen == 0)
0319         return fl_opt;
0320 
0321     if (fl_opt) {
0322         opt_space->hopopt = fl_opt->hopopt;
0323         opt_space->dst0opt = fl_opt->dst0opt;
0324         opt_space->srcrt = fl_opt->srcrt;
0325         opt_space->opt_nflen = fl_opt->opt_nflen;
0326     } else {
0327         if (fopt->opt_nflen == 0)
0328             return fopt;
0329         opt_space->hopopt = NULL;
0330         opt_space->dst0opt = NULL;
0331         opt_space->srcrt = NULL;
0332         opt_space->opt_nflen = 0;
0333     }
0334     opt_space->dst1opt = fopt->dst1opt;
0335     opt_space->opt_flen = fopt->opt_flen;
0336     opt_space->tot_len = fopt->tot_len;
0337     return opt_space;
0338 }
0339 EXPORT_SYMBOL_GPL(fl6_merge_options);
0340 
0341 static unsigned long check_linger(unsigned long ttl)
0342 {
0343     if (ttl < FL_MIN_LINGER)
0344         return FL_MIN_LINGER*HZ;
0345     if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
0346         return 0;
0347     return ttl*HZ;
0348 }
0349 
/* Extend @fl's linger and expiry from user-supplied values (seconds).
 * Both values are validated by check_linger(); out-of-range values for
 * an unprivileged caller yield -EPERM.  Linger and expiry only ever
 * grow here — renewing can never shorten an entry's lifetime.
 */
static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	/* Expiry must be at least one full linger period out. */
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}
0371 
/* Allocate and initialise a new flowlabel from a user request.  Any
 * option bytes following the request struct in @optval are parsed via
 * ip6_datagram_send_ctl(); only pre-rthdr options may be stored on a
 * flowlabel (opt_flen must end up 0).  On failure everything is freed,
 * the error is stored in *err_p, and NULL is returned.
 */
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;	/* length of cmsg data trailing the request struct */
	int addr_type;
	int err;

	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		/* Copy the raw cmsg blob that follows the request struct. */
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
				CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt+1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		/* Options after the routing header cannot live on a label. */
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			/* Nothing usable was parsed; drop the empty block. */
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	/* Reject IPv4-mapped and unspecified destinations. */
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		/* Pin the owning pid; dropped in fl_free_rcu(). */
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	if (fl_shared_exclusive(fl) || fl->opt) {
		/* Balanced by static_branch_slow_dec_deferred() in fl_free(). */
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}
0467 
0468 static int mem_check(struct sock *sk)
0469 {
0470     struct ipv6_pinfo *np = inet6_sk(sk);
0471     struct ipv6_fl_socklist *sfl;
0472     int room = FL_MAX_SIZE - atomic_read(&fl_size);
0473     int count = 0;
0474 
0475     if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
0476         return 0;
0477 
0478     rcu_read_lock_bh();
0479     for_each_sk_fl_rcu(np, sfl)
0480         count++;
0481     rcu_read_unlock_bh();
0482 
0483     if (room <= 0 ||
0484         ((count >= FL_MAX_PER_SOCK ||
0485           (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
0486          !capable(CAP_NET_ADMIN)))
0487         return -ENOBUFS;
0488 
0489     return 0;
0490 }
0491 
/* Prepend @sfl (carrying @fl) to @np's per-socket flowlabel list.
 * Ownership of the caller's reference on @fl transfers to the list.
 */
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
		struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}
0501 
/* getsockopt(IPV6_FLOWLABEL_MGR): report the state of the label the
 * socket currently sends with.  IPV6_FL_F_REMOTE returns the peer's
 * received label instead; a socket in reflection mode returns its own
 * flow_label.  -ENOENT if no matching managed label is attached.
 */
int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (np->repflow) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock_bh();

	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			/* ip6_fl_lock keeps expires/linger consistent while
			 * they are converted to seconds for userspace. */
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock_bh();
			return 0;
		}
	}
	rcu_read_unlock_bh();

	return -ENOENT;
}
0538 
0539 #define socklist_dereference(__sflp) \
0540     rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))
0541 
/* IPV6_FL_A_PUT: detach a label from the socket and release its
 * reference.  With IPV6_FL_F_REFLECT, clears TCP reflection state
 * instead.  -ESRCH if the label is not attached to this socket.
 */
static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!np->repflow)
			return -ESRCH;
		np->flow_label = 0;
		np->repflow = 0;
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &np->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	/* Stop stamping outgoing packets with the label being dropped. */
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}
0576 
/* IPV6_FL_A_RENEW: extend linger/expiry of a label attached to this
 * socket.  A privileged caller (CAP_NET_ADMIN in the netns) asking with
 * share == IPV6_FL_S_NONE may renew any label in the namespace.
 * -ESRCH if no matching label is found.
 */
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock_bh();
			return err;
		}
	}
	rcu_read_unlock_bh();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		/* Privileged global renew: look the label up in the hash. */
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}
0608 
/* IPV6_FL_A_GET: attach a flowlabel to the socket.  Either joins an
 * existing label (subject to its sharing policy) or, with
 * IPV6_FL_F_CREATE, creates and interns a new one.  A zero flr_label
 * requests a random label, which is copied back to userspace.
 * With IPV6_FL_F_REFLECT, puts a TCP socket into reflection mode.
 */
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
		sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		np->repflow = 1;
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	/* Build the candidate entry up front; it is discarded if an
	 * existing label ends up being joined instead. */
	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	/* Allocation may fail; checked only on the paths that need it. */
	sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

	if (freq->flr_label) {
		err = -EEXIST;
		/* First look on this socket's own list. */
		rcu_read_lock_bh();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock_bh();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock_bh();

		/* Then in the global hash. */
		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			err = -EEXIST;
			if (freq->flr_flags&IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			/* Sharing policy: mode must match, and for
			 * per-process/per-user modes the owner must too. */
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			/* Joining extends the existing entry's lifetime. */
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(np, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	/* fl_intern() may race with a concurrent creator; if it returns
	 * an existing entry, fall back to the join path above. */
	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		/* Report the randomly chosen label back to userspace. */
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
				sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(np, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}
0723 
0724 int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
0725 {
0726     struct in6_flowlabel_req freq;
0727 
0728     if (optlen < sizeof(freq))
0729         return -EINVAL;
0730     if (copy_from_sockptr(&freq, optval, sizeof(freq)))
0731         return -EFAULT;
0732 
0733     switch (freq.flr_action) {
0734     case IPV6_FL_A_PUT:
0735         return ipv6_flowlabel_put(sk, &freq);
0736     case IPV6_FL_A_RENEW:
0737         return ipv6_flowlabel_renew(sk, &freq);
0738     case IPV6_FL_A_GET:
0739         return ipv6_flowlabel_get(sk, &freq, optval, optlen);
0740     default:
0741         return -EINVAL;
0742     }
0743 }
0744 
0745 #ifdef CONFIG_PROC_FS
0746 
0747 struct ip6fl_iter_state {
0748     struct seq_net_private p;
0749     struct pid_namespace *pid_ns;
0750     int bucket;
0751 };
0752 
0753 #define ip6fl_seq_private(seq)  ((struct ip6fl_iter_state *)(seq)->private)
0754 
/* seq_file helper: return the first flowlabel belonging to this netns,
 * scanning hash buckets from 0 and recording the bucket in the iterator
 * state.  Caller holds the RCU-BH read lock (taken in ip6fl_seq_start).
 */
static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}
0771 
/* seq_file helper: advance from @fl to the next entry in this netns,
 * continuing down the current chain and then through later buckets.
 * Returns NULL when the table is exhausted.
 */
static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	/* Rest of the current chain first. */
	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}
0795 
0796 static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
0797 {
0798     struct ip6_flowlabel *fl = ip6fl_get_first(seq);
0799     if (fl)
0800         while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
0801             --pos;
0802     return pos ? NULL : fl;
0803 }
0804 
/* seq_file .start: enter the RCU-BH read section held for the whole
 * iteration (released in ip6fl_seq_stop) and position the cursor.
 * *pos == 0 yields SEQ_START_TOKEN so .show prints the header line.
 */
static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	/* Pid namespace of the opener, used to translate owner pids. */
	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock_bh();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
0815 
0816 static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
0817 {
0818     struct ip6_flowlabel *fl;
0819 
0820     if (v == SEQ_START_TOKEN)
0821         fl = ip6fl_get_first(seq);
0822     else
0823         fl = ip6fl_get_next(seq, v);
0824     ++*pos;
0825     return fl;
0826 }
0827 
/* seq_file .stop: leave the RCU-BH section taken in ip6fl_seq_start. */
static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock_bh();
}
0833 
/* seq_file .show: print the header for SEQ_START_TOKEN, otherwise one
 * /proc/net/ip6_flowlabel row: label, share mode, owner (pid or uid
 * depending on mode, translated into the viewer's namespace), refcount,
 * linger and remaining expiry in seconds, destination, option length.
 */
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;
		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}
0858 
/* seq_file iterator for /proc/net/ip6_flowlabel. */
static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};
0865 
0866 static int __net_init ip6_flowlabel_proc_init(struct net *net)
0867 {
0868     if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
0869             &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
0870         return -ENOMEM;
0871     return 0;
0872 }
0873 
/* Per-netns exit: remove /proc/net/ip6_flowlabel. */
static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
/* CONFIG_PROC_FS disabled: no-op stubs keep callers unconditional. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif
0887 
/* Per-netns exit: drop this netns's unused labels, then its proc file. */
static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}
0893 
/* Per-network-namespace init/exit hooks for the flowlabel manager. */
static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};
0898 
/* Module init: register the per-netns hooks. */
int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}
0903 
/* Module exit: flush the deferred static key, stop the GC timer, and
 * unregister the per-netns hooks.
 */
void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	del_timer(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}