0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 #define pr_fmt(fmt) "IPv6: " fmt
0017
0018 #include <linux/bpf.h>
0019 #include <linux/errno.h>
0020 #include <linux/types.h>
0021 #include <linux/net.h>
0022 #include <linux/route.h>
0023 #include <linux/netdevice.h>
0024 #include <linux/in6.h>
0025 #include <linux/init.h>
0026 #include <linux/list.h>
0027 #include <linux/slab.h>
0028
0029 #include <net/ip.h>
0030 #include <net/ipv6.h>
0031 #include <net/ndisc.h>
0032 #include <net/addrconf.h>
0033 #include <net/lwtunnel.h>
0034 #include <net/fib_notifier.h>
0035
0036 #include <net/ip_fib.h>
0037 #include <net/ip6_fib.h>
0038 #include <net/ip6_route.h>
0039 /* Slab cache from which this file allocates and frees every struct fib6_node. */
0040 static struct kmem_cache *fib6_node_kmem __read_mostly;
0041 /* Tree walker bundled with a per-route callback. NOTE(review): its users are outside this chunk; field roles below are inferred from names and types - confirm. */
0042 struct fib6_cleaner {
0043 struct fib6_walker w; /* embedded walker state */
0044 struct net *net; /* network namespace being processed */
0045 int (*func)(struct fib6_info *, void *arg); /* callback taking a route and an opaque argument */
0046 int sernum; /* presumably a serial number applied while walking - TODO confirm */
0047 void *arg; /* opaque cookie, presumably handed to func - TODO confirm */
0048 bool skip_notify; /* presumably suppresses notifications - TODO confirm */
0049 };
0050 /* Initial walker state: FWS_S when CONFIG_IPV6_SUBTREES is enabled, FWS_L otherwise. */
0051 #ifdef CONFIG_IPV6_SUBTREES
0052 #define FWS_INIT FWS_S
0053 #else
0054 #define FWS_INIT FWS_L
0055 #endif
0056 /* Forward declarations of static helpers that are used before their definitions. */
0057 static struct fib6_info *fib6_find_prefix(struct net *net,
0058 struct fib6_table *table,
0059 struct fib6_node *fn);
0060 static struct fib6_node *fib6_repair_tree(struct net *net,
0061 struct fib6_table *table,
0062 struct fib6_node *fn);
0063 static int fib6_walk(struct net *net, struct fib6_walker *w);
0064 static int fib6_walk_continue(struct fib6_walker *w);
0065
0066
0067
0068
0069
0070
0071
0072 /* Timer callback; only declared here, the definition is not in this part of the file. */
0073 static void fib6_gc_timer_cb(struct timer_list *t);
0074 /* Iterate w over every walker linked (via its lh member) on net->ipv6.fib6_walkers. */
0075 #define FOR_WALKERS(net, w) \
0076 list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
0077
0078 static void fib6_walker_link(struct net *net, struct fib6_walker *w)
0079 {
0080 write_lock_bh(&net->ipv6.fib6_walker_lock);
0081 list_add(&w->lh, &net->ipv6.fib6_walkers);
0082 write_unlock_bh(&net->ipv6.fib6_walker_lock);
0083 }
0084
0085 static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
0086 {
0087 write_lock_bh(&net->ipv6.fib6_walker_lock);
0088 list_del(&w->lh);
0089 write_unlock_bh(&net->ipv6.fib6_walker_lock);
0090 }
0091
0092 static int fib6_new_sernum(struct net *net)
0093 {
0094 int new, old;
0095
0096 do {
0097 old = atomic_read(&net->ipv6.fib6_sernum);
0098 new = old < INT_MAX ? old + 1 : 1;
0099 } while (atomic_cmpxchg(&net->ipv6.fib6_sernum,
0100 old, new) != old);
0101 return new;
0102 }
0103 /* Named constant with value 0; fib6_new_sernum() wraps from INT_MAX to 1, not to 0. NOTE(review): its users are outside this chunk. */
0104 enum {
0105 FIB6_NO_SERNUM_CHANGE = 0,
0106 };
0107
0108 void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
0109 {
0110 struct fib6_node *fn;
0111
0112 fn = rcu_dereference_protected(f6i->fib6_node,
0113 lockdep_is_held(&f6i->fib6_table->tb6_lock));
0114 if (fn)
0115 WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
0116 }
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127 /* XOR mask for a bit index within a 32-bit word: 0x18 (0x1F & ~7) on little-endian hosts, 0 on big-endian; used by addr_bit_set(). */
0128 #if defined(__LITTLE_ENDIAN)
0129 # define BITOP_BE32_SWIZZLE (0x1F & ~7)
0130 #else
0131 # define BITOP_BE32_SWIZZLE 0
0132 #endif
0133
0134 static __be32 addr_bit_set(const void *token, int fn_bit)
0135 {
0136 const __be32 *addr = token;
0137
0138
0139
0140
0141
0142
0143
0144 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
0145 addr[fn_bit >> 5];
0146 }
0147
0148 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
0149 {
0150 struct fib6_info *f6i;
0151 size_t sz = sizeof(*f6i);
0152
0153 if (with_fib6_nh)
0154 sz += sizeof(struct fib6_nh);
0155
0156 f6i = kzalloc(sz, gfp_flags);
0157 if (!f6i)
0158 return NULL;
0159
0160
0161 INIT_LIST_HEAD(&f6i->fib6_siblings);
0162 refcount_set(&f6i->fib6_ref, 1);
0163
0164 return f6i;
0165 }
0166
0167 void fib6_info_destroy_rcu(struct rcu_head *head)
0168 {
0169 struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
0170
0171 WARN_ON(f6i->fib6_node);
0172
0173 if (f6i->nh)
0174 nexthop_put(f6i->nh);
0175 else
0176 fib6_nh_release(f6i->fib6_nh);
0177
0178 ip_fib_metrics_put(f6i->fib6_metrics);
0179 kfree(f6i);
0180 }
0181 EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
0182
0183 static struct fib6_node *node_alloc(struct net *net)
0184 {
0185 struct fib6_node *fn;
0186
0187 fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
0188 if (fn)
0189 net->ipv6.rt6_stats->fib_nodes++;
0190
0191 return fn;
0192 }
0193
0194 static void node_free_immediate(struct net *net, struct fib6_node *fn)
0195 {
0196 kmem_cache_free(fib6_node_kmem, fn);
0197 net->ipv6.rt6_stats->fib_nodes--;
0198 }
0199
0200 static void node_free_rcu(struct rcu_head *head)
0201 {
0202 struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
0203
0204 kmem_cache_free(fib6_node_kmem, fn);
0205 }
0206
0207 static void node_free(struct net *net, struct fib6_node *fn)
0208 {
0209 call_rcu(&fn->rcu, node_free_rcu);
0210 net->ipv6.rt6_stats->fib_nodes--;
0211 }
0212
0213 static void fib6_free_table(struct fib6_table *table)
0214 {
0215 inetpeer_invalidate_tree(&table->tb6_peers);
0216 kfree(table);
0217 }
0218
0219 static void fib6_link_table(struct net *net, struct fib6_table *tb)
0220 {
0221 unsigned int h;
0222
0223
0224
0225
0226
0227 spin_lock_init(&tb->tb6_lock);
0228 h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
0229
0230
0231
0232
0233
0234 hlist_add_head_rcu(&tb->tb6_hlist, &net->ipv6.fib_table_hash[h]);
0235 }
0236
0237 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
0238
0239 static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
0240 {
0241 struct fib6_table *table;
0242
0243 table = kzalloc(sizeof(*table), GFP_ATOMIC);
0244 if (table) {
0245 table->tb6_id = id;
0246 rcu_assign_pointer(table->tb6_root.leaf,
0247 net->ipv6.fib6_null_entry);
0248 table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
0249 inet_peer_base_init(&table->tb6_peers);
0250 }
0251
0252 return table;
0253 }
0254
0255 struct fib6_table *fib6_new_table(struct net *net, u32 id)
0256 {
0257 struct fib6_table *tb;
0258
0259 if (id == 0)
0260 id = RT6_TABLE_MAIN;
0261 tb = fib6_get_table(net, id);
0262 if (tb)
0263 return tb;
0264
0265 tb = fib6_alloc_table(net, id);
0266 if (tb)
0267 fib6_link_table(net, tb);
0268
0269 return tb;
0270 }
0271 EXPORT_SYMBOL_GPL(fib6_new_table);
0272
0273 struct fib6_table *fib6_get_table(struct net *net, u32 id)
0274 {
0275 struct fib6_table *tb;
0276 struct hlist_head *head;
0277 unsigned int h;
0278
0279 if (id == 0)
0280 id = RT6_TABLE_MAIN;
0281 h = id & (FIB6_TABLE_HASHSZ - 1);
0282 rcu_read_lock();
0283 head = &net->ipv6.fib_table_hash[h];
0284 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
0285 if (tb->tb6_id == id) {
0286 rcu_read_unlock();
0287 return tb;
0288 }
0289 }
0290 rcu_read_unlock();
0291
0292 return NULL;
0293 }
0294 EXPORT_SYMBOL_GPL(fib6_get_table);
0295
0296 static void __net_init fib6_tables_init(struct net *net)
0297 {
0298 fib6_link_table(net, net->ipv6.fib6_main_tbl);
0299 fib6_link_table(net, net->ipv6.fib6_local_tbl);
0300 }
0301 #else
0302
0303 struct fib6_table *fib6_new_table(struct net *net, u32 id)
0304 {
0305 return fib6_get_table(net, id);
0306 }
0307
0308 struct fib6_table *fib6_get_table(struct net *net, u32 id)
0309 {
0310 return net->ipv6.fib6_main_tbl;
0311 }
0312
0313 struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
0314 const struct sk_buff *skb,
0315 int flags, pol_lookup_t lookup)
0316 {
0317 struct rt6_info *rt;
0318
0319 rt = pol_lookup_func(lookup,
0320 net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
0321 if (rt->dst.error == -EAGAIN) {
0322 ip6_rt_put_flags(rt, flags);
0323 rt = net->ipv6.ip6_null_entry;
0324 if (!(flags & RT6_LOOKUP_F_DST_NOREF))
0325 dst_hold(&rt->dst);
0326 }
0327
0328 return &rt->dst;
0329 }
0330
0331
0332 int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
0333 struct fib6_result *res, int flags)
0334 {
0335 return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
0336 res, flags);
0337 }
0338
0339 static void __net_init fib6_tables_init(struct net *net)
0340 {
0341 fib6_link_table(net, net->ipv6.fib6_main_tbl);
0342 }
0343
0344 #endif
0345
0346 unsigned int fib6_tables_seq_read(struct net *net)
0347 {
0348 unsigned int h, fib_seq = 0;
0349
0350 rcu_read_lock();
0351 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
0352 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
0353 struct fib6_table *tb;
0354
0355 hlist_for_each_entry_rcu(tb, head, tb6_hlist)
0356 fib_seq += tb->fib_seq;
0357 }
0358 rcu_read_unlock();
0359
0360 return fib_seq;
0361 }
0362
0363 static int call_fib6_entry_notifier(struct notifier_block *nb,
0364 enum fib_event_type event_type,
0365 struct fib6_info *rt,
0366 struct netlink_ext_ack *extack)
0367 {
0368 struct fib6_entry_notifier_info info = {
0369 .info.extack = extack,
0370 .rt = rt,
0371 };
0372
0373 return call_fib6_notifier(nb, event_type, &info.info);
0374 }
0375
0376 static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
0377 enum fib_event_type event_type,
0378 struct fib6_info *rt,
0379 unsigned int nsiblings,
0380 struct netlink_ext_ack *extack)
0381 {
0382 struct fib6_entry_notifier_info info = {
0383 .info.extack = extack,
0384 .rt = rt,
0385 .nsiblings = nsiblings,
0386 };
0387
0388 return call_fib6_notifier(nb, event_type, &info.info);
0389 }
0390
0391 int call_fib6_entry_notifiers(struct net *net,
0392 enum fib_event_type event_type,
0393 struct fib6_info *rt,
0394 struct netlink_ext_ack *extack)
0395 {
0396 struct fib6_entry_notifier_info info = {
0397 .info.extack = extack,
0398 .rt = rt,
0399 };
0400
0401 rt->fib6_table->fib_seq++;
0402 return call_fib6_notifiers(net, event_type, &info.info);
0403 }
0404
0405 int call_fib6_multipath_entry_notifiers(struct net *net,
0406 enum fib_event_type event_type,
0407 struct fib6_info *rt,
0408 unsigned int nsiblings,
0409 struct netlink_ext_ack *extack)
0410 {
0411 struct fib6_entry_notifier_info info = {
0412 .info.extack = extack,
0413 .rt = rt,
0414 .nsiblings = nsiblings,
0415 };
0416
0417 rt->fib6_table->fib_seq++;
0418 return call_fib6_notifiers(net, event_type, &info.info);
0419 }
0420
0421 int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
0422 {
0423 struct fib6_entry_notifier_info info = {
0424 .rt = rt,
0425 .nsiblings = rt->fib6_nsiblings,
0426 };
0427
0428 rt->fib6_table->fib_seq++;
0429 return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
0430 }
0431 /* Argument block passed through walker->args by fib6_tables_dump() to fib6_rt_dump(). */
0432 struct fib6_dump_arg {
0433 struct net *net; /* namespace; supplies the null entry that fib6_rt_dump() skips */
0434 struct notifier_block *nb; /* the notifier block the routes are replayed to */
0435 struct netlink_ext_ack *extack; /* forwarded to the notifier call */
0436 };
0437
0438 static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
0439 {
0440 enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
0441 int err;
0442
0443 if (!rt || rt == arg->net->ipv6.fib6_null_entry)
0444 return 0;
0445
0446 if (rt->fib6_nsiblings)
0447 err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
0448 rt,
0449 rt->fib6_nsiblings,
0450 arg->extack);
0451 else
0452 err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
0453 arg->extack);
0454
0455 return err;
0456 }
0457
0458 static int fib6_node_dump(struct fib6_walker *w)
0459 {
0460 int err;
0461
0462 err = fib6_rt_dump(w->leaf, w->args);
0463 w->leaf = NULL;
0464 return err;
0465 }
0466
0467 static int fib6_table_dump(struct net *net, struct fib6_table *tb,
0468 struct fib6_walker *w)
0469 {
0470 int err;
0471
0472 w->root = &tb->tb6_root;
0473 spin_lock_bh(&tb->tb6_lock);
0474 err = fib6_walk(net, w);
0475 spin_unlock_bh(&tb->tb6_lock);
0476 return err;
0477 }
0478
0479
0480 int fib6_tables_dump(struct net *net, struct notifier_block *nb,
0481 struct netlink_ext_ack *extack)
0482 {
0483 struct fib6_dump_arg arg;
0484 struct fib6_walker *w;
0485 unsigned int h;
0486 int err = 0;
0487
0488 w = kzalloc(sizeof(*w), GFP_ATOMIC);
0489 if (!w)
0490 return -ENOMEM;
0491
0492 w->func = fib6_node_dump;
0493 arg.net = net;
0494 arg.nb = nb;
0495 arg.extack = extack;
0496 w->args = &arg;
0497
0498 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
0499 struct hlist_head *head = &net->ipv6.fib_table_hash[h];
0500 struct fib6_table *tb;
0501
0502 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
0503 err = fib6_table_dump(net, tb, w);
0504 if (err)
0505 goto out;
0506 }
0507 }
0508
0509 out:
0510 kfree(w);
0511
0512
0513 return err > 0 ? -EINVAL : err;
0514 }
0515
0516 static int fib6_dump_node(struct fib6_walker *w)
0517 {
0518 int res;
0519 struct fib6_info *rt;
0520
0521 for_each_fib6_walker_rt(w) {
0522 res = rt6_dump_route(rt, w->args, w->skip_in_node);
0523 if (res >= 0) {
0524
0525 w->leaf = rt;
0526
0527
0528
0529
0530 w->skip_in_node += res;
0531
0532 return 1;
0533 }
0534 w->skip_in_node = 0;
0535
0536
0537
0538
0539
0540
0541 if (rt->fib6_nsiblings)
0542 rt = list_last_entry(&rt->fib6_siblings,
0543 struct fib6_info,
0544 fib6_siblings);
0545 }
0546 w->leaf = NULL;
0547 return 0;
0548 }
0549
0550 static void fib6_dump_end(struct netlink_callback *cb)
0551 {
0552 struct net *net = sock_net(cb->skb->sk);
0553 struct fib6_walker *w = (void *)cb->args[2];
0554
0555 if (w) {
0556 if (cb->args[4]) {
0557 cb->args[4] = 0;
0558 fib6_walker_unlink(net, w);
0559 }
0560 cb->args[2] = 0;
0561 kfree(w);
0562 }
0563 cb->done = (void *)cb->args[3];
0564 cb->args[1] = 3;
0565 }
0566
0567 static int fib6_dump_done(struct netlink_callback *cb)
0568 {
0569 fib6_dump_end(cb);
0570 return cb->done ? cb->done(cb) : 0;
0571 }
0572
0573 static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
0574 struct netlink_callback *cb)
0575 {
0576 struct net *net = sock_net(skb->sk);
0577 struct fib6_walker *w;
0578 int res;
0579
0580 w = (void *)cb->args[2];
0581 w->root = &table->tb6_root;
0582
0583 if (cb->args[4] == 0) {
0584 w->count = 0;
0585 w->skip = 0;
0586 w->skip_in_node = 0;
0587
0588 spin_lock_bh(&table->tb6_lock);
0589 res = fib6_walk(net, w);
0590 spin_unlock_bh(&table->tb6_lock);
0591 if (res > 0) {
0592 cb->args[4] = 1;
0593 cb->args[5] = READ_ONCE(w->root->fn_sernum);
0594 }
0595 } else {
0596 int sernum = READ_ONCE(w->root->fn_sernum);
0597 if (cb->args[5] != sernum) {
0598
0599 cb->args[5] = sernum;
0600 w->state = FWS_INIT;
0601 w->node = w->root;
0602 w->skip = w->count;
0603 w->skip_in_node = 0;
0604 } else
0605 w->skip = 0;
0606
0607 spin_lock_bh(&table->tb6_lock);
0608 res = fib6_walk_continue(w);
0609 spin_unlock_bh(&table->tb6_lock);
0610 if (res <= 0) {
0611 fib6_walker_unlink(net, w);
0612 cb->args[4] = 0;
0613 }
0614 }
0615
0616 return res;
0617 }
0618
0619 static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
0620 {
0621 struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true,
0622 .filter.dump_routes = true };
0623 const struct nlmsghdr *nlh = cb->nlh;
0624 struct net *net = sock_net(skb->sk);
0625 unsigned int h, s_h;
0626 unsigned int e = 0, s_e;
0627 struct fib6_walker *w;
0628 struct fib6_table *tb;
0629 struct hlist_head *head;
0630 int res = 0;
0631
0632 if (cb->strict_check) {
0633 int err;
0634
0635 err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb);
0636 if (err < 0)
0637 return err;
0638 } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) {
0639 struct rtmsg *rtm = nlmsg_data(nlh);
0640
0641 if (rtm->rtm_flags & RTM_F_PREFIX)
0642 arg.filter.flags = RTM_F_PREFIX;
0643 }
0644
0645 w = (void *)cb->args[2];
0646 if (!w) {
0647
0648
0649
0650
0651 cb->args[3] = (long)cb->done;
0652 cb->done = fib6_dump_done;
0653
0654
0655
0656
0657 w = kzalloc(sizeof(*w), GFP_ATOMIC);
0658 if (!w)
0659 return -ENOMEM;
0660 w->func = fib6_dump_node;
0661 cb->args[2] = (long)w;
0662 }
0663
0664 arg.skb = skb;
0665 arg.cb = cb;
0666 arg.net = net;
0667 w->args = &arg;
0668
0669 if (arg.filter.table_id) {
0670 tb = fib6_get_table(net, arg.filter.table_id);
0671 if (!tb) {
0672 if (rtnl_msg_family(cb->nlh) != PF_INET6)
0673 goto out;
0674
0675 NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
0676 return -ENOENT;
0677 }
0678
0679 if (!cb->args[0]) {
0680 res = fib6_dump_table(tb, skb, cb);
0681 if (!res)
0682 cb->args[0] = 1;
0683 }
0684 goto out;
0685 }
0686
0687 s_h = cb->args[0];
0688 s_e = cb->args[1];
0689
0690 rcu_read_lock();
0691 for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) {
0692 e = 0;
0693 head = &net->ipv6.fib_table_hash[h];
0694 hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
0695 if (e < s_e)
0696 goto next;
0697 res = fib6_dump_table(tb, skb, cb);
0698 if (res != 0)
0699 goto out_unlock;
0700 next:
0701 e++;
0702 }
0703 }
0704 out_unlock:
0705 rcu_read_unlock();
0706 cb->args[1] = e;
0707 cb->args[0] = h;
0708 out:
0709 res = res < 0 ? res : skb->len;
0710 if (res <= 0)
0711 fib6_dump_end(cb);
0712 return res;
0713 }
0714
0715 void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val)
0716 {
0717 if (!f6i)
0718 return;
0719
0720 if (f6i->fib6_metrics == &dst_default_metrics) {
0721 struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC);
0722
0723 if (!p)
0724 return;
0725
0726 refcount_set(&p->refcnt, 1);
0727 f6i->fib6_metrics = p;
0728 }
0729
0730 f6i->fib6_metrics->metrics[metric - 1] = val;
0731 }
0732
0733 /* fib6_add_1 - find, or create, the trie node for prefix addr/plen below root. */
0734 /* offset is the byte offset inside struct fib6_info of the struct rt6key used as the comparison key. */
0735 /* With allow_create == 0 and replace_required != 0 a missing node is an error (-ENOENT); */
0736 /* with allow_create == 0 alone only a warning is logged and the node is still created. */
0737 /* All pointer reads are annotated with lockdep_is_held(&table->tb6_lock), i.e. the table lock is expected to be held. */
0738 /* Returns the node for the prefix, ERR_PTR(-ENOENT) or ERR_PTR(-ENOMEM). */
0739
0740
0741 static struct fib6_node *fib6_add_1(struct net *net,
0742 struct fib6_table *table,
0743 struct fib6_node *root,
0744 struct in6_addr *addr, int plen,
0745 int offset, int allow_create,
0746 int replace_required,
0747 struct netlink_ext_ack *extack)
0748 {
0749 struct fib6_node *fn, *in, *ln;
0750 struct fib6_node *pn = NULL;
0751 struct rt6key *key;
0752 int bit;
0753 __be32 dir = 0;
0754
0755 RT6_TRACE("fib6_add_1\n");
0756
0757 /* Descend from the root, one node per iteration. */
0758
0759 fn = root;
0760
0761 do {
0762 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
0763 lockdep_is_held(&table->tb6_lock));
0764 key = (struct rt6key *)((u8 *)leaf + offset);
0765
0766 /* The requested prefix is shorter than this node's, or differs from */
0767 /* its key within the first fn_bit bits: the new node belongs above fn. */
0768
0769 if (plen < fn->fn_bit ||
0770 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) {
0771 if (!allow_create) {
0772 if (replace_required) {
0773 NL_SET_ERR_MSG(extack,
0774 "Can not replace route - no match found");
0775 pr_warn("Can't replace route, no match found\n");
0776 return ERR_PTR(-ENOENT);
0777 }
0778 pr_warn("NLM_F_CREATE should be set when creating new route\n");
0779 }
0780 goto insert_above;
0781 }
0782
0783 /* Same prefix length and matching bits: this node is the one. */
0784
0785
0786
0787 if (plen == fn->fn_bit) {
0788 /* A node without route info only held a placeholder leaf: drop that reference. */
0789 if (!(fn->fn_flags & RTN_RTINFO)) {
0790 RCU_INIT_POINTER(fn->leaf, NULL);
0791 fib6_info_release(leaf);
0792 /* A top-level root still pointing at the null entry: just clear the leaf. */
0793 } else if (fn->fn_flags & RTN_TL_ROOT &&
0794 rcu_access_pointer(fn->leaf) ==
0795 net->ipv6.fib6_null_entry) {
0796 RCU_INIT_POINTER(fn->leaf, NULL);
0797 }
0798
0799 return fn;
0800 }
0801
0802 /* The node's prefix covers ours and there are more bits to consume: */
0803 /* go right when bit fn_bit of addr is set, left otherwise; remember */
0804 /* the parent (pn) and the direction taken (dir). */
0805
0806
0807 dir = addr_bit_set(addr, fn->fn_bit);
0808 pn = fn;
0809 fn = dir ?
0810 rcu_dereference_protected(fn->right,
0811 lockdep_is_held(&table->tb6_lock)) :
0812 rcu_dereference_protected(fn->left,
0813 lockdep_is_held(&table->tb6_lock));
0814 } while (fn);
0815
0816 if (!allow_create) {
0817 /* Fell off the bottom of the tree without finding the prefix. */
0818 /* A replace request fails here; a request that merely lacks */
0819 /* NLM_F_CREATE only logs a warning and falls through to creation. */
0820
0821
0822
0823
0824
0825
0826 if (replace_required) {
0827 NL_SET_ERR_MSG(extack,
0828 "Can not replace route - no match found");
0829 pr_warn("Can't replace route, no match found\n");
0830 return ERR_PTR(-ENOENT);
0831 }
0832 pr_warn("NLM_F_CREATE should be set when creating new route\n");
0833 }
0834
0835 /* Create a new leaf node without children and hang it off pn on side dir. */
0836
0837
0838
0839 ln = node_alloc(net);
0840
0841 if (!ln)
0842 return ERR_PTR(-ENOMEM);
0843 ln->fn_bit = plen;
0844 RCU_INIT_POINTER(ln->parent, pn);
0845
0846 if (dir)
0847 rcu_assign_pointer(pn->right, ln);
0848 else
0849 rcu_assign_pointer(pn->left, ln);
0850
0851 return ln;
0852
0853
0854 insert_above:
0855 /* fn does not cover addr/plen: splice new node(s) between fn's parent and fn. */
0856 /* dir still holds the direction taken from that parent to reach fn. */
0857
0858
0859
0860
0861
0862
0863 pn = rcu_dereference_protected(fn->parent,
0864 lockdep_is_held(&table->tb6_lock));
0865
0866 /* NOTE(review): __ipv6_addr_diff() presumably yields the number of leading */
0867 /* bits shared by addr and fn's key - confirm against its definition. */
0868
0869
0870
0871
0872 bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
0873
0874 /* plen > bit: an intermediate node (in) is placed at bit, with fn and */
0875 /* the new leaf (ln) as its two children. */
0876
0877
0878
0879 if (plen > bit) {
0880 in = node_alloc(net);
0881 ln = node_alloc(net);
0882
0883 if (!in || !ln) {
0884 if (in)
0885 node_free_immediate(net, in);
0886 if (ln)
0887 node_free_immediate(net, ln);
0888 return ERR_PTR(-ENOMEM);
0889 }
0890
0891 /* The intermediate node carries no route of its own (its flags are */
0892 /* zero from the zeroing allocator); it borrows fn's leaf and takes */
0893 /* a reference on it. */
0894
0895
0896
0897
0898
0899 in->fn_bit = bit;
0900
0901 RCU_INIT_POINTER(in->parent, pn);
0902 in->leaf = fn->leaf;
0903 fib6_info_hold(rcu_dereference_protected(in->leaf,
0904 lockdep_is_held(&table->tb6_lock)));
0905
0906 /* Make the parent point at the intermediate node. */
0907 if (dir)
0908 rcu_assign_pointer(pn->right, in);
0909 else
0910 rcu_assign_pointer(pn->left, in);
0911
0912 ln->fn_bit = plen;
0913
0914 RCU_INIT_POINTER(ln->parent, in);
0915 rcu_assign_pointer(fn->parent, in);
0916
0917 if (addr_bit_set(addr, bit)) {
0918 rcu_assign_pointer(in->right, ln);
0919 rcu_assign_pointer(in->left, fn);
0920 } else {
0921 rcu_assign_pointer(in->left, ln);
0922 rcu_assign_pointer(in->right, fn);
0923 }
0924 } else {
0925 /* plen <= bit: the new prefix covers fn's key. The new node takes */
0926 /* fn's place under pn and fn becomes its child, on the side given */
0927 /* by bit plen of fn's key. */
0928
0929
0930
0931
0932 ln = node_alloc(net);
0933
0934 if (!ln)
0935 return ERR_PTR(-ENOMEM);
0936
0937 ln->fn_bit = plen;
0938
0939 RCU_INIT_POINTER(ln->parent, pn);
0940
0941 if (addr_bit_set(&key->addr, plen))
0942 RCU_INIT_POINTER(ln->right, fn);
0943 else
0944 RCU_INIT_POINTER(ln->left, fn);
0945
0946 rcu_assign_pointer(fn->parent, ln);
0947
0948 if (dir)
0949 rcu_assign_pointer(pn->right, ln);
0950 else
0951 rcu_assign_pointer(pn->left, ln);
0952 }
0953 return ln;
0954 }
0955
0956 static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
0957 const struct fib6_info *match,
0958 const struct fib6_table *table)
0959 {
0960 int cpu;
0961
0962 if (!fib6_nh->rt6i_pcpu)
0963 return;
0964
0965
0966
0967
0968 for_each_possible_cpu(cpu) {
0969 struct rt6_info **ppcpu_rt;
0970 struct rt6_info *pcpu_rt;
0971
0972 ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
0973 pcpu_rt = *ppcpu_rt;
0974
0975
0976
0977
0978
0979
0980 if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
0981 struct fib6_info *from;
0982
0983 from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
0984 fib6_info_release(from);
0985 }
0986 }
0987 }
0988 /* Argument block handed to fib6_nh_drop_pcpu_from() through nexthop_for_each_fib6_nh(). */
0989 struct fib6_nh_pcpu_arg {
0990 struct fib6_info *from; /* the route whose per-cpu references are dropped */
0991 const struct fib6_table *table; /* the route's table, passed through unchanged */
0992 };
0993
0994 static int fib6_nh_drop_pcpu_from(struct fib6_nh *nh, void *_arg)
0995 {
0996 struct fib6_nh_pcpu_arg *arg = _arg;
0997
0998 __fib6_drop_pcpu_from(nh, arg->from, arg->table);
0999 return 0;
1000 }
1001
1002 static void fib6_drop_pcpu_from(struct fib6_info *f6i,
1003 const struct fib6_table *table)
1004 {
1005
1006
1007
1008 f6i->fib6_destroying = 1;
1009 mb();
1010
1011 if (f6i->nh) {
1012 struct fib6_nh_pcpu_arg arg = {
1013 .from = f6i,
1014 .table = table
1015 };
1016
1017 nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
1018 &arg);
1019 } else {
1020 struct fib6_nh *fib6_nh;
1021
1022 fib6_nh = f6i->fib6_nh;
1023 __fib6_drop_pcpu_from(fib6_nh, f6i, table);
1024 }
1025 }
1026
1027 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
1028 struct net *net)
1029 {
1030 struct fib6_table *table = rt->fib6_table;
1031
1032
1033 rt6_flush_exceptions(rt);
1034 fib6_drop_pcpu_from(rt, table);
1035
1036 if (rt->nh && !list_empty(&rt->nh_list))
1037 list_del_init(&rt->nh_list);
1038
1039 if (refcount_read(&rt->fib6_ref) != 1) {
1040
1041
1042
1043
1044
1045
1046 while (fn) {
1047 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
1048 lockdep_is_held(&table->tb6_lock));
1049 struct fib6_info *new_leaf;
1050 if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
1051 new_leaf = fib6_find_prefix(net, table, fn);
1052 fib6_info_hold(new_leaf);
1053
1054 rcu_assign_pointer(fn->leaf, new_leaf);
1055 fib6_info_release(rt);
1056 }
1057 fn = rcu_dereference_protected(fn->parent,
1058 lockdep_is_held(&table->tb6_lock));
1059 }
1060 }
1061 }
1062 /* fib6_add_rt2node - insert route rt into the list of routes hanging off node fn. */
1063 /* The list is scanned in order and rt goes in front of the first route with a larger metric. */
1064 /* NLM_F_EXCL, NLM_F_REPLACE, NLM_F_CREATE and NLM_F_APPEND from info->nlh select the add/replace behaviour. */
1065 /* Returns 0 on success, -EEXIST, -ENOENT, or the error returned by a FIB notifier. */
1066 /* Pointer reads are annotated with lockdep_is_held(&rt->fib6_table->tb6_lock). */
1067 static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
1068 struct nl_info *info,
1069 struct netlink_ext_ack *extack)
1070 {
1071 struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
1072 lockdep_is_held(&rt->fib6_table->tb6_lock));
1073 struct fib6_info *iter = NULL;
1074 struct fib6_info __rcu **ins;
1075 struct fib6_info __rcu **fallback_ins = NULL;
1076 int replace = (info->nlh &&
1077 (info->nlh->nlmsg_flags & NLM_F_REPLACE));
1078 int add = (!info->nlh ||
1079 (info->nlh->nlmsg_flags & NLM_F_CREATE));
1080 int found = 0;
1081 bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
1082 bool notify_sibling_rt = false;
1083 u16 nlflags = NLM_F_EXCL;
1084 int err;
1085
1086 if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
1087 nlflags |= NLM_F_APPEND;
1088
1089 ins = &fn->leaf;
1090
1091 for (iter = leaf; iter;
1092 iter = rcu_dereference_protected(iter->fib6_next,
1093 lockdep_is_held(&rt->fib6_table->tb6_lock))) {
1094 /* Scan the node's routes: look for duplicates and for the insertion point (ins). */
1095
1096
1097
1098 if (iter->fib6_metric == rt->fib6_metric) {
1099 /* Same metric: an NLM_F_EXCL request fails as soon as any such route exists. */
1100
1101
1102 if (info->nlh &&
1103 (info->nlh->nlmsg_flags & NLM_F_EXCL))
1104 return -EEXIST;
1105
1106 nlflags &= ~NLM_F_EXCL;
1107 if (replace) {
1108 if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
1109 found++;
1110 break;
1111 }
1112 fallback_ins = fallback_ins ?: ins;
1113 goto next_iter;
1114 }
1115
1116 if (rt6_duplicate_nexthop(iter, rt)) {
1117 if (rt->fib6_nsiblings)
1118 rt->fib6_nsiblings = 0;
1119 if (!(iter->fib6_flags & RTF_EXPIRES))
1120 return -EEXIST;
1121 if (!(rt->fib6_flags & RTF_EXPIRES))
1122 fib6_clean_expires(iter);
1123 else
1124 fib6_set_expires(iter, rt->expires);
1125
1126 if (rt->fib6_pmtu)
1127 fib6_metric_set(iter, RTAX_MTU,
1128 rt->fib6_pmtu);
1129 return -EEXIST;
1130 }
1131
1132 /* Same metric but a different nexthop: when both routes qualify for ECMP, */
1133 /* count iter as a future sibling of rt. */
1134
1135
1136
1137
1138
1139
1140
1141
1142 if (rt_can_ecmp &&
1143 rt6_qualify_for_ecmp(iter))
1144 rt->fib6_nsiblings++;
1145 }
1146
1147 if (iter->fib6_metric > rt->fib6_metric)
1148 break;
1149
1150 next_iter:
1151 ins = &iter->fib6_next;
1152 }
1153
1154 if (fallback_ins && !found) {
1155 /* Replace found no route with matching ECMP eligibility: */
1156 /* fall back to the first same-metric route seen. */
1157
1158 ins = fallback_ins;
1159 iter = rcu_dereference_protected(*ins,
1160 lockdep_is_held(&rt->fib6_table->tb6_lock));
1161 found++;
1162 }
1163
1164 /* Inserting at the head of the list: reset the node's rr_ptr. */
1165 if (ins == &fn->leaf)
1166 fn->rr_ptr = NULL;
1167
1168 /* rt has siblings: link it into the existing sibling list. */
1169 if (rt->fib6_nsiblings) {
1170 unsigned int fib6_nsiblings;
1171 struct fib6_info *sibling, *temp_sibling;
1172
1173 /* Find the first ECMP-capable route with rt's metric; notify_sibling_rt stays true only if that is the node's first route. */
1174 sibling = leaf;
1175 notify_sibling_rt = true;
1176 while (sibling) {
1177 if (sibling->fib6_metric == rt->fib6_metric &&
1178 rt6_qualify_for_ecmp(sibling)) {
1179 list_add_tail(&rt->fib6_siblings,
1180 &sibling->fib6_siblings);
1181 break;
1182 }
1183 sibling = rcu_dereference_protected(sibling->fib6_next,
1184 lockdep_is_held(&rt->fib6_table->tb6_lock));
1185 notify_sibling_rt = false;
1186 }
1187
1188 /* Bump the sibling count of every existing sibling; */
1189 /* each must end up equal to rt's own count. */
1190
1191 fib6_nsiblings = 0;
1192 list_for_each_entry_safe(sibling, temp_sibling,
1193 &rt->fib6_siblings, fib6_siblings) {
1194 sibling->fib6_nsiblings++;
1195 BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
1196 fib6_nsiblings++;
1197 }
1198 BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
1199 rt6_multipath_rebalance(temp_sibling);
1200 }
1201
1202 /* Perform the insertion (add path) or the substitution (replace path). */
1203
1204
1205 if (!replace) {
1206 if (!add)
1207 pr_warn("NLM_F_CREATE should be set when creating new route\n");
1208
1209 add:
1210 nlflags |= NLM_F_CREATE;
1211
1212 /* In-kernel listeners are notified only when rt joins the sibling group */
1213 /* headed by the node's first route (APPEND) or becomes the first route */
1214 /* of the node (REPLACE). */
1215
1216 if (!info->skip_notify_kernel &&
1217 (notify_sibling_rt || ins == &fn->leaf)) {
1218 enum fib_event_type fib_event;
1219
1220 if (notify_sibling_rt)
1221 fib_event = FIB_EVENT_ENTRY_APPEND;
1222 else
1223 fib_event = FIB_EVENT_ENTRY_REPLACE;
1224 err = call_fib6_entry_notifiers(info->nl_net,
1225 fib_event, rt,
1226 extack);
1227 if (err) {
1228 struct fib6_info *sibling, *next_sibling;
1229
1230 /* The notifier refused the route: undo the sibling linkage */
1231 /* done above before returning the error. */
1232
1233 if (!rt->fib6_nsiblings)
1234 return err;
1235
1236 list_for_each_entry_safe(sibling, next_sibling,
1237 &rt->fib6_siblings,
1238 fib6_siblings)
1239 sibling->fib6_nsiblings--;
1240 rt->fib6_nsiblings = 0;
1241 list_del_init(&rt->fib6_siblings);
1242 rt6_multipath_rebalance(next_sibling);
1243 return err;
1244 }
1245 }
1246
1247 rcu_assign_pointer(rt->fib6_next, iter);
1248 fib6_info_hold(rt);
1249 rcu_assign_pointer(rt->fib6_node, fn);
1250 rcu_assign_pointer(*ins, rt);
1251 if (!info->skip_notify)
1252 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
1253 info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
1254
1255 if (!(fn->fn_flags & RTN_RTINFO)) {
1256 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1257 fn->fn_flags |= RTN_RTINFO;
1258 }
1259
1260 } else {
1261 int nsiblings;
1262
1263 if (!found) {
1264 if (add)
1265 goto add;
1266 pr_warn("NLM_F_REPLACE set, but no existing node found!\n");
1267 return -ENOENT;
1268 }
1269
1270 if (!info->skip_notify_kernel && ins == &fn->leaf) {
1271 err = call_fib6_entry_notifiers(info->nl_net,
1272 FIB_EVENT_ENTRY_REPLACE,
1273 rt, extack);
1274 if (err)
1275 return err;
1276 }
1277
1278 fib6_info_hold(rt);
1279 rcu_assign_pointer(rt->fib6_node, fn);
1280 rt->fib6_next = iter->fib6_next;
1281 rcu_assign_pointer(*ins, rt);
1282 if (!info->skip_notify)
1283 inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
1284 if (!(fn->fn_flags & RTN_RTINFO)) {
1285 info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
1286 fn->fn_flags |= RTN_RTINFO;
1287 }
1288 nsiblings = iter->fib6_nsiblings;
1289 iter->fib6_node = NULL;
1290 fib6_purge_rt(iter, fn, info->nl_net);
1291 if (rcu_access_pointer(fn->rr_ptr) == iter)
1292 fn->rr_ptr = NULL;
1293 fib6_info_release(iter);
1294
1295 if (nsiblings) {
1296 /* The replaced route was multipath: unlink and purge its remaining ECMP siblings too. */
1297 ins = &rt->fib6_next;
1298 iter = rcu_dereference_protected(*ins,
1299 lockdep_is_held(&rt->fib6_table->tb6_lock));
1300 while (iter) {
1301 if (iter->fib6_metric > rt->fib6_metric)
1302 break;
1303 if (rt6_qualify_for_ecmp(iter)) {
1304 *ins = iter->fib6_next;
1305 iter->fib6_node = NULL;
1306 fib6_purge_rt(iter, fn, info->nl_net);
1307 if (rcu_access_pointer(fn->rr_ptr) == iter)
1308 fn->rr_ptr = NULL;
1309 fib6_info_release(iter);
1310 nsiblings--;
1311 info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
1312 } else {
1313 ins = &iter->fib6_next;
1314 }
1315 iter = rcu_dereference_protected(*ins,
1316 lockdep_is_held(&rt->fib6_table->tb6_lock));
1317 }
1318 WARN_ON(nsiblings != 0);
1319 }
1320 }
1321
1322 return 0;
1323 }
1324
1325 static void fib6_start_gc(struct net *net, struct fib6_info *rt)
1326 {
1327 if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
1328 (rt->fib6_flags & RTF_EXPIRES))
1329 mod_timer(&net->ipv6.ip6_fib_timer,
1330 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1331 }
1332
1333 void fib6_force_start_gc(struct net *net)
1334 {
1335 if (!timer_pending(&net->ipv6.ip6_fib_timer))
1336 mod_timer(&net->ipv6.ip6_fib_timer,
1337 jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
1338 }
1339
1340 static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
1341 int sernum)
1342 {
1343 struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
1344 lockdep_is_held(&rt->fib6_table->tb6_lock));
1345
1346
1347 smp_wmb();
1348 while (fn) {
1349 WRITE_ONCE(fn->fn_sernum, sernum);
1350 fn = rcu_dereference_protected(fn->parent,
1351 lockdep_is_held(&rt->fib6_table->tb6_lock));
1352 }
1353 }
1354
/*
 * Allocate a new serial number for @net and stamp it on @rt's node and
 * all of that node's ancestors.
 */
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
{
	int sernum = fib6_new_sernum(net);

	__fib6_update_sernum_upto_root(rt, sernum);
}
1359
1360
1361 void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
1362 {
1363 spin_lock_bh(&f6i->fib6_table->tb6_lock);
1364 fib6_update_sernum_upto_root(net, f6i);
1365 spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1366 }
1367
1368
1369
1370
1371
1372
1373
1374
/*
 *	Insert route @rt into the tree rooted at @root.
 *
 *	The destination prefix of @rt selects (or creates) a node via
 *	fib6_add_1(). With CONFIG_IPV6_SUBTREES and a non-zero source
 *	prefix length, a second fib6_add_1() call places the route in the
 *	source subtree hanging off that node, creating the subtree root
 *	first when the node has none. The route itself is then linked by
 *	fib6_add_rt2node().
 *
 *	NLM_F_CREATE / NLM_F_REPLACE from info->nlh (when present) control
 *	whether new nodes may be created and whether a replace is required.
 *
 *	Returns 0 on success, otherwise the negative error produced by
 *	fib6_add_1() / fib6_add_rt2node(), or -ENOMEM when the subtree root
 *	cannot be allocated. On failure, nodes left without route
 *	information are handed to fib6_repair_tree().
 *
 *	The lockdep annotations expect table->tb6_lock to be held.
 */
int fib6_add(struct fib6_node *root, struct fib6_info *rt,
	     struct nl_info *info, struct netlink_ext_ack *extack)
{
	struct fib6_table *table = rt->fib6_table;
	struct fib6_node *fn, *pn = NULL;
	int err = -ENOMEM;
	int allow_create = 1;
	int replace_required = 0;

	/* Without a netlink header the defaults (create, no replace) apply. */
	if (info->nlh) {
		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
			allow_create = 0;
		if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
			replace_required = 1;
	}
	if (!allow_create && !replace_required)
		pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");

	/* Find or create the node for the destination prefix. */
	fn = fib6_add_1(info->nl_net, table, root,
			&rt->fib6_dst.addr, rt->fib6_dst.plen,
			offsetof(struct fib6_info, fib6_dst), allow_create,
			replace_required, extack);
	if (IS_ERR(fn)) {
		err = PTR_ERR(fn);
		fn = NULL;
		goto out;
	}

	/* Remember the destination node; fn may move into a subtree below. */
	pn = fn;

#ifdef CONFIG_IPV6_SUBTREES
	if (rt->fib6_src.plen) {
		struct fib6_node *sn;

		if (!rcu_access_pointer(fn->subtree)) {
			struct fib6_node *sfn;

			/*
			 * The destination node has no source subtree yet.
			 * Build the new subtree completely (root plus the
			 * first source node) before publishing it through
			 * fn->subtree further down.
			 */

			/* Create subtree root node */
			sfn = node_alloc(info->nl_net);
			if (!sfn)
				goto failure;

			/* The subtree root's leaf is the null entry. */
			fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
			rcu_assign_pointer(sfn->leaf,
					   info->nl_net->ipv6.fib6_null_entry);
			sfn->fn_flags = RTN_ROOT;

			/* Now add the first source node to the new subtree */

			sn = fib6_add_1(info->nl_net, table, sfn,
					&rt->fib6_src.addr, rt->fib6_src.plen,
					offsetof(struct fib6_info, fib6_src),
					allow_create, replace_required, extack);

			if (IS_ERR(sn)) {
				/*
				 * The subtree root was never linked into the
				 * tree, so it is freed immediately; the
				 * failure path then deals with the node in
				 * the main tree.
				 */
				node_free_immediate(info->nl_net, sfn);
				err = PTR_ERR(sn);
				goto failure;
			}

			/* Now link new subtree to main tree */
			rcu_assign_pointer(sfn->parent, fn);
			rcu_assign_pointer(fn->subtree, sfn);
		} else {
			/* Subtree already exists: insert into it directly. */
			sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
					&rt->fib6_src.addr, rt->fib6_src.plen,
					offsetof(struct fib6_info, fib6_src),
					allow_create, replace_required, extack);

			if (IS_ERR(sn)) {
				err = PTR_ERR(sn);
				goto failure;
			}
		}

		/*
		 * Give a leafless destination node a leaf: the null entry
		 * for the table's top-level root, otherwise a held reference
		 * to @rt.
		 */
		if (!rcu_access_pointer(fn->leaf)) {
			if (fn->fn_flags & RTN_TL_ROOT) {
				/* put back null_entry for root node */
				rcu_assign_pointer(fn->leaf,
					    info->nl_net->ipv6.fib6_null_entry);
			} else {
				fib6_info_hold(rt);
				rcu_assign_pointer(fn->leaf, rt);
			}
		}
		fn = sn;
	}
#endif

	err = fib6_add_rt2node(fn, rt, info, extack);
	if (!err) {
		/* Track the route on its nexthop object, if it uses one. */
		if (rt->nh)
			list_add(&rt->nh_list, &rt->nh->f6i_list);
		__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
		fib6_start_gc(info->nl_net, rt);
	}

out:
	if (err) {
#ifdef CONFIG_IPV6_SUBTREES
		/*
		 * pn != fn means the insert went into a source subtree. If
		 * the destination node's leaf was pointed at @rt above, undo
		 * that and find a replacement leaf for the node.
		 */
		if (pn != fn) {
			struct fib6_info *pn_leaf =
				rcu_dereference_protected(pn->leaf,
				    lockdep_is_held(&table->tb6_lock));
			if (pn_leaf == rt) {
				pn_leaf = NULL;
				RCU_INIT_POINTER(pn->leaf, NULL);
				fib6_info_release(rt);
			}
			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
				pn_leaf = fib6_find_prefix(info->nl_net, table,
							   pn);
#if RT6_DEBUG >= 2
				if (!pn_leaf) {
					WARN_ON(!pn_leaf);
					pn_leaf =
					    info->nl_net->ipv6.fib6_null_entry;
				}
#endif
				/*
				 * NOTE(review): with RT6_DEBUG < 2 a NULL
				 * return from fib6_find_prefix() reaches
				 * fib6_info_hold() unguarded - confirm that
				 * cannot happen on this path.
				 */
				fib6_info_hold(pn_leaf);
				rcu_assign_pointer(pn->leaf, pn_leaf);
			}
		}
#endif
		goto failure;
	} else if (fib6_requires_src(rt)) {
		fib6_routes_require_src_inc(info->nl_net);
	}
	return err;

failure:
	/*
	 * Repair the tree when fn was left without route information:
	 * either an intermediate node that is neither RTN_RTINFO nor
	 * RTN_ROOT, or the top-level root whose leaf is now empty.
	 * fn is NULL when the very first fib6_add_1() failed.
	 */
	if (fn &&
	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
	     (fn->fn_flags & RTN_TL_ROOT &&
	      !rcu_access_pointer(fn->leaf))))
		fib6_repair_tree(info->nl_net, table, fn);
	return err;
}
1538
1539
1540
1541
1542
1543
/*
 * One stage of a tree lookup: the address to search for and the byte
 * offset, inside struct fib6_info, of the struct rt6key it is compared
 * against. fib6_node_lookup() passes an array of these whose last entry
 * has offset 0, which fib6_node_lookup_1() treats as "nothing to look up".
 */
struct lookup_args {
	int offset;		/* key offset in fib6_info */
	const struct in6_addr *addr;	/* search key */
};
1548
1549 static struct fib6_node *fib6_node_lookup_1(struct fib6_node *root,
1550 struct lookup_args *args)
1551 {
1552 struct fib6_node *fn;
1553 __be32 dir;
1554
1555 if (unlikely(args->offset == 0))
1556 return NULL;
1557
1558
1559
1560
1561
1562 fn = root;
1563
1564 for (;;) {
1565 struct fib6_node *next;
1566
1567 dir = addr_bit_set(args->addr, fn->fn_bit);
1568
1569 next = dir ? rcu_dereference(fn->right) :
1570 rcu_dereference(fn->left);
1571
1572 if (next) {
1573 fn = next;
1574 continue;
1575 }
1576 break;
1577 }
1578
1579 while (fn) {
1580 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1581
1582 if (subtree || fn->fn_flags & RTN_RTINFO) {
1583 struct fib6_info *leaf = rcu_dereference(fn->leaf);
1584 struct rt6key *key;
1585
1586 if (!leaf)
1587 goto backtrack;
1588
1589 key = (struct rt6key *) ((u8 *)leaf + args->offset);
1590
1591 if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
1592 #ifdef CONFIG_IPV6_SUBTREES
1593 if (subtree) {
1594 struct fib6_node *sfn;
1595 sfn = fib6_node_lookup_1(subtree,
1596 args + 1);
1597 if (!sfn)
1598 goto backtrack;
1599 fn = sfn;
1600 }
1601 #endif
1602 if (fn->fn_flags & RTN_RTINFO)
1603 return fn;
1604 }
1605 }
1606 backtrack:
1607 if (fn->fn_flags & RTN_ROOT)
1608 break;
1609
1610 fn = rcu_dereference(fn->parent);
1611 }
1612
1613 return NULL;
1614 }
1615
1616
1617
1618 struct fib6_node *fib6_node_lookup(struct fib6_node *root,
1619 const struct in6_addr *daddr,
1620 const struct in6_addr *saddr)
1621 {
1622 struct fib6_node *fn;
1623 struct lookup_args args[] = {
1624 {
1625 .offset = offsetof(struct fib6_info, fib6_dst),
1626 .addr = daddr,
1627 },
1628 #ifdef CONFIG_IPV6_SUBTREES
1629 {
1630 .offset = offsetof(struct fib6_info, fib6_src),
1631 .addr = saddr,
1632 },
1633 #endif
1634 {
1635 .offset = 0,
1636 }
1637 };
1638
1639 fn = fib6_node_lookup_1(root, daddr ? args : args + 1);
1640 if (!fn || fn->fn_flags & RTN_TL_ROOT)
1641 fn = root;
1642
1643 return fn;
1644 }
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658 static struct fib6_node *fib6_locate_1(struct fib6_node *root,
1659 const struct in6_addr *addr,
1660 int plen, int offset,
1661 bool exact_match)
1662 {
1663 struct fib6_node *fn, *prev = NULL;
1664
1665 for (fn = root; fn ; ) {
1666 struct fib6_info *leaf = rcu_dereference(fn->leaf);
1667 struct rt6key *key;
1668
1669
1670 if (!leaf) {
1671 if (plen <= fn->fn_bit)
1672 goto out;
1673 else
1674 goto next;
1675 }
1676
1677 key = (struct rt6key *)((u8 *)leaf + offset);
1678
1679
1680
1681
1682 if (plen < fn->fn_bit ||
1683 !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
1684 goto out;
1685
1686 if (plen == fn->fn_bit)
1687 return fn;
1688
1689 if (fn->fn_flags & RTN_RTINFO)
1690 prev = fn;
1691
1692 next:
1693
1694
1695
1696 if (addr_bit_set(addr, fn->fn_bit))
1697 fn = rcu_dereference(fn->right);
1698 else
1699 fn = rcu_dereference(fn->left);
1700 }
1701 out:
1702 if (exact_match)
1703 return NULL;
1704 else
1705 return prev;
1706 }
1707
1708 struct fib6_node *fib6_locate(struct fib6_node *root,
1709 const struct in6_addr *daddr, int dst_len,
1710 const struct in6_addr *saddr, int src_len,
1711 bool exact_match)
1712 {
1713 struct fib6_node *fn;
1714
1715 fn = fib6_locate_1(root, daddr, dst_len,
1716 offsetof(struct fib6_info, fib6_dst),
1717 exact_match);
1718
1719 #ifdef CONFIG_IPV6_SUBTREES
1720 if (src_len) {
1721 WARN_ON(saddr == NULL);
1722 if (fn) {
1723 struct fib6_node *subtree = FIB6_SUBTREE(fn);
1724
1725 if (subtree) {
1726 fn = fib6_locate_1(subtree, saddr, src_len,
1727 offsetof(struct fib6_info, fib6_src),
1728 exact_match);
1729 }
1730 }
1731 }
1732 #endif
1733
1734 if (fn && fn->fn_flags & RTN_RTINFO)
1735 return fn;
1736
1737 return NULL;
1738 }
1739
1740
1741
1742
1743
1744
1745
1746 static struct fib6_info *fib6_find_prefix(struct net *net,
1747 struct fib6_table *table,
1748 struct fib6_node *fn)
1749 {
1750 struct fib6_node *child_left, *child_right;
1751
1752 if (fn->fn_flags & RTN_ROOT)
1753 return net->ipv6.fib6_null_entry;
1754
1755 while (fn) {
1756 child_left = rcu_dereference_protected(fn->left,
1757 lockdep_is_held(&table->tb6_lock));
1758 child_right = rcu_dereference_protected(fn->right,
1759 lockdep_is_held(&table->tb6_lock));
1760 if (child_left)
1761 return rcu_dereference_protected(child_left->leaf,
1762 lockdep_is_held(&table->tb6_lock));
1763 if (child_right)
1764 return rcu_dereference_protected(child_right->leaf,
1765 lockdep_is_held(&table->tb6_lock));
1766
1767 fn = FIB6_SUBTREE(fn);
1768 }
1769 return NULL;
1770 }
1771
1772
1773
1774
1775
1776
1777
/*
 *	Trim intermediate nodes out of the tree where possible, starting
 *	with @fn, a node that no longer carries route information.
 *
 *	For the table's top-level root the leaf is simply reset to the null
 *	entry. Otherwise each iteration either
 *	 - keeps the node (two children, a subtree, or a subtree root with a
 *	   child) and gives it a borrowed leaf from fib6_find_prefix(), or
 *	 - unlinks the node from its parent, splices its only child (if any)
 *	   in its place, redirects walkers that were parked on it and frees
 *	   it; if the parent in turn has neither route information nor a
 *	   subtree, the parent's leaf reference is dropped and the loop
 *	   continues with the parent.
 *
 *	Returns @fn for the top-level root, otherwise the parent of the last
 *	node examined.
 *
 *	The lockdep annotations expect table->tb6_lock to be held.
 */
static struct fib6_node *fib6_repair_tree(struct net *net,
					  struct fib6_table *table,
					  struct fib6_node *fn)
{
	int children;
	int nstate;
	struct fib6_node *child;
	struct fib6_walker *w;
	int iter = 0;

	/* Set fn->leaf to null_entry for root node. */
	if (fn->fn_flags & RTN_TL_ROOT) {
		rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry);
		return fn;
	}

	for (;;) {
		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *pn = rcu_dereference_protected(fn->parent,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
					    lockdep_is_held(&table->tb6_lock));
		struct fib6_info *new_fn_leaf;

		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
		iter++;

		/* fn is expected to be a leafless, non-route, non-TL-root node. */
		WARN_ON(fn->fn_flags & RTN_RTINFO);
		WARN_ON(fn->fn_flags & RTN_TL_ROOT);
		WARN_ON(fn_leaf);

		/* children: bit 0 = right child present, bit 1 = left child. */
		children = 0;
		child = NULL;
		if (fn_r) {
			child = fn_r;
			children |= 1;
		}
		if (fn_l) {
			child = fn_l;
			children |= 2;
		}

		if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
		    /* Subtree root (i.e. fn) may have one child */
		    || (children && fn->fn_flags & RTN_ROOT)
#endif
		    ) {
			/* The node must stay: give it a borrowed leaf. */
			new_fn_leaf = fib6_find_prefix(net, table, fn);
#if RT6_DEBUG >= 2
			if (!new_fn_leaf) {
				WARN_ON(!new_fn_leaf);
				new_fn_leaf = net->ipv6.fib6_null_entry;
			}
#endif
			fib6_info_hold(new_fn_leaf);
			rcu_assign_pointer(fn->leaf, new_fn_leaf);
			return pn;
		}

		/*
		 * The node goes away. Detach it from the parent; nstate is
		 * the state a walker parked on fn resumes in at the parent.
		 */
#ifdef CONFIG_IPV6_SUBTREES
		if (FIB6_SUBTREE(pn) == fn) {
			WARN_ON(!(fn->fn_flags & RTN_ROOT));
			RCU_INIT_POINTER(pn->subtree, NULL);
			nstate = FWS_L;
		} else {
			WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
			if (pn_r == fn)
				rcu_assign_pointer(pn->right, child);
			else if (pn_l == fn)
				rcu_assign_pointer(pn->left, child);
#if RT6_DEBUG >= 2
			else
				WARN_ON(1);
#endif
			if (child)
				rcu_assign_pointer(child->parent, pn);
			nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
		}
#endif

		/* Move any walker currently positioned on fn off of it. */
		read_lock(&net->ipv6.fib6_walker_lock);
		FOR_WALKERS(net, w) {
			if (!child) {
				/* No replacement node: resume at the parent. */
				if (w->node == fn) {
					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
					w->node = pn;
					w->state = nstate;
				}
			} else {
				/* The child took fn's place: resume there. */
				if (w->node == fn) {
					w->node = child;
					if (children&2) {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;
					} else {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;
					}
				}
			}
		}
		read_unlock(&net->ipv6.fib6_walker_lock);

		node_free(net, fn);
		if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
			return pn;

		/* The parent is now a repair candidate itself: drop its leaf. */
		RCU_INIT_POINTER(pn->leaf, NULL);
		fib6_info_release(pn_leaf);
		fn = pn;
	}
}
1902
/*
 * Unlink the route that *@rtp points to from node @fn and release it.
 *
 * Besides removing the entry from the node's route list this updates the
 * route statistics, clears the node's round-robin pointer if it referenced
 * the route, detaches the route from its multipath siblings, moves walkers
 * off the route, repairs the tree if the node became empty, and emits the
 * in-kernel and netlink notifications unless @info asks to skip them.
 *
 * The lockdep annotations expect table->tb6_lock to be held.
 */
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
			   struct fib6_info __rcu **rtp, struct nl_info *info)
{
	struct fib6_info *leaf, *replace_rt = NULL;
	struct fib6_walker *w;
	struct fib6_info *rt = rcu_dereference_protected(*rtp,
				    lockdep_is_held(&table->tb6_lock));
	struct net *net = info->nl_net;
	bool notify_del = false;

	RT6_TRACE("fib6_del_route\n");

	/*
	 * If the deleted route is the first one in the node and is not part
	 * of a multipath route, then either the next route in the node takes
	 * its place (replace notification) or, if there is none, a delete
	 * notification is due.
	 */
	leaf = rcu_dereference_protected(fn->leaf,
					 lockdep_is_held(&table->tb6_lock));
	if (leaf == rt && !rt->fib6_nsiblings) {
		if (rcu_access_pointer(rt->fib6_next))
			replace_rt = rcu_dereference_protected(rt->fib6_next,
					    lockdep_is_held(&table->tb6_lock));
		else
			notify_del = true;
	}

	/* Unlink it */
	*rtp = rt->fib6_next;
	rt->fib6_node = NULL;
	net->ipv6.rt6_stats->fib_rt_entries--;
	net->ipv6.rt6_stats->fib_discarded_routes++;

	/* Reset round-robin state, if necessary */
	if (rcu_access_pointer(fn->rr_ptr) == rt)
		fn->rr_ptr = NULL;

	/* Remove this entry from other siblings */
	if (rt->fib6_nsiblings) {
		struct fib6_info *sibling, *next_sibling;

		/*
		 * The route is being removed from a multipath route. A
		 * delete notification is requested only when it shares the
		 * metric of the node's first route and that first route
		 * qualifies for ECMP.
		 */
		if (rt->fib6_metric == leaf->fib6_metric &&
		    rt6_qualify_for_ecmp(leaf))
			notify_del = true;
		list_for_each_entry_safe(sibling, next_sibling,
					 &rt->fib6_siblings, fib6_siblings)
			sibling->fib6_nsiblings--;
		rt->fib6_nsiblings = 0;
		list_del_init(&rt->fib6_siblings);
		rt6_multipath_rebalance(next_sibling);
	}

	/* Adjust walkers */
	read_lock(&net->ipv6.fib6_walker_lock);
	FOR_WALKERS(net, w) {
		if (w->state == FWS_C && w->leaf == rt) {
			RT6_TRACE("walker %p adjusted by delroute\n", w);
			w->leaf = rcu_dereference_protected(rt->fib6_next,
					    lockdep_is_held(&table->tb6_lock));
			if (!w->leaf)
				w->state = FWS_U;
		}
	}
	read_unlock(&net->ipv6.fib6_walker_lock);

	/*
	 * If that was the node's last route, let fib6_repair_tree() fix the
	 * tree up. A non-root node first loses its RTN_RTINFO flag and is
	 * removed from the route node count.
	 */
	if (!rcu_access_pointer(fn->leaf)) {
		if (!(fn->fn_flags & RTN_TL_ROOT)) {
			fn->fn_flags &= ~RTN_RTINFO;
			net->ipv6.rt6_stats->fib_route_nodes--;
		}
		fn = fib6_repair_tree(net, table, fn);
	}

	fib6_purge_rt(rt, fn, net);

	/* In-kernel listeners get either a delete or a replace event. */
	if (!info->skip_notify_kernel) {
		if (notify_del)
			call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
						  rt, NULL);
		else if (replace_rt)
			call_fib6_entry_notifiers_replace(net, replace_rt);
	}
	if (!info->skip_notify)
		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);

	/* Drop a reference on the now unlinked route. */
	fib6_info_release(rt);
}
1998
1999
2000 int fib6_del(struct fib6_info *rt, struct nl_info *info)
2001 {
2002 struct net *net = info->nl_net;
2003 struct fib6_info __rcu **rtp;
2004 struct fib6_info __rcu **rtp_next;
2005 struct fib6_table *table;
2006 struct fib6_node *fn;
2007
2008 if (rt == net->ipv6.fib6_null_entry)
2009 return -ENOENT;
2010
2011 table = rt->fib6_table;
2012 fn = rcu_dereference_protected(rt->fib6_node,
2013 lockdep_is_held(&table->tb6_lock));
2014 if (!fn)
2015 return -ENOENT;
2016
2017 WARN_ON(!(fn->fn_flags & RTN_RTINFO));
2018
2019
2020
2021
2022
2023 for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
2024 struct fib6_info *cur = rcu_dereference_protected(*rtp,
2025 lockdep_is_held(&table->tb6_lock));
2026 if (rt == cur) {
2027 if (fib6_requires_src(cur))
2028 fib6_routes_require_src_dec(info->nl_net);
2029 fib6_del_route(table, fn, rtp, info);
2030 return 0;
2031 }
2032 rtp_next = &cur->fib6_next;
2033 }
2034 return -ENOENT;
2035 }
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
/*
 *	Resume the tree walk described by @w from its saved position
 *	(w->node, w->state).
 *
 *	The walk is a state machine over the current node:
 *	  FWS_S - descend into the node's subtree, if it has one
 *		  (CONFIG_IPV6_SUBTREES only)
 *	  FWS_L - descend into the left child
 *	  FWS_R - descend into the right child
 *	  FWS_C - run w->func on the node itself if it has a leaf and
 *		  carries route information
 *	  FWS_U - go back up to the parent
 *
 *	While w->skip is non-zero, each node that would be handed to
 *	w->func is skipped instead and the counter decremented; every
 *	completed w->func call increments w->count.
 *
 *	Returns 0 when the walk is complete (back at w->root in FWS_U, or
 *	w->node is NULL), or the first non-zero value returned by w->func,
 *	in which case the position is preserved in @w.
 */
static int fib6_walk_continue(struct fib6_walker *w)
{
	struct fib6_node *fn, *pn, *left, *right;

	/* The walk root is expected to be a table's top-level root. */
	WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));

	for (;;) {
		fn = w->node;
		if (!fn)
			return 0;

		switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
		case FWS_S:
			/* State is left as is, so the subtree root is visited in FWS_S too. */
			if (FIB6_SUBTREE(fn)) {
				w->node = FIB6_SUBTREE(fn);
				continue;
			}
			w->state = FWS_L;
			fallthrough;
#endif
		case FWS_L:
			left = rcu_dereference_protected(fn->left, 1);
			if (left) {
				w->node = left;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_R;
			fallthrough;
		case FWS_R:
			right = rcu_dereference_protected(fn->right, 1);
			if (right) {
				w->node = right;
				w->state = FWS_INIT;
				continue;
			}
			/* No more children: latch this node's first route. */
			w->state = FWS_C;
			w->leaf = rcu_dereference_protected(fn->leaf, 1);
			fallthrough;
		case FWS_C:
			if (w->leaf && fn->fn_flags & RTN_RTINFO) {
				int err;

				if (w->skip) {
					w->skip--;
					goto skip;
				}

				err = w->func(w);
				if (err)
					return err;

				/*
				 * Stay in FWS_C: the callback decides through
				 * w->leaf whether this node is visited again.
				 */
				w->count++;
				continue;
			}
skip:
			w->state = FWS_U;
			fallthrough;
		case FWS_U:
			if (fn == w->root)
				return 0;
			pn = rcu_dereference_protected(fn->parent, 1);
			left = rcu_dereference_protected(pn->left, 1);
			right = rcu_dereference_protected(pn->right, 1);
			w->node = pn;
			/* The next state depends on which link of pn led to fn. */
#ifdef CONFIG_IPV6_SUBTREES
			if (FIB6_SUBTREE(pn) == fn) {
				WARN_ON(!(fn->fn_flags & RTN_ROOT));
				w->state = FWS_L;
				continue;
			}
#endif
			if (left == fn) {
				w->state = FWS_R;
				continue;
			}
			if (right == fn) {
				w->state = FWS_C;
				w->leaf = rcu_dereference_protected(w->node->leaf, 1);
				continue;
			}
#if RT6_DEBUG >= 2
			WARN_ON(1);
#endif
		}
	}
}
2152
2153 static int fib6_walk(struct net *net, struct fib6_walker *w)
2154 {
2155 int res;
2156
2157 w->state = FWS_INIT;
2158 w->node = w->root;
2159
2160 fib6_walker_link(net, w);
2161 res = fib6_walk_continue(w);
2162 if (res <= 0)
2163 fib6_walker_unlink(net, w);
2164 return res;
2165 }
2166
/*
 * Walker callback used by fib6_clean_tree(): process the routes of the
 * node the walker is currently on.
 *
 * If the cleaner carries a serial number other than FIB6_NO_SERNUM_CHANGE,
 * the node's fn_sernum is updated to it first. Then, for each route, the
 * cleaner's callback decides what happens:
 *    0  - keep the route
 *   -1  - delete the route via fib6_del()
 *   -2  - skip ahead to the last sibling of this (multipath) route
 * Any other value triggers a warning.
 *
 * Always returns 0.
 */
static int fib6_clean_node(struct fib6_walker *w)
{
	int res;
	struct fib6_info *rt;
	struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w);
	struct nl_info info = {
		.nl_net = c->net,
		.skip_notify = c->skip_notify,
	};

	/* Only write the serial number when it actually changes. */
	if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
	    READ_ONCE(w->node->fn_sernum) != c->sernum)
		WRITE_ONCE(w->node->fn_sernum, c->sernum);

	/* No callback: this pass only exists to update serial numbers. */
	if (!c->func) {
		WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
		w->leaf = NULL;
		return 0;
	}

	/* Presumably iterates rt from w->leaf along fib6_next - see the macro. */
	for_each_fib6_walker_rt(w) {
		res = c->func(rt, c->arg);
		if (res == -1) {
			/* Park the walker on rt so the delete can adjust it. */
			w->leaf = rt;
			res = fib6_del(rt, &info);
			if (res) {
#if RT6_DEBUG >= 2
				pr_debug("%s: del failed: rt=%p@%p err=%d\n",
					 __func__, rt,
					 rcu_access_pointer(rt->fib6_node),
					 res);
#endif
				continue;
			}
			/* Deleted: return and let the walker call us again. */
			return 0;
		} else if (res == -2) {
			if (WARN_ON(!rt->fib6_nsiblings))
				continue;
			rt = list_last_entry(&rt->fib6_siblings,
					     struct fib6_info, fib6_siblings);
			continue;
		}
		WARN_ON(res != 0);
	}
	w->leaf = rt;
	return 0;
}
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224 static void fib6_clean_tree(struct net *net, struct fib6_node *root,
2225 int (*func)(struct fib6_info *, void *arg),
2226 int sernum, void *arg, bool skip_notify)
2227 {
2228 struct fib6_cleaner c;
2229
2230 c.w.root = root;
2231 c.w.func = fib6_clean_node;
2232 c.w.count = 0;
2233 c.w.skip = 0;
2234 c.w.skip_in_node = 0;
2235 c.func = func;
2236 c.sernum = sernum;
2237 c.arg = arg;
2238 c.net = net;
2239 c.skip_notify = skip_notify;
2240
2241 fib6_walk(net, &c.w);
2242 }
2243
2244 static void __fib6_clean_all(struct net *net,
2245 int (*func)(struct fib6_info *, void *),
2246 int sernum, void *arg, bool skip_notify)
2247 {
2248 struct fib6_table *table;
2249 struct hlist_head *head;
2250 unsigned int h;
2251
2252 rcu_read_lock();
2253 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2254 head = &net->ipv6.fib_table_hash[h];
2255 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2256 spin_lock_bh(&table->tb6_lock);
2257 fib6_clean_tree(net, &table->tb6_root,
2258 func, sernum, arg, skip_notify);
2259 spin_unlock_bh(&table->tb6_lock);
2260 }
2261 }
2262 rcu_read_unlock();
2263 }
2264
2265 void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *),
2266 void *arg)
2267 {
2268 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, false);
2269 }
2270
2271 void fib6_clean_all_skip_notify(struct net *net,
2272 int (*func)(struct fib6_info *, void *),
2273 void *arg)
2274 {
2275 __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg, true);
2276 }
2277
2278 static void fib6_flush_trees(struct net *net)
2279 {
2280 int new_sernum = fib6_new_sernum(net);
2281
2282 __fib6_clean_all(net, NULL, new_sernum, NULL, false);
2283 }
2284
2285
2286
2287
2288
2289 static int fib6_age(struct fib6_info *rt, void *arg)
2290 {
2291 struct fib6_gc_args *gc_args = arg;
2292 unsigned long now = jiffies;
2293
2294
2295
2296
2297
2298
2299 if (rt->fib6_flags & RTF_EXPIRES && rt->expires) {
2300 if (time_after(now, rt->expires)) {
2301 RT6_TRACE("expiring %p\n", rt);
2302 return -1;
2303 }
2304 gc_args->more++;
2305 }
2306
2307
2308
2309
2310
2311 rt6_age_exceptions(rt, gc_args, now);
2312
2313 return 0;
2314 }
2315
2316 void fib6_run_gc(unsigned long expires, struct net *net, bool force)
2317 {
2318 struct fib6_gc_args gc_args;
2319 unsigned long now;
2320
2321 if (force) {
2322 spin_lock_bh(&net->ipv6.fib6_gc_lock);
2323 } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
2324 mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
2325 return;
2326 }
2327 gc_args.timeout = expires ? (int)expires :
2328 net->ipv6.sysctl.ip6_rt_gc_interval;
2329 gc_args.more = 0;
2330
2331 fib6_clean_all(net, fib6_age, &gc_args);
2332 now = jiffies;
2333 net->ipv6.ip6_rt_last_gc = now;
2334
2335 if (gc_args.more)
2336 mod_timer(&net->ipv6.ip6_fib_timer,
2337 round_jiffies(now
2338 + net->ipv6.sysctl.ip6_rt_gc_interval));
2339 else
2340 del_timer(&net->ipv6.ip6_fib_timer);
2341 spin_unlock_bh(&net->ipv6.fib6_gc_lock);
2342 }
2343
2344 static void fib6_gc_timer_cb(struct timer_list *t)
2345 {
2346 struct net *arg = from_timer(arg, t, ipv6.ip6_fib_timer);
2347
2348 fib6_run_gc(0, arg, true);
2349 }
2350
2351 static int __net_init fib6_net_init(struct net *net)
2352 {
2353 size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
2354 int err;
2355
2356 err = fib6_notifier_init(net);
2357 if (err)
2358 return err;
2359
2360
2361 net->ipv6.sysctl.multipath_hash_fields =
2362 FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
2363
2364 spin_lock_init(&net->ipv6.fib6_gc_lock);
2365 rwlock_init(&net->ipv6.fib6_walker_lock);
2366 INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
2367 timer_setup(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, 0);
2368
2369 net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
2370 if (!net->ipv6.rt6_stats)
2371 goto out_notifier;
2372
2373
2374 size = max_t(size_t, size, L1_CACHE_BYTES);
2375
2376 net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
2377 if (!net->ipv6.fib_table_hash)
2378 goto out_rt6_stats;
2379
2380 net->ipv6.fib6_main_tbl = kzalloc(sizeof(*net->ipv6.fib6_main_tbl),
2381 GFP_KERNEL);
2382 if (!net->ipv6.fib6_main_tbl)
2383 goto out_fib_table_hash;
2384
2385 net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
2386 rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
2387 net->ipv6.fib6_null_entry);
2388 net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
2389 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2390 inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
2391
2392 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2393 net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
2394 GFP_KERNEL);
2395 if (!net->ipv6.fib6_local_tbl)
2396 goto out_fib6_main_tbl;
2397 net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
2398 rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
2399 net->ipv6.fib6_null_entry);
2400 net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
2401 RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
2402 inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
2403 #endif
2404 fib6_tables_init(net);
2405
2406 return 0;
2407
2408 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2409 out_fib6_main_tbl:
2410 kfree(net->ipv6.fib6_main_tbl);
2411 #endif
2412 out_fib_table_hash:
2413 kfree(net->ipv6.fib_table_hash);
2414 out_rt6_stats:
2415 kfree(net->ipv6.rt6_stats);
2416 out_notifier:
2417 fib6_notifier_exit(net);
2418 return -ENOMEM;
2419 }
2420
2421 static void fib6_net_exit(struct net *net)
2422 {
2423 unsigned int i;
2424
2425 del_timer_sync(&net->ipv6.ip6_fib_timer);
2426
2427 for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
2428 struct hlist_head *head = &net->ipv6.fib_table_hash[i];
2429 struct hlist_node *tmp;
2430 struct fib6_table *tb;
2431
2432 hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
2433 hlist_del(&tb->tb6_hlist);
2434 fib6_free_table(tb);
2435 }
2436 }
2437
2438 kfree(net->ipv6.fib_table_hash);
2439 kfree(net->ipv6.rt6_stats);
2440 fib6_notifier_exit(net);
2441 }
2442
/*
 * Per-network-namespace hooks for the IPv6 FIB; registered in fib6_init()
 * and unregistered in fib6_gc_cleanup().
 */
static struct pernet_operations fib6_net_ops = {
	.init = fib6_net_init,
	.exit = fib6_net_exit,
};
2447
2448 int __init fib6_init(void)
2449 {
2450 int ret = -ENOMEM;
2451
2452 fib6_node_kmem = kmem_cache_create("fib6_nodes",
2453 sizeof(struct fib6_node), 0,
2454 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
2455 NULL);
2456 if (!fib6_node_kmem)
2457 goto out;
2458
2459 ret = register_pernet_subsys(&fib6_net_ops);
2460 if (ret)
2461 goto out_kmem_cache_create;
2462
2463 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
2464 inet6_dump_fib, 0);
2465 if (ret)
2466 goto out_unregister_subsys;
2467
2468 __fib6_flush_trees = fib6_flush_trees;
2469 out:
2470 return ret;
2471
2472 out_unregister_subsys:
2473 unregister_pernet_subsys(&fib6_net_ops);
2474 out_kmem_cache_create:
2475 kmem_cache_destroy(fib6_node_kmem);
2476 goto out;
2477 }
2478
/*
 * Undo fib6_init(): unregister the per-namespace operations, then destroy
 * the tree node slab cache.
 */
void fib6_gc_cleanup(void)
{
	/* Unregister first, so the namespace exit hooks run before the cache goes. */
	unregister_pernet_subsys(&fib6_net_ops);
	kmem_cache_destroy(fib6_node_kmem);
}
2484
2485 #ifdef CONFIG_PROC_FS
2486 static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
2487 {
2488 struct fib6_info *rt = v;
2489 struct ipv6_route_iter *iter = seq->private;
2490 struct fib6_nh *fib6_nh = rt->fib6_nh;
2491 unsigned int flags = rt->fib6_flags;
2492 const struct net_device *dev;
2493
2494 if (rt->nh)
2495 fib6_nh = nexthop_fib6_nh_bh(rt->nh);
2496
2497 seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
2498
2499 #ifdef CONFIG_IPV6_SUBTREES
2500 seq_printf(seq, "%pi6 %02x ", &rt->fib6_src.addr, rt->fib6_src.plen);
2501 #else
2502 seq_puts(seq, "00000000000000000000000000000000 00 ");
2503 #endif
2504 if (fib6_nh->fib_nh_gw_family) {
2505 flags |= RTF_GATEWAY;
2506 seq_printf(seq, "%pi6", &fib6_nh->fib_nh_gw6);
2507 } else {
2508 seq_puts(seq, "00000000000000000000000000000000");
2509 }
2510
2511 dev = fib6_nh->fib_nh_dev;
2512 seq_printf(seq, " %08x %08x %08x %08x %8s\n",
2513 rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
2514 flags, dev ? dev->name : "");
2515 iter->w.leaf = NULL;
2516 return 0;
2517 }
2518
2519 static int ipv6_route_yield(struct fib6_walker *w)
2520 {
2521 struct ipv6_route_iter *iter = w->args;
2522
2523 if (!iter->skip)
2524 return 1;
2525
2526 do {
2527 iter->w.leaf = rcu_dereference_protected(
2528 iter->w.leaf->fib6_next,
2529 lockdep_is_held(&iter->tbl->tb6_lock));
2530 iter->skip--;
2531 if (!iter->skip && iter->w.leaf)
2532 return 1;
2533 } while (iter->w.leaf);
2534
2535 return 0;
2536 }
2537
2538 static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
2539 struct net *net)
2540 {
2541 memset(&iter->w, 0, sizeof(iter->w));
2542 iter->w.func = ipv6_route_yield;
2543 iter->w.root = &iter->tbl->tb6_root;
2544 iter->w.state = FWS_INIT;
2545 iter->w.node = iter->w.root;
2546 iter->w.args = iter;
2547 iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
2548 INIT_LIST_HEAD(&iter->w.lh);
2549 fib6_walker_link(net, &iter->w);
2550 }
2551
2552 static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
2553 struct net *net)
2554 {
2555 unsigned int h;
2556 struct hlist_node *node;
2557
2558 if (tbl) {
2559 h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
2560 node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
2561 } else {
2562 h = 0;
2563 node = NULL;
2564 }
2565
2566 while (!node && h < FIB6_TABLE_HASHSZ) {
2567 node = rcu_dereference_bh(
2568 hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
2569 }
2570 return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
2571 }
2572
2573 static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
2574 {
2575 int sernum = READ_ONCE(iter->w.root->fn_sernum);
2576
2577 if (iter->sernum != sernum) {
2578 iter->sernum = sernum;
2579 iter->w.state = FWS_INIT;
2580 iter->w.node = iter->w.root;
2581 WARN_ON(iter->w.skip);
2582 iter->w.skip = iter->w.count;
2583 }
2584 }
2585
2586 static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2587 {
2588 int r;
2589 struct fib6_info *n;
2590 struct net *net = seq_file_net(seq);
2591 struct ipv6_route_iter *iter = seq->private;
2592
2593 ++(*pos);
2594 if (!v)
2595 goto iter_table;
2596
2597 n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
2598 if (n)
2599 return n;
2600
2601 iter_table:
2602 ipv6_route_check_sernum(iter);
2603 spin_lock_bh(&iter->tbl->tb6_lock);
2604 r = fib6_walk_continue(&iter->w);
2605 spin_unlock_bh(&iter->tbl->tb6_lock);
2606 if (r > 0) {
2607 return iter->w.leaf;
2608 } else if (r < 0) {
2609 fib6_walker_unlink(net, &iter->w);
2610 return NULL;
2611 }
2612 fib6_walker_unlink(net, &iter->w);
2613
2614 iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
2615 if (!iter->tbl)
2616 return NULL;
2617
2618 ipv6_route_seq_setup_walk(iter, net);
2619 goto iter_table;
2620 }
2621
2622 static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
2623 __acquires(RCU_BH)
2624 {
2625 struct net *net = seq_file_net(seq);
2626 struct ipv6_route_iter *iter = seq->private;
2627
2628 rcu_read_lock_bh();
2629 iter->tbl = ipv6_route_seq_next_table(NULL, net);
2630 iter->skip = *pos;
2631
2632 if (iter->tbl) {
2633 loff_t p = 0;
2634
2635 ipv6_route_seq_setup_walk(iter, net);
2636 return ipv6_route_seq_next(seq, NULL, &p);
2637 } else {
2638 return NULL;
2639 }
2640 }
2641
2642 static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
2643 {
2644 struct fib6_walker *w = &iter->w;
2645 return w->node && !(w->state == FWS_U && w->node == w->root);
2646 }
2647
2648 static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
2649 __releases(RCU_BH)
2650 {
2651 struct net *net = seq_file_net(seq);
2652 struct ipv6_route_iter *iter = seq->private;
2653
2654 if (ipv6_route_iter_active(iter))
2655 fib6_walker_unlink(net, &iter->w);
2656
2657 rcu_read_unlock_bh();
2658 }
2659
2660 #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
2661 static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
2662 struct bpf_iter_meta *meta,
2663 void *v)
2664 {
2665 struct bpf_iter__ipv6_route ctx;
2666
2667 ctx.meta = meta;
2668 ctx.rt = v;
2669 return bpf_iter_run_prog(prog, &ctx);
2670 }
2671
2672 static int ipv6_route_seq_show(struct seq_file *seq, void *v)
2673 {
2674 struct ipv6_route_iter *iter = seq->private;
2675 struct bpf_iter_meta meta;
2676 struct bpf_prog *prog;
2677 int ret;
2678
2679 meta.seq = seq;
2680 prog = bpf_iter_get_info(&meta, false);
2681 if (!prog)
2682 return ipv6_route_native_seq_show(seq, v);
2683
2684 ret = ipv6_route_prog_seq_show(prog, &meta, v);
2685 iter->w.leaf = NULL;
2686
2687 return ret;
2688 }
2689
2690 static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
2691 {
2692 struct bpf_iter_meta meta;
2693 struct bpf_prog *prog;
2694
2695 if (!v) {
2696 meta.seq = seq;
2697 prog = bpf_iter_get_info(&meta, true);
2698 if (prog)
2699 (void)ipv6_route_prog_seq_show(prog, &meta, v);
2700 }
2701
2702 ipv6_route_native_seq_stop(seq, v);
2703 }
2704 #else
/*
 * seq_file ->show for builds without the BPF iterator variant: always use
 * the native text output.
 */
static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{
	return ipv6_route_native_seq_show(seq, v);
}
2709
/*
 * seq_file ->stop for builds without the BPF iterator variant: only the
 * native stop handling is needed.
 */
static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
{
	ipv6_route_native_seq_stop(seq, v);
}
2714 #endif
2715
/* seq_file operations for iterating over the IPv6 routes of a namespace. */
const struct seq_operations ipv6_route_seq_ops = {
	.start = ipv6_route_seq_start,	/* takes rcu_read_lock_bh() */
	.next = ipv6_route_seq_next,
	.stop = ipv6_route_seq_stop,	/* drops rcu_read_lock_bh() */
	.show = ipv6_route_seq_show
};
2722 #endif