0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038 #include <linux/cache.h>
0039 #include <linux/uaccess.h>
0040 #include <linux/bitops.h>
0041 #include <linux/types.h>
0042 #include <linux/kernel.h>
0043 #include <linux/mm.h>
0044 #include <linux/string.h>
0045 #include <linux/socket.h>
0046 #include <linux/sockios.h>
0047 #include <linux/errno.h>
0048 #include <linux/in.h>
0049 #include <linux/inet.h>
0050 #include <linux/inetdevice.h>
0051 #include <linux/netdevice.h>
0052 #include <linux/if_arp.h>
0053 #include <linux/proc_fs.h>
0054 #include <linux/rcupdate.h>
0055 #include <linux/skbuff.h>
0056 #include <linux/netlink.h>
0057 #include <linux/init.h>
0058 #include <linux/list.h>
0059 #include <linux/slab.h>
0060 #include <linux/export.h>
0061 #include <linux/vmalloc.h>
0062 #include <linux/notifier.h>
0063 #include <net/net_namespace.h>
0064 #include <net/inet_dscp.h>
0065 #include <net/ip.h>
0066 #include <net/protocol.h>
0067 #include <net/route.h>
0068 #include <net/tcp.h>
0069 #include <net/sock.h>
0070 #include <net/ip_fib.h>
0071 #include <net/fib_notifier.h>
0072 #include <trace/events/fib.h>
0073 #include "fib_lookup.h"
0074
0075 static int call_fib_entry_notifier(struct notifier_block *nb,
0076 enum fib_event_type event_type, u32 dst,
0077 int dst_len, struct fib_alias *fa,
0078 struct netlink_ext_ack *extack)
0079 {
0080 struct fib_entry_notifier_info info = {
0081 .info.extack = extack,
0082 .dst = dst,
0083 .dst_len = dst_len,
0084 .fi = fa->fa_info,
0085 .dscp = fa->fa_dscp,
0086 .type = fa->fa_type,
0087 .tb_id = fa->tb_id,
0088 };
0089 return call_fib4_notifier(nb, event_type, &info.info);
0090 }
0091
0092 static int call_fib_entry_notifiers(struct net *net,
0093 enum fib_event_type event_type, u32 dst,
0094 int dst_len, struct fib_alias *fa,
0095 struct netlink_ext_ack *extack)
0096 {
0097 struct fib_entry_notifier_info info = {
0098 .info.extack = extack,
0099 .dst = dst,
0100 .dst_len = dst_len,
0101 .fi = fa->fa_info,
0102 .dscp = fa->fa_dscp,
0103 .type = fa->fa_type,
0104 .tb_id = fa->tb_id,
0105 };
0106 return call_fib4_notifiers(net, event_type, &info.info);
0107 }
0108
0109 #define MAX_STAT_DEPTH 32
0110
0111 #define KEYLENGTH (8*sizeof(t_key))
0112 #define KEY_MAX ((t_key)~0)
0113
0114 typedef unsigned int t_key;
0115
0116 #define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
0117 #define IS_TNODE(n) ((n)->bits)
0118 #define IS_LEAF(n) (!(n)->bits)
0119
0120 struct key_vector {
0121 t_key key;
0122 unsigned char pos;
0123 unsigned char bits;
0124 unsigned char slen;
0125 union {
0126
0127 struct hlist_head leaf;
0128
0129 struct key_vector __rcu *tnode[0];
0130 };
0131 };
0132
0133 struct tnode {
0134 struct rcu_head rcu;
0135 t_key empty_children;
0136 t_key full_children;
0137 struct key_vector __rcu *parent;
0138 struct key_vector kv[1];
0139 #define tn_bits kv[0].bits
0140 };
0141
0142 #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
0143 #define LEAF_SIZE TNODE_SIZE(1)
0144
0145 #ifdef CONFIG_IP_FIB_TRIE_STATS
0146 struct trie_use_stats {
0147 unsigned int gets;
0148 unsigned int backtrack;
0149 unsigned int semantic_match_passed;
0150 unsigned int semantic_match_miss;
0151 unsigned int null_node_hit;
0152 unsigned int resize_node_skipped;
0153 };
0154 #endif
0155
0156 struct trie_stat {
0157 unsigned int totdepth;
0158 unsigned int maxdepth;
0159 unsigned int tnodes;
0160 unsigned int leaves;
0161 unsigned int nullpointers;
0162 unsigned int prefixes;
0163 unsigned int nodesizes[MAX_STAT_DEPTH];
0164 };
0165
0166 struct trie {
0167 struct key_vector kv[1];
0168 #ifdef CONFIG_IP_FIB_TRIE_STATS
0169 struct trie_use_stats __percpu *stats;
0170 #endif
0171 };
0172
0173 static struct key_vector *resize(struct trie *t, struct key_vector *tn);
0174 static unsigned int tnode_free_size;
0175
0176
0177
0178
0179
0180
0181 unsigned int sysctl_fib_sync_mem = 512 * 1024;
0182 unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
0183 unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
0184
0185 static struct kmem_cache *fn_alias_kmem __ro_after_init;
0186 static struct kmem_cache *trie_leaf_kmem __ro_after_init;
0187
0188 static inline struct tnode *tn_info(struct key_vector *kv)
0189 {
0190 return container_of(kv, struct tnode, kv[0]);
0191 }
0192
0193
0194 #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
0195 #define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
0196
0197
0198 #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
0199 #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
0200
0201
0202 static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
0203 {
0204 if (n)
0205 rcu_assign_pointer(tn_info(n)->parent, tp);
0206 }
0207
0208 #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
0209
0210
0211
0212
0213 static inline unsigned long child_length(const struct key_vector *tn)
0214 {
0215 return (1ul << tn->bits) & ~(1ul);
0216 }
0217
0218 #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
0219
0220 static inline unsigned long get_index(t_key key, struct key_vector *kv)
0221 {
0222 unsigned long index = key ^ kv->key;
0223
0224 if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
0225 return 0;
0226
0227 return index >> kv->pos;
0228 }
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289 static const int halve_threshold = 25;
0290 static const int inflate_threshold = 50;
0291 static const int halve_threshold_root = 15;
0292 static const int inflate_threshold_root = 30;
0293
0294 static void __alias_free_mem(struct rcu_head *head)
0295 {
0296 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
0297 kmem_cache_free(fn_alias_kmem, fa);
0298 }
0299
0300 static inline void alias_free_mem_rcu(struct fib_alias *fa)
0301 {
0302 call_rcu(&fa->rcu, __alias_free_mem);
0303 }
0304
0305 #define TNODE_VMALLOC_MAX \
0306 ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
0307
0308 static void __node_free_rcu(struct rcu_head *head)
0309 {
0310 struct tnode *n = container_of(head, struct tnode, rcu);
0311
0312 if (!n->tn_bits)
0313 kmem_cache_free(trie_leaf_kmem, n);
0314 else
0315 kvfree(n);
0316 }
0317
0318 #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
0319
0320 static struct tnode *tnode_alloc(int bits)
0321 {
0322 size_t size;
0323
0324
0325 if (bits > TNODE_VMALLOC_MAX)
0326 return NULL;
0327
0328
0329 size = TNODE_SIZE(1ul << bits);
0330
0331 if (size <= PAGE_SIZE)
0332 return kzalloc(size, GFP_KERNEL);
0333 else
0334 return vzalloc(size);
0335 }
0336
0337 static inline void empty_child_inc(struct key_vector *n)
0338 {
0339 tn_info(n)->empty_children++;
0340
0341 if (!tn_info(n)->empty_children)
0342 tn_info(n)->full_children++;
0343 }
0344
0345 static inline void empty_child_dec(struct key_vector *n)
0346 {
0347 if (!tn_info(n)->empty_children)
0348 tn_info(n)->full_children--;
0349
0350 tn_info(n)->empty_children--;
0351 }
0352
0353 static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
0354 {
0355 struct key_vector *l;
0356 struct tnode *kv;
0357
0358 kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
0359 if (!kv)
0360 return NULL;
0361
0362
0363 l = kv->kv;
0364 l->key = key;
0365 l->pos = 0;
0366 l->bits = 0;
0367 l->slen = fa->fa_slen;
0368
0369
0370 INIT_HLIST_HEAD(&l->leaf);
0371 hlist_add_head(&fa->fa_list, &l->leaf);
0372
0373 return l;
0374 }
0375
0376 static struct key_vector *tnode_new(t_key key, int pos, int bits)
0377 {
0378 unsigned int shift = pos + bits;
0379 struct key_vector *tn;
0380 struct tnode *tnode;
0381
0382
0383 BUG_ON(!bits || (shift > KEYLENGTH));
0384
0385 tnode = tnode_alloc(bits);
0386 if (!tnode)
0387 return NULL;
0388
0389 pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
0390 sizeof(struct key_vector *) << bits);
0391
0392 if (bits == KEYLENGTH)
0393 tnode->full_children = 1;
0394 else
0395 tnode->empty_children = 1ul << bits;
0396
0397 tn = tnode->kv;
0398 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
0399 tn->pos = pos;
0400 tn->bits = bits;
0401 tn->slen = pos;
0402
0403 return tn;
0404 }
0405
0406
0407
0408
0409 static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
0410 {
0411 return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
0412 }
0413
0414
0415
0416
0417 static void put_child(struct key_vector *tn, unsigned long i,
0418 struct key_vector *n)
0419 {
0420 struct key_vector *chi = get_child(tn, i);
0421 int isfull, wasfull;
0422
0423 BUG_ON(i >= child_length(tn));
0424
0425
0426 if (!n && chi)
0427 empty_child_inc(tn);
0428 if (n && !chi)
0429 empty_child_dec(tn);
0430
0431
0432 wasfull = tnode_full(tn, chi);
0433 isfull = tnode_full(tn, n);
0434
0435 if (wasfull && !isfull)
0436 tn_info(tn)->full_children--;
0437 else if (!wasfull && isfull)
0438 tn_info(tn)->full_children++;
0439
0440 if (n && (tn->slen < n->slen))
0441 tn->slen = n->slen;
0442
0443 rcu_assign_pointer(tn->tnode[i], n);
0444 }
0445
0446 static void update_children(struct key_vector *tn)
0447 {
0448 unsigned long i;
0449
0450
0451 for (i = child_length(tn); i;) {
0452 struct key_vector *inode = get_child(tn, --i);
0453
0454 if (!inode)
0455 continue;
0456
0457
0458
0459
0460
0461 if (node_parent(inode) == tn)
0462 update_children(inode);
0463 else
0464 node_set_parent(inode, tn);
0465 }
0466 }
0467
0468 static inline void put_child_root(struct key_vector *tp, t_key key,
0469 struct key_vector *n)
0470 {
0471 if (IS_TRIE(tp))
0472 rcu_assign_pointer(tp->tnode[0], n);
0473 else
0474 put_child(tp, get_index(key, tp), n);
0475 }
0476
0477 static inline void tnode_free_init(struct key_vector *tn)
0478 {
0479 tn_info(tn)->rcu.next = NULL;
0480 }
0481
0482 static inline void tnode_free_append(struct key_vector *tn,
0483 struct key_vector *n)
0484 {
0485 tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
0486 tn_info(tn)->rcu.next = &tn_info(n)->rcu;
0487 }
0488
0489 static void tnode_free(struct key_vector *tn)
0490 {
0491 struct callback_head *head = &tn_info(tn)->rcu;
0492
0493 while (head) {
0494 head = head->next;
0495 tnode_free_size += TNODE_SIZE(1ul << tn->bits);
0496 node_free(tn);
0497
0498 tn = container_of(head, struct tnode, rcu)->kv;
0499 }
0500
0501 if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
0502 tnode_free_size = 0;
0503 synchronize_rcu();
0504 }
0505 }
0506
0507 static struct key_vector *replace(struct trie *t,
0508 struct key_vector *oldtnode,
0509 struct key_vector *tn)
0510 {
0511 struct key_vector *tp = node_parent(oldtnode);
0512 unsigned long i;
0513
0514
0515 NODE_INIT_PARENT(tn, tp);
0516 put_child_root(tp, tn->key, tn);
0517
0518
0519 update_children(tn);
0520
0521
0522 tnode_free(oldtnode);
0523
0524
0525 for (i = child_length(tn); i;) {
0526 struct key_vector *inode = get_child(tn, --i);
0527
0528
0529 if (tnode_full(tn, inode))
0530 tn = resize(t, inode);
0531 }
0532
0533 return tp;
0534 }
0535
0536 static struct key_vector *inflate(struct trie *t,
0537 struct key_vector *oldtnode)
0538 {
0539 struct key_vector *tn;
0540 unsigned long i;
0541 t_key m;
0542
0543 pr_debug("In inflate\n");
0544
0545 tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
0546 if (!tn)
0547 goto notnode;
0548
0549
0550 tnode_free_init(oldtnode);
0551
0552
0553
0554
0555
0556
0557 for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
0558 struct key_vector *inode = get_child(oldtnode, --i);
0559 struct key_vector *node0, *node1;
0560 unsigned long j, k;
0561
0562
0563 if (!inode)
0564 continue;
0565
0566
0567 if (!tnode_full(oldtnode, inode)) {
0568 put_child(tn, get_index(inode->key, tn), inode);
0569 continue;
0570 }
0571
0572
0573 tnode_free_append(oldtnode, inode);
0574
0575
0576 if (inode->bits == 1) {
0577 put_child(tn, 2 * i + 1, get_child(inode, 1));
0578 put_child(tn, 2 * i, get_child(inode, 0));
0579 continue;
0580 }
0581
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596 node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
0597 if (!node1)
0598 goto nomem;
0599 node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
0600
0601 tnode_free_append(tn, node1);
0602 if (!node0)
0603 goto nomem;
0604 tnode_free_append(tn, node0);
0605
0606
0607 for (k = child_length(inode), j = k / 2; j;) {
0608 put_child(node1, --j, get_child(inode, --k));
0609 put_child(node0, j, get_child(inode, j));
0610 put_child(node1, --j, get_child(inode, --k));
0611 put_child(node0, j, get_child(inode, j));
0612 }
0613
0614
0615 NODE_INIT_PARENT(node1, tn);
0616 NODE_INIT_PARENT(node0, tn);
0617
0618
0619 put_child(tn, 2 * i + 1, node1);
0620 put_child(tn, 2 * i, node0);
0621 }
0622
0623
0624 return replace(t, oldtnode, tn);
0625 nomem:
0626
0627 tnode_free(tn);
0628 notnode:
0629 return NULL;
0630 }
0631
0632 static struct key_vector *halve(struct trie *t,
0633 struct key_vector *oldtnode)
0634 {
0635 struct key_vector *tn;
0636 unsigned long i;
0637
0638 pr_debug("In halve\n");
0639
0640 tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
0641 if (!tn)
0642 goto notnode;
0643
0644
0645 tnode_free_init(oldtnode);
0646
0647
0648
0649
0650
0651
0652 for (i = child_length(oldtnode); i;) {
0653 struct key_vector *node1 = get_child(oldtnode, --i);
0654 struct key_vector *node0 = get_child(oldtnode, --i);
0655 struct key_vector *inode;
0656
0657
0658 if (!node1 || !node0) {
0659 put_child(tn, i / 2, node1 ? : node0);
0660 continue;
0661 }
0662
0663
0664 inode = tnode_new(node0->key, oldtnode->pos, 1);
0665 if (!inode)
0666 goto nomem;
0667 tnode_free_append(tn, inode);
0668
0669
0670 put_child(inode, 1, node1);
0671 put_child(inode, 0, node0);
0672 NODE_INIT_PARENT(inode, tn);
0673
0674
0675 put_child(tn, i / 2, inode);
0676 }
0677
0678
0679 return replace(t, oldtnode, tn);
0680 nomem:
0681
0682 tnode_free(tn);
0683 notnode:
0684 return NULL;
0685 }
0686
0687 static struct key_vector *collapse(struct trie *t,
0688 struct key_vector *oldtnode)
0689 {
0690 struct key_vector *n, *tp;
0691 unsigned long i;
0692
0693
0694 for (n = NULL, i = child_length(oldtnode); !n && i;)
0695 n = get_child(oldtnode, --i);
0696
0697
0698 tp = node_parent(oldtnode);
0699 put_child_root(tp, oldtnode->key, n);
0700 node_set_parent(n, tp);
0701
0702
0703 node_free(oldtnode);
0704
0705 return tp;
0706 }
0707
0708 static unsigned char update_suffix(struct key_vector *tn)
0709 {
0710 unsigned char slen = tn->pos;
0711 unsigned long stride, i;
0712 unsigned char slen_max;
0713
0714
0715
0716
0717
0718 slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
0719
0720
0721
0722
0723
0724
0725 for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
0726 struct key_vector *n = get_child(tn, i);
0727
0728 if (!n || (n->slen <= slen))
0729 continue;
0730
0731
0732 stride <<= (n->slen - slen);
0733 slen = n->slen;
0734 i &= ~(stride - 1);
0735
0736
0737 if (slen >= slen_max)
0738 break;
0739 }
0740
0741 tn->slen = slen;
0742
0743 return slen;
0744 }
0745
0746
0747
0748
0749
0750
0751
0752
0753
0754
0755
0756
0757
0758
0759
0760
0761
0762
0763
0764
0765
0766
0767
0768
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778
0779
0780
0781
0782
0783
0784
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794
0795
0796
0797
0798
0799
0800
0801
0802
0803 static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
0804 {
0805 unsigned long used = child_length(tn);
0806 unsigned long threshold = used;
0807
0808
0809 threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
0810 used -= tn_info(tn)->empty_children;
0811 used += tn_info(tn)->full_children;
0812
0813
0814
0815 return (used > 1) && tn->pos && ((50 * used) >= threshold);
0816 }
0817
0818 static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
0819 {
0820 unsigned long used = child_length(tn);
0821 unsigned long threshold = used;
0822
0823
0824 threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
0825 used -= tn_info(tn)->empty_children;
0826
0827
0828
0829 return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
0830 }
0831
0832 static inline bool should_collapse(struct key_vector *tn)
0833 {
0834 unsigned long used = child_length(tn);
0835
0836 used -= tn_info(tn)->empty_children;
0837
0838
0839 if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
0840 used -= KEY_MAX;
0841
0842
0843 return used < 2;
0844 }
0845
0846 #define MAX_WORK 10
0847 static struct key_vector *resize(struct trie *t, struct key_vector *tn)
0848 {
0849 #ifdef CONFIG_IP_FIB_TRIE_STATS
0850 struct trie_use_stats __percpu *stats = t->stats;
0851 #endif
0852 struct key_vector *tp = node_parent(tn);
0853 unsigned long cindex = get_index(tn->key, tp);
0854 int max_work = MAX_WORK;
0855
0856 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
0857 tn, inflate_threshold, halve_threshold);
0858
0859
0860
0861
0862
0863 BUG_ON(tn != get_child(tp, cindex));
0864
0865
0866
0867
0868 while (should_inflate(tp, tn) && max_work) {
0869 tp = inflate(t, tn);
0870 if (!tp) {
0871 #ifdef CONFIG_IP_FIB_TRIE_STATS
0872 this_cpu_inc(stats->resize_node_skipped);
0873 #endif
0874 break;
0875 }
0876
0877 max_work--;
0878 tn = get_child(tp, cindex);
0879 }
0880
0881
0882 tp = node_parent(tn);
0883
0884
0885 if (max_work != MAX_WORK)
0886 return tp;
0887
0888
0889
0890
0891 while (should_halve(tp, tn) && max_work) {
0892 tp = halve(t, tn);
0893 if (!tp) {
0894 #ifdef CONFIG_IP_FIB_TRIE_STATS
0895 this_cpu_inc(stats->resize_node_skipped);
0896 #endif
0897 break;
0898 }
0899
0900 max_work--;
0901 tn = get_child(tp, cindex);
0902 }
0903
0904
0905 if (should_collapse(tn))
0906 return collapse(t, tn);
0907
0908
0909 return node_parent(tn);
0910 }
0911
0912 static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
0913 {
0914 unsigned char node_slen = tn->slen;
0915
0916 while ((node_slen > tn->pos) && (node_slen > slen)) {
0917 slen = update_suffix(tn);
0918 if (node_slen == slen)
0919 break;
0920
0921 tn = node_parent(tn);
0922 node_slen = tn->slen;
0923 }
0924 }
0925
0926 static void node_push_suffix(struct key_vector *tn, unsigned char slen)
0927 {
0928 while (tn->slen < slen) {
0929 tn->slen = slen;
0930 tn = node_parent(tn);
0931 }
0932 }
0933
0934
0935 static struct key_vector *fib_find_node(struct trie *t,
0936 struct key_vector **tp, u32 key)
0937 {
0938 struct key_vector *pn, *n = t->kv;
0939 unsigned long index = 0;
0940
0941 do {
0942 pn = n;
0943 n = get_child_rcu(n, index);
0944
0945 if (!n)
0946 break;
0947
0948 index = get_cindex(key, n);
0949
0950
0951
0952
0953
0954
0955
0956
0957
0958
0959
0960
0961
0962
0963
0964 if (index >= (1ul << n->bits)) {
0965 n = NULL;
0966 break;
0967 }
0968
0969
0970 } while (IS_TNODE(n));
0971
0972 *tp = pn;
0973
0974 return n;
0975 }
0976
0977
0978
0979
0980
0981
0982 static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
0983 dscp_t dscp, u32 prio, u32 tb_id,
0984 bool find_first)
0985 {
0986 struct fib_alias *fa;
0987
0988 if (!fah)
0989 return NULL;
0990
0991 hlist_for_each_entry(fa, fah, fa_list) {
0992
0993 u8 __fa_dscp = inet_dscp_to_dsfield(fa->fa_dscp);
0994 u8 __dscp = inet_dscp_to_dsfield(dscp);
0995
0996 if (fa->fa_slen < slen)
0997 continue;
0998 if (fa->fa_slen != slen)
0999 break;
1000 if (fa->tb_id > tb_id)
1001 continue;
1002 if (fa->tb_id != tb_id)
1003 break;
1004 if (find_first)
1005 return fa;
1006 if (__fa_dscp > __dscp)
1007 continue;
1008 if (fa->fa_info->fib_priority >= prio || __fa_dscp < __dscp)
1009 return fa;
1010 }
1011
1012 return NULL;
1013 }
1014
1015 static struct fib_alias *
1016 fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
1017 {
1018 u8 slen = KEYLENGTH - fri->dst_len;
1019 struct key_vector *l, *tp;
1020 struct fib_table *tb;
1021 struct fib_alias *fa;
1022 struct trie *t;
1023
1024 tb = fib_get_table(net, fri->tb_id);
1025 if (!tb)
1026 return NULL;
1027
1028 t = (struct trie *)tb->tb_data;
1029 l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
1030 if (!l)
1031 return NULL;
1032
1033 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
1034 if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
1035 fa->fa_dscp == fri->dscp && fa->fa_info == fri->fi &&
1036 fa->fa_type == fri->type)
1037 return fa;
1038 }
1039
1040 return NULL;
1041 }
1042
1043 void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
1044 {
1045 u8 fib_notify_on_flag_change;
1046 struct fib_alias *fa_match;
1047 struct sk_buff *skb;
1048 int err;
1049
1050 rcu_read_lock();
1051
1052 fa_match = fib_find_matching_alias(net, fri);
1053 if (!fa_match)
1054 goto out;
1055
1056
1057
1058
1059 if (READ_ONCE(fa_match->offload) == fri->offload &&
1060 READ_ONCE(fa_match->trap) == fri->trap &&
1061 READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
1062 goto out;
1063
1064 WRITE_ONCE(fa_match->offload, fri->offload);
1065 WRITE_ONCE(fa_match->trap, fri->trap);
1066
1067 fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
1068
1069
1070 if (fib_notify_on_flag_change == 2 &&
1071 READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
1072 goto out;
1073
1074 WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
1075
1076 if (!fib_notify_on_flag_change)
1077 goto out;
1078
1079 skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
1080 if (!skb) {
1081 err = -ENOBUFS;
1082 goto errout;
1083 }
1084
1085 err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0);
1086 if (err < 0) {
1087
1088 WARN_ON(err == -EMSGSIZE);
1089 kfree_skb(skb);
1090 goto errout;
1091 }
1092
1093 rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC);
1094 goto out;
1095
1096 errout:
1097 rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err);
1098 out:
1099 rcu_read_unlock();
1100 }
1101 EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
1102
1103 static void trie_rebalance(struct trie *t, struct key_vector *tn)
1104 {
1105 while (!IS_TRIE(tn))
1106 tn = resize(t, tn);
1107 }
1108
1109 static int fib_insert_node(struct trie *t, struct key_vector *tp,
1110 struct fib_alias *new, t_key key)
1111 {
1112 struct key_vector *n, *l;
1113
1114 l = leaf_new(key, new);
1115 if (!l)
1116 goto noleaf;
1117
1118
1119 n = get_child(tp, get_index(key, tp));
1120
1121
1122
1123
1124
1125
1126
1127 if (n) {
1128 struct key_vector *tn;
1129
1130 tn = tnode_new(key, __fls(key ^ n->key), 1);
1131 if (!tn)
1132 goto notnode;
1133
1134
1135 NODE_INIT_PARENT(tn, tp);
1136 put_child(tn, get_index(key, tn) ^ 1, n);
1137
1138
1139 put_child_root(tp, key, tn);
1140 node_set_parent(n, tn);
1141
1142
1143 tp = tn;
1144 }
1145
1146
1147 node_push_suffix(tp, new->fa_slen);
1148 NODE_INIT_PARENT(l, tp);
1149 put_child_root(tp, key, l);
1150 trie_rebalance(t, tp);
1151
1152 return 0;
1153 notnode:
1154 node_free(l);
1155 noleaf:
1156 return -ENOMEM;
1157 }
1158
1159 static int fib_insert_alias(struct trie *t, struct key_vector *tp,
1160 struct key_vector *l, struct fib_alias *new,
1161 struct fib_alias *fa, t_key key)
1162 {
1163 if (!l)
1164 return fib_insert_node(t, tp, new, key);
1165
1166 if (fa) {
1167 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
1168 } else {
1169 struct fib_alias *last;
1170
1171 hlist_for_each_entry(last, &l->leaf, fa_list) {
1172 if (new->fa_slen < last->fa_slen)
1173 break;
1174 if ((new->fa_slen == last->fa_slen) &&
1175 (new->tb_id > last->tb_id))
1176 break;
1177 fa = last;
1178 }
1179
1180 if (fa)
1181 hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
1182 else
1183 hlist_add_head_rcu(&new->fa_list, &l->leaf);
1184 }
1185
1186
1187 if (l->slen < new->fa_slen) {
1188 l->slen = new->fa_slen;
1189 node_push_suffix(tp, new->fa_slen);
1190 }
1191
1192 return 0;
1193 }
1194
1195 static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
1196 {
1197 if (plen > KEYLENGTH) {
1198 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1199 return false;
1200 }
1201
1202 if ((plen < KEYLENGTH) && (key << plen)) {
1203 NL_SET_ERR_MSG(extack,
1204 "Invalid prefix for given prefix length");
1205 return false;
1206 }
1207
1208 return true;
1209 }
1210
1211 static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1212 struct key_vector *l, struct fib_alias *old);
1213
1214
1215 int fib_table_insert(struct net *net, struct fib_table *tb,
1216 struct fib_config *cfg, struct netlink_ext_ack *extack)
1217 {
1218 struct trie *t = (struct trie *)tb->tb_data;
1219 struct fib_alias *fa, *new_fa;
1220 struct key_vector *l, *tp;
1221 u16 nlflags = NLM_F_EXCL;
1222 struct fib_info *fi;
1223 u8 plen = cfg->fc_dst_len;
1224 u8 slen = KEYLENGTH - plen;
1225 dscp_t dscp;
1226 u32 key;
1227 int err;
1228
1229 key = ntohl(cfg->fc_dst);
1230
1231 if (!fib_valid_key_len(key, plen, extack))
1232 return -EINVAL;
1233
1234 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
1235
1236 fi = fib_create_info(cfg, extack);
1237 if (IS_ERR(fi)) {
1238 err = PTR_ERR(fi);
1239 goto err;
1240 }
1241
1242 dscp = cfg->fc_dscp;
1243 l = fib_find_node(t, &tp, key);
1244 fa = l ? fib_find_alias(&l->leaf, slen, dscp, fi->fib_priority,
1245 tb->tb_id, false) : NULL;
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256 if (fa && fa->fa_dscp == dscp &&
1257 fa->fa_info->fib_priority == fi->fib_priority) {
1258 struct fib_alias *fa_first, *fa_match;
1259
1260 err = -EEXIST;
1261 if (cfg->fc_nlflags & NLM_F_EXCL)
1262 goto out;
1263
1264 nlflags &= ~NLM_F_EXCL;
1265
1266
1267
1268
1269
1270
1271 fa_match = NULL;
1272 fa_first = fa;
1273 hlist_for_each_entry_from(fa, fa_list) {
1274 if ((fa->fa_slen != slen) ||
1275 (fa->tb_id != tb->tb_id) ||
1276 (fa->fa_dscp != dscp))
1277 break;
1278 if (fa->fa_info->fib_priority != fi->fib_priority)
1279 break;
1280 if (fa->fa_type == cfg->fc_type &&
1281 fa->fa_info == fi) {
1282 fa_match = fa;
1283 break;
1284 }
1285 }
1286
1287 if (cfg->fc_nlflags & NLM_F_REPLACE) {
1288 struct fib_info *fi_drop;
1289 u8 state;
1290
1291 nlflags |= NLM_F_REPLACE;
1292 fa = fa_first;
1293 if (fa_match) {
1294 if (fa == fa_match)
1295 err = 0;
1296 goto out;
1297 }
1298 err = -ENOBUFS;
1299 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1300 if (!new_fa)
1301 goto out;
1302
1303 fi_drop = fa->fa_info;
1304 new_fa->fa_dscp = fa->fa_dscp;
1305 new_fa->fa_info = fi;
1306 new_fa->fa_type = cfg->fc_type;
1307 state = fa->fa_state;
1308 new_fa->fa_state = state & ~FA_S_ACCESSED;
1309 new_fa->fa_slen = fa->fa_slen;
1310 new_fa->tb_id = tb->tb_id;
1311 new_fa->fa_default = -1;
1312 new_fa->offload = 0;
1313 new_fa->trap = 0;
1314 new_fa->offload_failed = 0;
1315
1316 hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1317
1318 if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
1319 tb->tb_id, true) == new_fa) {
1320 enum fib_event_type fib_event;
1321
1322 fib_event = FIB_EVENT_ENTRY_REPLACE;
1323 err = call_fib_entry_notifiers(net, fib_event,
1324 key, plen,
1325 new_fa, extack);
1326 if (err) {
1327 hlist_replace_rcu(&new_fa->fa_list,
1328 &fa->fa_list);
1329 goto out_free_new_fa;
1330 }
1331 }
1332
1333 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1334 tb->tb_id, &cfg->fc_nlinfo, nlflags);
1335
1336 alias_free_mem_rcu(fa);
1337
1338 fib_release_info(fi_drop);
1339 if (state & FA_S_ACCESSED)
1340 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1341
1342 goto succeeded;
1343 }
1344
1345
1346
1347
1348 if (fa_match)
1349 goto out;
1350
1351 if (cfg->fc_nlflags & NLM_F_APPEND)
1352 nlflags |= NLM_F_APPEND;
1353 else
1354 fa = fa_first;
1355 }
1356 err = -ENOENT;
1357 if (!(cfg->fc_nlflags & NLM_F_CREATE))
1358 goto out;
1359
1360 nlflags |= NLM_F_CREATE;
1361 err = -ENOBUFS;
1362 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1363 if (!new_fa)
1364 goto out;
1365
1366 new_fa->fa_info = fi;
1367 new_fa->fa_dscp = dscp;
1368 new_fa->fa_type = cfg->fc_type;
1369 new_fa->fa_state = 0;
1370 new_fa->fa_slen = slen;
1371 new_fa->tb_id = tb->tb_id;
1372 new_fa->fa_default = -1;
1373 new_fa->offload = 0;
1374 new_fa->trap = 0;
1375 new_fa->offload_failed = 0;
1376
1377
1378 err = fib_insert_alias(t, tp, l, new_fa, fa, key);
1379 if (err)
1380 goto out_free_new_fa;
1381
1382
1383 l = l ? l : fib_find_node(t, &tp, key);
1384 if (WARN_ON_ONCE(!l))
1385 goto out_free_new_fa;
1386
1387 if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
1388 new_fa) {
1389 enum fib_event_type fib_event;
1390
1391 fib_event = FIB_EVENT_ENTRY_REPLACE;
1392 err = call_fib_entry_notifiers(net, fib_event, key, plen,
1393 new_fa, extack);
1394 if (err)
1395 goto out_remove_new_fa;
1396 }
1397
1398 if (!plen)
1399 tb->tb_num_default++;
1400
1401 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1402 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
1403 &cfg->fc_nlinfo, nlflags);
1404 succeeded:
1405 return 0;
1406
1407 out_remove_new_fa:
1408 fib_remove_alias(t, tp, l, new_fa);
1409 out_free_new_fa:
1410 kmem_cache_free(fn_alias_kmem, new_fa);
1411 out:
1412 fib_release_info(fi);
1413 err:
1414 return err;
1415 }
1416
1417 static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
1418 {
1419 t_key prefix = n->key;
1420
1421 return (key ^ prefix) & (prefix | -prefix);
1422 }
1423
1424 bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
1425 const struct flowi4 *flp)
1426 {
1427 if (nhc->nhc_flags & RTNH_F_DEAD)
1428 return false;
1429
1430 if (ip_ignore_linkdown(nhc->nhc_dev) &&
1431 nhc->nhc_flags & RTNH_F_LINKDOWN &&
1432 !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
1433 return false;
1434
1435 if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif)
1436 return false;
1437
1438 return true;
1439 }
1440
1441
1442 int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1443 struct fib_result *res, int fib_flags)
1444 {
1445 struct trie *t = (struct trie *) tb->tb_data;
1446 #ifdef CONFIG_IP_FIB_TRIE_STATS
1447 struct trie_use_stats __percpu *stats = t->stats;
1448 #endif
1449 const t_key key = ntohl(flp->daddr);
1450 struct key_vector *n, *pn;
1451 struct fib_alias *fa;
1452 unsigned long index;
1453 t_key cindex;
1454
1455 pn = t->kv;
1456 cindex = 0;
1457
1458 n = get_child_rcu(pn, cindex);
1459 if (!n) {
1460 trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
1461 return -EAGAIN;
1462 }
1463
1464 #ifdef CONFIG_IP_FIB_TRIE_STATS
1465 this_cpu_inc(stats->gets);
1466 #endif
1467
1468
1469 for (;;) {
1470 index = get_cindex(key, n);
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486 if (index >= (1ul << n->bits))
1487 break;
1488
1489
1490 if (IS_LEAF(n))
1491 goto found;
1492
1493
1494
1495
1496 if (n->slen > n->pos) {
1497 pn = n;
1498 cindex = index;
1499 }
1500
1501 n = get_child_rcu(n, index);
1502 if (unlikely(!n))
1503 goto backtrace;
1504 }
1505
1506
1507 for (;;) {
1508
1509 struct key_vector __rcu **cptr = n->tnode;
1510
1511
1512
1513
1514
1515 if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
1516 goto backtrace;
1517
1518
1519 if (unlikely(IS_LEAF(n)))
1520 break;
1521
1522
1523
1524
1525
1526
1527 while ((n = rcu_dereference(*cptr)) == NULL) {
1528 backtrace:
1529 #ifdef CONFIG_IP_FIB_TRIE_STATS
1530 if (!n)
1531 this_cpu_inc(stats->null_node_hit);
1532 #endif
1533
1534
1535
1536
1537
1538 while (!cindex) {
1539 t_key pkey = pn->key;
1540
1541
1542
1543
1544
1545 if (IS_TRIE(pn)) {
1546 trace_fib_table_lookup(tb->tb_id, flp,
1547 NULL, -EAGAIN);
1548 return -EAGAIN;
1549 }
1550 #ifdef CONFIG_IP_FIB_TRIE_STATS
1551 this_cpu_inc(stats->backtrack);
1552 #endif
1553
1554 pn = node_parent_rcu(pn);
1555 cindex = get_index(pkey, pn);
1556 }
1557
1558
1559 cindex &= cindex - 1;
1560
1561
1562 cptr = &pn->tnode[cindex];
1563 }
1564 }
1565
1566 found:
1567
1568 index = key ^ n->key;
1569
1570
1571 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
1572 struct fib_info *fi = fa->fa_info;
1573 struct fib_nh_common *nhc;
1574 int nhsel, err;
1575
1576 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
1577 if (index >= (1ul << fa->fa_slen))
1578 continue;
1579 }
1580 if (fa->fa_dscp &&
1581 inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
1582 continue;
1583 if (fi->fib_dead)
1584 continue;
1585 if (fa->fa_info->fib_scope < flp->flowi4_scope)
1586 continue;
1587 fib_alias_accessed(fa);
1588 err = fib_props[fa->fa_type].error;
1589 if (unlikely(err < 0)) {
1590 out_reject:
1591 #ifdef CONFIG_IP_FIB_TRIE_STATS
1592 this_cpu_inc(stats->semantic_match_passed);
1593 #endif
1594 trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
1595 return err;
1596 }
1597 if (fi->fib_flags & RTNH_F_DEAD)
1598 continue;
1599
1600 if (unlikely(fi->nh)) {
1601 if (nexthop_is_blackhole(fi->nh)) {
1602 err = fib_props[RTN_BLACKHOLE].error;
1603 goto out_reject;
1604 }
1605
1606 nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
1607 &nhsel);
1608 if (nhc)
1609 goto set_result;
1610 goto miss;
1611 }
1612
1613 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
1614 nhc = fib_info_nhc(fi, nhsel);
1615
1616 if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
1617 continue;
1618 set_result:
1619 if (!(fib_flags & FIB_LOOKUP_NOREF))
1620 refcount_inc(&fi->fib_clntref);
1621
1622 res->prefix = htonl(n->key);
1623 res->prefixlen = KEYLENGTH - fa->fa_slen;
1624 res->nh_sel = nhsel;
1625 res->nhc = nhc;
1626 res->type = fa->fa_type;
1627 res->scope = fi->fib_scope;
1628 res->fi = fi;
1629 res->table = tb;
1630 res->fa_head = &n->leaf;
1631 #ifdef CONFIG_IP_FIB_TRIE_STATS
1632 this_cpu_inc(stats->semantic_match_passed);
1633 #endif
1634 trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
1635
1636 return err;
1637 }
1638 }
1639 miss:
1640 #ifdef CONFIG_IP_FIB_TRIE_STATS
1641 this_cpu_inc(stats->semantic_match_miss);
1642 #endif
1643 goto backtrace;
1644 }
1645 EXPORT_SYMBOL_GPL(fib_table_lookup);
1646
1647 static void fib_remove_alias(struct trie *t, struct key_vector *tp,
1648 struct key_vector *l, struct fib_alias *old)
1649 {
1650
1651 struct hlist_node **pprev = old->fa_list.pprev;
1652 struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
1653
1654
1655 hlist_del_rcu(&old->fa_list);
1656
1657
1658
1659
1660 if (hlist_empty(&l->leaf)) {
1661 if (tp->slen == l->slen)
1662 node_pull_suffix(tp, tp->pos);
1663 put_child_root(tp, l->key, NULL);
1664 node_free(l);
1665 trie_rebalance(t, tp);
1666 return;
1667 }
1668
1669
1670 if (*pprev)
1671 return;
1672
1673
1674 l->slen = fa->fa_slen;
1675 node_pull_suffix(tp, fa->fa_slen);
1676 }
1677
1678 static void fib_notify_alias_delete(struct net *net, u32 key,
1679 struct hlist_head *fah,
1680 struct fib_alias *fa_to_delete,
1681 struct netlink_ext_ack *extack)
1682 {
1683 struct fib_alias *fa_next, *fa_to_notify;
1684 u32 tb_id = fa_to_delete->tb_id;
1685 u8 slen = fa_to_delete->fa_slen;
1686 enum fib_event_type fib_event;
1687
1688
1689 if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
1690 return;
1691
1692
1693
1694
1695 fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
1696 struct fib_alias, fa_list);
1697 if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
1698 fib_event = FIB_EVENT_ENTRY_REPLACE;
1699 fa_to_notify = fa_next;
1700 } else {
1701 fib_event = FIB_EVENT_ENTRY_DEL;
1702 fa_to_notify = fa_to_delete;
1703 }
1704 call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
1705 fa_to_notify, extack);
1706 }
1707
1708
1709 int fib_table_delete(struct net *net, struct fib_table *tb,
1710 struct fib_config *cfg, struct netlink_ext_ack *extack)
1711 {
1712 struct trie *t = (struct trie *) tb->tb_data;
1713 struct fib_alias *fa, *fa_to_delete;
1714 struct key_vector *l, *tp;
1715 u8 plen = cfg->fc_dst_len;
1716 u8 slen = KEYLENGTH - plen;
1717 dscp_t dscp;
1718 u32 key;
1719
1720 key = ntohl(cfg->fc_dst);
1721
1722 if (!fib_valid_key_len(key, plen, extack))
1723 return -EINVAL;
1724
1725 l = fib_find_node(t, &tp, key);
1726 if (!l)
1727 return -ESRCH;
1728
1729 dscp = cfg->fc_dscp;
1730 fa = fib_find_alias(&l->leaf, slen, dscp, 0, tb->tb_id, false);
1731 if (!fa)
1732 return -ESRCH;
1733
1734 pr_debug("Deleting %08x/%d dsfield=0x%02x t=%p\n", key, plen,
1735 inet_dscp_to_dsfield(dscp), t);
1736
1737 fa_to_delete = NULL;
1738 hlist_for_each_entry_from(fa, fa_list) {
1739 struct fib_info *fi = fa->fa_info;
1740
1741 if ((fa->fa_slen != slen) ||
1742 (fa->tb_id != tb->tb_id) ||
1743 (fa->fa_dscp != dscp))
1744 break;
1745
1746 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1747 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1748 fa->fa_info->fib_scope == cfg->fc_scope) &&
1749 (!cfg->fc_prefsrc ||
1750 fi->fib_prefsrc == cfg->fc_prefsrc) &&
1751 (!cfg->fc_protocol ||
1752 fi->fib_protocol == cfg->fc_protocol) &&
1753 fib_nh_match(net, cfg, fi, extack) == 0 &&
1754 fib_metrics_match(cfg, fi)) {
1755 fa_to_delete = fa;
1756 break;
1757 }
1758 }
1759
1760 if (!fa_to_delete)
1761 return -ESRCH;
1762
1763 fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
1764 rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
1765 &cfg->fc_nlinfo, 0);
1766
1767 if (!plen)
1768 tb->tb_num_default--;
1769
1770 fib_remove_alias(t, tp, l, fa_to_delete);
1771
1772 if (fa_to_delete->fa_state & FA_S_ACCESSED)
1773 rt_cache_flush(cfg->fc_nlinfo.nl_net);
1774
1775 fib_release_info(fa_to_delete->fa_info);
1776 alias_free_mem_rcu(fa_to_delete);
1777 return 0;
1778 }
1779
1780
1781 static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
1782 {
1783 struct key_vector *pn, *n = *tn;
1784 unsigned long cindex;
1785
1786
1787 do {
1788
1789 pn = n;
1790 cindex = (key > pn->key) ? get_index(key, pn) : 0;
1791
1792 if (cindex >> pn->bits)
1793 break;
1794
1795
1796 n = get_child_rcu(pn, cindex++);
1797 if (!n)
1798 break;
1799
1800
1801 if (IS_LEAF(n) && (n->key >= key))
1802 goto found;
1803 } while (IS_TNODE(n));
1804
1805
1806 while (!IS_TRIE(pn)) {
1807
1808 if (cindex >= (1ul << pn->bits)) {
1809 t_key pkey = pn->key;
1810
1811 pn = node_parent_rcu(pn);
1812 cindex = get_index(pkey, pn) + 1;
1813 continue;
1814 }
1815
1816
1817 n = get_child_rcu(pn, cindex++);
1818 if (!n)
1819 continue;
1820
1821
1822 if (IS_LEAF(n))
1823 goto found;
1824
1825
1826 pn = n;
1827 cindex = 0;
1828 }
1829
1830 *tn = pn;
1831 return NULL;
1832 found:
1833
1834 *tn = pn;
1835 return n;
1836 }
1837
1838 static void fib_trie_free(struct fib_table *tb)
1839 {
1840 struct trie *t = (struct trie *)tb->tb_data;
1841 struct key_vector *pn = t->kv;
1842 unsigned long cindex = 1;
1843 struct hlist_node *tmp;
1844 struct fib_alias *fa;
1845
1846
1847 for (;;) {
1848 struct key_vector *n;
1849
1850 if (!(cindex--)) {
1851 t_key pkey = pn->key;
1852
1853 if (IS_TRIE(pn))
1854 break;
1855
1856 n = pn;
1857 pn = node_parent(pn);
1858
1859
1860 put_child_root(pn, n->key, NULL);
1861 node_free(n);
1862
1863 cindex = get_index(pkey, pn);
1864
1865 continue;
1866 }
1867
1868
1869 n = get_child(pn, cindex);
1870 if (!n)
1871 continue;
1872
1873 if (IS_TNODE(n)) {
1874
1875 pn = n;
1876 cindex = 1ul << n->bits;
1877
1878 continue;
1879 }
1880
1881 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
1882 hlist_del_rcu(&fa->fa_list);
1883 alias_free_mem_rcu(fa);
1884 }
1885
1886 put_child_root(pn, n->key, NULL);
1887 node_free(n);
1888 }
1889
1890 #ifdef CONFIG_IP_FIB_TRIE_STATS
1891 free_percpu(t->stats);
1892 #endif
1893 kfree(tb);
1894 }
1895
1896 struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
1897 {
1898 struct trie *ot = (struct trie *)oldtb->tb_data;
1899 struct key_vector *l, *tp = ot->kv;
1900 struct fib_table *local_tb;
1901 struct fib_alias *fa;
1902 struct trie *lt;
1903 t_key key = 0;
1904
1905 if (oldtb->tb_data == oldtb->__data)
1906 return oldtb;
1907
1908 local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
1909 if (!local_tb)
1910 return NULL;
1911
1912 lt = (struct trie *)local_tb->tb_data;
1913
1914 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
1915 struct key_vector *local_l = NULL, *local_tp;
1916
1917 hlist_for_each_entry(fa, &l->leaf, fa_list) {
1918 struct fib_alias *new_fa;
1919
1920 if (local_tb->tb_id != fa->tb_id)
1921 continue;
1922
1923
1924 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
1925 if (!new_fa)
1926 goto out;
1927
1928 memcpy(new_fa, fa, sizeof(*fa));
1929
1930
1931 if (!local_l)
1932 local_l = fib_find_node(lt, &local_tp, l->key);
1933
1934 if (fib_insert_alias(lt, local_tp, local_l, new_fa,
1935 NULL, l->key)) {
1936 kmem_cache_free(fn_alias_kmem, new_fa);
1937 goto out;
1938 }
1939 }
1940
1941
1942 key = l->key + 1;
1943 if (key < l->key)
1944 break;
1945 }
1946
1947 return local_tb;
1948 out:
1949 fib_trie_free(local_tb);
1950
1951 return NULL;
1952 }
1953
1954
1955 void fib_table_flush_external(struct fib_table *tb)
1956 {
1957 struct trie *t = (struct trie *)tb->tb_data;
1958 struct key_vector *pn = t->kv;
1959 unsigned long cindex = 1;
1960 struct hlist_node *tmp;
1961 struct fib_alias *fa;
1962
1963
1964 for (;;) {
1965 unsigned char slen = 0;
1966 struct key_vector *n;
1967
1968 if (!(cindex--)) {
1969 t_key pkey = pn->key;
1970
1971
1972 if (IS_TRIE(pn))
1973 break;
1974
1975
1976 if (pn->slen > pn->pos)
1977 update_suffix(pn);
1978
1979
1980 pn = resize(t, pn);
1981 cindex = get_index(pkey, pn);
1982
1983 continue;
1984 }
1985
1986
1987 n = get_child(pn, cindex);
1988 if (!n)
1989 continue;
1990
1991 if (IS_TNODE(n)) {
1992
1993 pn = n;
1994 cindex = 1ul << n->bits;
1995
1996 continue;
1997 }
1998
1999 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
2000
2001
2002
2003 if (tb->tb_id != fa->tb_id) {
2004 hlist_del_rcu(&fa->fa_list);
2005 alias_free_mem_rcu(fa);
2006 continue;
2007 }
2008
2009
2010 slen = fa->fa_slen;
2011 }
2012
2013
2014 n->slen = slen;
2015
2016 if (hlist_empty(&n->leaf)) {
2017 put_child_root(pn, n->key, NULL);
2018 node_free(n);
2019 }
2020 }
2021 }
2022
2023
2024 int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
2025 {
2026 struct trie *t = (struct trie *)tb->tb_data;
2027 struct key_vector *pn = t->kv;
2028 unsigned long cindex = 1;
2029 struct hlist_node *tmp;
2030 struct fib_alias *fa;
2031 int found = 0;
2032
2033
2034 for (;;) {
2035 unsigned char slen = 0;
2036 struct key_vector *n;
2037
2038 if (!(cindex--)) {
2039 t_key pkey = pn->key;
2040
2041
2042 if (IS_TRIE(pn))
2043 break;
2044
2045
2046 if (pn->slen > pn->pos)
2047 update_suffix(pn);
2048
2049
2050 pn = resize(t, pn);
2051 cindex = get_index(pkey, pn);
2052
2053 continue;
2054 }
2055
2056
2057 n = get_child(pn, cindex);
2058 if (!n)
2059 continue;
2060
2061 if (IS_TNODE(n)) {
2062
2063 pn = n;
2064 cindex = 1ul << n->bits;
2065
2066 continue;
2067 }
2068
2069 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
2070 struct fib_info *fi = fa->fa_info;
2071
2072 if (!fi || tb->tb_id != fa->tb_id ||
2073 (!(fi->fib_flags & RTNH_F_DEAD) &&
2074 !fib_props[fa->fa_type].error)) {
2075 slen = fa->fa_slen;
2076 continue;
2077 }
2078
2079
2080
2081
2082 if (!flush_all && fib_props[fa->fa_type].error) {
2083 slen = fa->fa_slen;
2084 continue;
2085 }
2086
2087 fib_notify_alias_delete(net, n->key, &n->leaf, fa,
2088 NULL);
2089 hlist_del_rcu(&fa->fa_list);
2090 fib_release_info(fa->fa_info);
2091 alias_free_mem_rcu(fa);
2092 found++;
2093 }
2094
2095
2096 n->slen = slen;
2097
2098 if (hlist_empty(&n->leaf)) {
2099 put_child_root(pn, n->key, NULL);
2100 node_free(n);
2101 }
2102 }
2103
2104 pr_debug("trie_flush found=%d\n", found);
2105 return found;
2106 }
2107
2108
2109 static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
2110 struct nl_info *info)
2111 {
2112 struct trie *t = (struct trie *)tb->tb_data;
2113 struct key_vector *pn = t->kv;
2114 unsigned long cindex = 1;
2115 struct fib_alias *fa;
2116
2117 for (;;) {
2118 struct key_vector *n;
2119
2120 if (!(cindex--)) {
2121 t_key pkey = pn->key;
2122
2123 if (IS_TRIE(pn))
2124 break;
2125
2126 pn = node_parent(pn);
2127 cindex = get_index(pkey, pn);
2128 continue;
2129 }
2130
2131
2132 n = get_child(pn, cindex);
2133 if (!n)
2134 continue;
2135
2136 if (IS_TNODE(n)) {
2137
2138 pn = n;
2139 cindex = 1ul << n->bits;
2140
2141 continue;
2142 }
2143
2144 hlist_for_each_entry(fa, &n->leaf, fa_list) {
2145 struct fib_info *fi = fa->fa_info;
2146
2147 if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
2148 continue;
2149
2150 rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
2151 KEYLENGTH - fa->fa_slen, tb->tb_id,
2152 info, NLM_F_REPLACE);
2153 }
2154 }
2155 }
2156
2157 void fib_info_notify_update(struct net *net, struct nl_info *info)
2158 {
2159 unsigned int h;
2160
2161 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2162 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2163 struct fib_table *tb;
2164
2165 hlist_for_each_entry_rcu(tb, head, tb_hlist,
2166 lockdep_rtnl_is_held())
2167 __fib_info_notify_update(net, tb, info);
2168 }
2169 }
2170
2171 static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
2172 struct notifier_block *nb,
2173 struct netlink_ext_ack *extack)
2174 {
2175 struct fib_alias *fa;
2176 int last_slen = -1;
2177 int err;
2178
2179 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2180 struct fib_info *fi = fa->fa_info;
2181
2182 if (!fi)
2183 continue;
2184
2185
2186
2187
2188 if (tb->tb_id != fa->tb_id)
2189 continue;
2190
2191 if (fa->fa_slen == last_slen)
2192 continue;
2193
2194 last_slen = fa->fa_slen;
2195 err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
2196 l->key, KEYLENGTH - fa->fa_slen,
2197 fa, extack);
2198 if (err)
2199 return err;
2200 }
2201 return 0;
2202 }
2203
2204 static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
2205 struct netlink_ext_ack *extack)
2206 {
2207 struct trie *t = (struct trie *)tb->tb_data;
2208 struct key_vector *l, *tp = t->kv;
2209 t_key key = 0;
2210 int err;
2211
2212 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2213 err = fib_leaf_notify(l, tb, nb, extack);
2214 if (err)
2215 return err;
2216
2217 key = l->key + 1;
2218
2219 if (key < l->key)
2220 break;
2221 }
2222 return 0;
2223 }
2224
2225 int fib_notify(struct net *net, struct notifier_block *nb,
2226 struct netlink_ext_ack *extack)
2227 {
2228 unsigned int h;
2229 int err;
2230
2231 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2232 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2233 struct fib_table *tb;
2234
2235 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2236 err = fib_table_notify(tb, nb, extack);
2237 if (err)
2238 return err;
2239 }
2240 }
2241 return 0;
2242 }
2243
2244 static void __trie_free_rcu(struct rcu_head *head)
2245 {
2246 struct fib_table *tb = container_of(head, struct fib_table, rcu);
2247 #ifdef CONFIG_IP_FIB_TRIE_STATS
2248 struct trie *t = (struct trie *)tb->tb_data;
2249
2250 if (tb->tb_data == tb->__data)
2251 free_percpu(t->stats);
2252 #endif
2253 kfree(tb);
2254 }
2255
2256 void fib_free_table(struct fib_table *tb)
2257 {
2258 call_rcu(&tb->rcu, __trie_free_rcu);
2259 }
2260
2261 static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
2262 struct sk_buff *skb, struct netlink_callback *cb,
2263 struct fib_dump_filter *filter)
2264 {
2265 unsigned int flags = NLM_F_MULTI;
2266 __be32 xkey = htonl(l->key);
2267 int i, s_i, i_fa, s_fa, err;
2268 struct fib_alias *fa;
2269
2270 if (filter->filter_set ||
2271 !filter->dump_exceptions || !filter->dump_routes)
2272 flags |= NLM_F_DUMP_FILTERED;
2273
2274 s_i = cb->args[4];
2275 s_fa = cb->args[5];
2276 i = 0;
2277
2278
2279 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2280 struct fib_info *fi = fa->fa_info;
2281
2282 if (i < s_i)
2283 goto next;
2284
2285 i_fa = 0;
2286
2287 if (tb->tb_id != fa->tb_id)
2288 goto next;
2289
2290 if (filter->filter_set) {
2291 if (filter->rt_type && fa->fa_type != filter->rt_type)
2292 goto next;
2293
2294 if ((filter->protocol &&
2295 fi->fib_protocol != filter->protocol))
2296 goto next;
2297
2298 if (filter->dev &&
2299 !fib_info_nh_uses_dev(fi, filter->dev))
2300 goto next;
2301 }
2302
2303 if (filter->dump_routes) {
2304 if (!s_fa) {
2305 struct fib_rt_info fri;
2306
2307 fri.fi = fi;
2308 fri.tb_id = tb->tb_id;
2309 fri.dst = xkey;
2310 fri.dst_len = KEYLENGTH - fa->fa_slen;
2311 fri.dscp = fa->fa_dscp;
2312 fri.type = fa->fa_type;
2313 fri.offload = READ_ONCE(fa->offload);
2314 fri.trap = READ_ONCE(fa->trap);
2315 fri.offload_failed = READ_ONCE(fa->offload_failed);
2316 err = fib_dump_info(skb,
2317 NETLINK_CB(cb->skb).portid,
2318 cb->nlh->nlmsg_seq,
2319 RTM_NEWROUTE, &fri, flags);
2320 if (err < 0)
2321 goto stop;
2322 }
2323
2324 i_fa++;
2325 }
2326
2327 if (filter->dump_exceptions) {
2328 err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
2329 &i_fa, s_fa, flags);
2330 if (err < 0)
2331 goto stop;
2332 }
2333
2334 next:
2335 i++;
2336 }
2337
2338 cb->args[4] = i;
2339 return skb->len;
2340
2341 stop:
2342 cb->args[4] = i;
2343 cb->args[5] = i_fa;
2344 return err;
2345 }
2346
2347
2348 int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
2349 struct netlink_callback *cb, struct fib_dump_filter *filter)
2350 {
2351 struct trie *t = (struct trie *)tb->tb_data;
2352 struct key_vector *l, *tp = t->kv;
2353
2354
2355
2356 int count = cb->args[2];
2357 t_key key = cb->args[3];
2358
2359
2360
2361
2362 if (count && !key)
2363 return skb->len;
2364
2365 while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
2366 int err;
2367
2368 err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
2369 if (err < 0) {
2370 cb->args[3] = key;
2371 cb->args[2] = count;
2372 return err;
2373 }
2374
2375 ++count;
2376 key = l->key + 1;
2377
2378 memset(&cb->args[4], 0,
2379 sizeof(cb->args) - 4*sizeof(cb->args[0]));
2380
2381
2382 if (key < l->key)
2383 break;
2384 }
2385
2386 cb->args[3] = key;
2387 cb->args[2] = count;
2388
2389 return skb->len;
2390 }
2391
2392 void __init fib_trie_init(void)
2393 {
2394 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2395 sizeof(struct fib_alias),
2396 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
2397
2398 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2399 LEAF_SIZE,
2400 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
2401 }
2402
2403 struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
2404 {
2405 struct fib_table *tb;
2406 struct trie *t;
2407 size_t sz = sizeof(*tb);
2408
2409 if (!alias)
2410 sz += sizeof(struct trie);
2411
2412 tb = kzalloc(sz, GFP_KERNEL);
2413 if (!tb)
2414 return NULL;
2415
2416 tb->tb_id = id;
2417 tb->tb_num_default = 0;
2418 tb->tb_data = (alias ? alias->__data : tb->__data);
2419
2420 if (alias)
2421 return tb;
2422
2423 t = (struct trie *) tb->tb_data;
2424 t->kv[0].pos = KEYLENGTH;
2425 t->kv[0].slen = KEYLENGTH;
2426 #ifdef CONFIG_IP_FIB_TRIE_STATS
2427 t->stats = alloc_percpu(struct trie_use_stats);
2428 if (!t->stats) {
2429 kfree(tb);
2430 tb = NULL;
2431 }
2432 #endif
2433
2434 return tb;
2435 }
2436
2437 #ifdef CONFIG_PROC_FS
2438
2439 struct fib_trie_iter {
2440 struct seq_net_private p;
2441 struct fib_table *tb;
2442 struct key_vector *tnode;
2443 unsigned int index;
2444 unsigned int depth;
2445 };
2446
2447 static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
2448 {
2449 unsigned long cindex = iter->index;
2450 struct key_vector *pn = iter->tnode;
2451 t_key pkey;
2452
2453 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2454 iter->tnode, iter->index, iter->depth);
2455
2456 while (!IS_TRIE(pn)) {
2457 while (cindex < child_length(pn)) {
2458 struct key_vector *n = get_child_rcu(pn, cindex++);
2459
2460 if (!n)
2461 continue;
2462
2463 if (IS_LEAF(n)) {
2464 iter->tnode = pn;
2465 iter->index = cindex;
2466 } else {
2467
2468 iter->tnode = n;
2469 iter->index = 0;
2470 ++iter->depth;
2471 }
2472
2473 return n;
2474 }
2475
2476
2477 pkey = pn->key;
2478 pn = node_parent_rcu(pn);
2479 cindex = get_index(pkey, pn) + 1;
2480 --iter->depth;
2481 }
2482
2483
2484 iter->tnode = pn;
2485 iter->index = 0;
2486
2487 return NULL;
2488 }
2489
2490 static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
2491 struct trie *t)
2492 {
2493 struct key_vector *n, *pn;
2494
2495 if (!t)
2496 return NULL;
2497
2498 pn = t->kv;
2499 n = rcu_dereference(pn->tnode[0]);
2500 if (!n)
2501 return NULL;
2502
2503 if (IS_TNODE(n)) {
2504 iter->tnode = n;
2505 iter->index = 0;
2506 iter->depth = 1;
2507 } else {
2508 iter->tnode = pn;
2509 iter->index = 0;
2510 iter->depth = 0;
2511 }
2512
2513 return n;
2514 }
2515
2516 static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2517 {
2518 struct key_vector *n;
2519 struct fib_trie_iter iter;
2520
2521 memset(s, 0, sizeof(*s));
2522
2523 rcu_read_lock();
2524 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
2525 if (IS_LEAF(n)) {
2526 struct fib_alias *fa;
2527
2528 s->leaves++;
2529 s->totdepth += iter.depth;
2530 if (iter.depth > s->maxdepth)
2531 s->maxdepth = iter.depth;
2532
2533 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
2534 ++s->prefixes;
2535 } else {
2536 s->tnodes++;
2537 if (n->bits < MAX_STAT_DEPTH)
2538 s->nodesizes[n->bits]++;
2539 s->nullpointers += tn_info(n)->empty_children;
2540 }
2541 }
2542 rcu_read_unlock();
2543 }
2544
2545
2546
2547
2548 static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2549 {
2550 unsigned int i, max, pointers, bytes, avdepth;
2551
2552 if (stat->leaves)
2553 avdepth = stat->totdepth*100 / stat->leaves;
2554 else
2555 avdepth = 0;
2556
2557 seq_printf(seq, "\tAver depth: %u.%02d\n",
2558 avdepth / 100, avdepth % 100);
2559 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
2560
2561 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
2562 bytes = LEAF_SIZE * stat->leaves;
2563
2564 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2565 bytes += sizeof(struct fib_alias) * stat->prefixes;
2566
2567 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
2568 bytes += TNODE_SIZE(0) * stat->tnodes;
2569
2570 max = MAX_STAT_DEPTH;
2571 while (max > 0 && stat->nodesizes[max-1] == 0)
2572 max--;
2573
2574 pointers = 0;
2575 for (i = 1; i < max; i++)
2576 if (stat->nodesizes[i] != 0) {
2577 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
2578 pointers += (1<<i) * stat->nodesizes[i];
2579 }
2580 seq_putc(seq, '\n');
2581 seq_printf(seq, "\tPointers: %u\n", pointers);
2582
2583 bytes += sizeof(struct key_vector *) * pointers;
2584 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2585 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
2586 }
2587
2588 #ifdef CONFIG_IP_FIB_TRIE_STATS
2589 static void trie_show_usage(struct seq_file *seq,
2590 const struct trie_use_stats __percpu *stats)
2591 {
2592 struct trie_use_stats s = { 0 };
2593 int cpu;
2594
2595
2596 for_each_possible_cpu(cpu) {
2597 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2598
2599 s.gets += pcpu->gets;
2600 s.backtrack += pcpu->backtrack;
2601 s.semantic_match_passed += pcpu->semantic_match_passed;
2602 s.semantic_match_miss += pcpu->semantic_match_miss;
2603 s.null_node_hit += pcpu->null_node_hit;
2604 s.resize_node_skipped += pcpu->resize_node_skipped;
2605 }
2606
2607 seq_printf(seq, "\nCounters:\n---------\n");
2608 seq_printf(seq, "gets = %u\n", s.gets);
2609 seq_printf(seq, "backtracks = %u\n", s.backtrack);
2610 seq_printf(seq, "semantic match passed = %u\n",
2611 s.semantic_match_passed);
2612 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2613 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2614 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
2615 }
2616 #endif
2617
2618 static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
2619 {
2620 if (tb->tb_id == RT_TABLE_LOCAL)
2621 seq_puts(seq, "Local:\n");
2622 else if (tb->tb_id == RT_TABLE_MAIN)
2623 seq_puts(seq, "Main:\n");
2624 else
2625 seq_printf(seq, "Id %d:\n", tb->tb_id);
2626 }
2627
2628
2629 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2630 {
2631 struct net *net = seq->private;
2632 unsigned int h;
2633
2634 seq_printf(seq,
2635 "Basic info: size of leaf:"
2636 " %zd bytes, size of tnode: %zd bytes.\n",
2637 LEAF_SIZE, TNODE_SIZE(0));
2638
2639 rcu_read_lock();
2640 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2641 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2642 struct fib_table *tb;
2643
2644 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2645 struct trie *t = (struct trie *) tb->tb_data;
2646 struct trie_stat stat;
2647
2648 if (!t)
2649 continue;
2650
2651 fib_table_print(seq, tb);
2652
2653 trie_collect_stats(t, &stat);
2654 trie_show_stats(seq, &stat);
2655 #ifdef CONFIG_IP_FIB_TRIE_STATS
2656 trie_show_usage(seq, t->stats);
2657 #endif
2658 }
2659 cond_resched_rcu();
2660 }
2661 rcu_read_unlock();
2662
2663 return 0;
2664 }
2665
2666 static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
2667 {
2668 struct fib_trie_iter *iter = seq->private;
2669 struct net *net = seq_file_net(seq);
2670 loff_t idx = 0;
2671 unsigned int h;
2672
2673 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2674 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2675 struct fib_table *tb;
2676
2677 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2678 struct key_vector *n;
2679
2680 for (n = fib_trie_get_first(iter,
2681 (struct trie *) tb->tb_data);
2682 n; n = fib_trie_get_next(iter))
2683 if (pos == idx++) {
2684 iter->tb = tb;
2685 return n;
2686 }
2687 }
2688 }
2689
2690 return NULL;
2691 }
2692
2693 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2694 __acquires(RCU)
2695 {
2696 rcu_read_lock();
2697 return fib_trie_get_idx(seq, *pos);
2698 }
2699
2700 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2701 {
2702 struct fib_trie_iter *iter = seq->private;
2703 struct net *net = seq_file_net(seq);
2704 struct fib_table *tb = iter->tb;
2705 struct hlist_node *tb_node;
2706 unsigned int h;
2707 struct key_vector *n;
2708
2709 ++*pos;
2710
2711 n = fib_trie_get_next(iter);
2712 if (n)
2713 return n;
2714
2715
2716 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2717 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
2718 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2719 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2720 if (n)
2721 goto found;
2722 }
2723
2724
2725 while (++h < FIB_TABLE_HASHSZ) {
2726 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2727 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
2728 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2729 if (n)
2730 goto found;
2731 }
2732 }
2733 return NULL;
2734
2735 found:
2736 iter->tb = tb;
2737 return n;
2738 }
2739
2740 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2741 __releases(RCU)
2742 {
2743 rcu_read_unlock();
2744 }
2745
2746 static void seq_indent(struct seq_file *seq, int n)
2747 {
2748 while (n-- > 0)
2749 seq_puts(seq, " ");
2750 }
2751
2752 static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2753 {
2754 switch (s) {
2755 case RT_SCOPE_UNIVERSE: return "universe";
2756 case RT_SCOPE_SITE: return "site";
2757 case RT_SCOPE_LINK: return "link";
2758 case RT_SCOPE_HOST: return "host";
2759 case RT_SCOPE_NOWHERE: return "nowhere";
2760 default:
2761 snprintf(buf, len, "scope=%d", s);
2762 return buf;
2763 }
2764 }
2765
2766 static const char *const rtn_type_names[__RTN_MAX] = {
2767 [RTN_UNSPEC] = "UNSPEC",
2768 [RTN_UNICAST] = "UNICAST",
2769 [RTN_LOCAL] = "LOCAL",
2770 [RTN_BROADCAST] = "BROADCAST",
2771 [RTN_ANYCAST] = "ANYCAST",
2772 [RTN_MULTICAST] = "MULTICAST",
2773 [RTN_BLACKHOLE] = "BLACKHOLE",
2774 [RTN_UNREACHABLE] = "UNREACHABLE",
2775 [RTN_PROHIBIT] = "PROHIBIT",
2776 [RTN_THROW] = "THROW",
2777 [RTN_NAT] = "NAT",
2778 [RTN_XRESOLVE] = "XRESOLVE",
2779 };
2780
2781 static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2782 {
2783 if (t < __RTN_MAX && rtn_type_names[t])
2784 return rtn_type_names[t];
2785 snprintf(buf, len, "type %u", t);
2786 return buf;
2787 }
2788
2789
2790 static int fib_trie_seq_show(struct seq_file *seq, void *v)
2791 {
2792 const struct fib_trie_iter *iter = seq->private;
2793 struct key_vector *n = v;
2794
2795 if (IS_TRIE(node_parent_rcu(n)))
2796 fib_table_print(seq, iter->tb);
2797
2798 if (IS_TNODE(n)) {
2799 __be32 prf = htonl(n->key);
2800
2801 seq_indent(seq, iter->depth-1);
2802 seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
2803 &prf, KEYLENGTH - n->pos - n->bits, n->bits,
2804 tn_info(n)->full_children,
2805 tn_info(n)->empty_children);
2806 } else {
2807 __be32 val = htonl(n->key);
2808 struct fib_alias *fa;
2809
2810 seq_indent(seq, iter->depth);
2811 seq_printf(seq, " |-- %pI4\n", &val);
2812
2813 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
2814 char buf1[32], buf2[32];
2815
2816 seq_indent(seq, iter->depth + 1);
2817 seq_printf(seq, " /%zu %s %s",
2818 KEYLENGTH - fa->fa_slen,
2819 rtn_scope(buf1, sizeof(buf1),
2820 fa->fa_info->fib_scope),
2821 rtn_type(buf2, sizeof(buf2),
2822 fa->fa_type));
2823 if (fa->fa_dscp)
2824 seq_printf(seq, " tos=%d",
2825 inet_dscp_to_dsfield(fa->fa_dscp));
2826 seq_putc(seq, '\n');
2827 }
2828 }
2829
2830 return 0;
2831 }
2832
2833 static const struct seq_operations fib_trie_seq_ops = {
2834 .start = fib_trie_seq_start,
2835 .next = fib_trie_seq_next,
2836 .stop = fib_trie_seq_stop,
2837 .show = fib_trie_seq_show,
2838 };
2839
2840 struct fib_route_iter {
2841 struct seq_net_private p;
2842 struct fib_table *main_tb;
2843 struct key_vector *tnode;
2844 loff_t pos;
2845 t_key key;
2846 };
2847
2848 static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
2849 loff_t pos)
2850 {
2851 struct key_vector *l, **tp = &iter->tnode;
2852 t_key key;
2853
2854
2855 if (iter->pos > 0 && pos >= iter->pos) {
2856 key = iter->key;
2857 } else {
2858 iter->pos = 1;
2859 key = 0;
2860 }
2861
2862 pos -= iter->pos;
2863
2864 while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
2865 key = l->key + 1;
2866 iter->pos++;
2867 l = NULL;
2868
2869
2870 if (!key)
2871 break;
2872 }
2873
2874 if (l)
2875 iter->key = l->key;
2876 else
2877 iter->pos = 0;
2878
2879 return l;
2880 }
2881
2882 static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2883 __acquires(RCU)
2884 {
2885 struct fib_route_iter *iter = seq->private;
2886 struct fib_table *tb;
2887 struct trie *t;
2888
2889 rcu_read_lock();
2890
2891 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
2892 if (!tb)
2893 return NULL;
2894
2895 iter->main_tb = tb;
2896 t = (struct trie *)tb->tb_data;
2897 iter->tnode = t->kv;
2898
2899 if (*pos != 0)
2900 return fib_route_get_idx(iter, *pos);
2901
2902 iter->pos = 0;
2903 iter->key = KEY_MAX;
2904
2905 return SEQ_START_TOKEN;
2906 }
2907
2908 static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2909 {
2910 struct fib_route_iter *iter = seq->private;
2911 struct key_vector *l = NULL;
2912 t_key key = iter->key + 1;
2913
2914 ++*pos;
2915
2916
2917 if ((v == SEQ_START_TOKEN) || key)
2918 l = leaf_walk_rcu(&iter->tnode, key);
2919
2920 if (l) {
2921 iter->key = l->key;
2922 iter->pos++;
2923 } else {
2924 iter->pos = 0;
2925 }
2926
2927 return l;
2928 }
2929
2930 static void fib_route_seq_stop(struct seq_file *seq, void *v)
2931 __releases(RCU)
2932 {
2933 rcu_read_unlock();
2934 }
2935
2936 static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
2937 {
2938 unsigned int flags = 0;
2939
2940 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2941 flags = RTF_REJECT;
2942 if (fi) {
2943 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2944
2945 if (nhc->nhc_gw.ipv4)
2946 flags |= RTF_GATEWAY;
2947 }
2948 if (mask == htonl(0xFFFFFFFF))
2949 flags |= RTF_HOST;
2950 flags |= RTF_UP;
2951 return flags;
2952 }
2953
2954
2955
2956
2957
2958
2959
2960 static int fib_route_seq_show(struct seq_file *seq, void *v)
2961 {
2962 struct fib_route_iter *iter = seq->private;
2963 struct fib_table *tb = iter->main_tb;
2964 struct fib_alias *fa;
2965 struct key_vector *l = v;
2966 __be32 prefix;
2967
2968 if (v == SEQ_START_TOKEN) {
2969 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2970 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2971 "\tWindow\tIRTT");
2972 return 0;
2973 }
2974
2975 prefix = htonl(l->key);
2976
2977 hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
2978 struct fib_info *fi = fa->fa_info;
2979 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
2980 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2981
2982 if ((fa->fa_type == RTN_BROADCAST) ||
2983 (fa->fa_type == RTN_MULTICAST))
2984 continue;
2985
2986 if (fa->tb_id != tb->tb_id)
2987 continue;
2988
2989 seq_setwidth(seq, 127);
2990
2991 if (fi) {
2992 struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
2993 __be32 gw = 0;
2994
2995 if (nhc->nhc_gw_family == AF_INET)
2996 gw = nhc->nhc_gw.ipv4;
2997
2998 seq_printf(seq,
2999 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
3000 "%d\t%08X\t%d\t%u\t%u",
3001 nhc->nhc_dev ? nhc->nhc_dev->name : "*",
3002 prefix, gw, flags, 0, 0,
3003 fi->fib_priority,
3004 mask,
3005 (fi->fib_advmss ?
3006 fi->fib_advmss + 40 : 0),
3007 fi->fib_window,
3008 fi->fib_rtt >> 3);
3009 } else {
3010 seq_printf(seq,
3011 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
3012 "%d\t%08X\t%d\t%u\t%u",
3013 prefix, 0, flags, 0, 0, 0,
3014 mask, 0, 0, 0);
3015 }
3016 seq_pad(seq, '\n');
3017 }
3018
3019 return 0;
3020 }
3021
3022 static const struct seq_operations fib_route_seq_ops = {
3023 .start = fib_route_seq_start,
3024 .next = fib_route_seq_next,
3025 .stop = fib_route_seq_stop,
3026 .show = fib_route_seq_show,
3027 };
3028
3029 int __net_init fib_proc_init(struct net *net)
3030 {
3031 if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
3032 sizeof(struct fib_trie_iter)))
3033 goto out1;
3034
3035 if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
3036 fib_triestat_seq_show, NULL))
3037 goto out2;
3038
3039 if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
3040 sizeof(struct fib_route_iter)))
3041 goto out3;
3042
3043 return 0;
3044
3045 out3:
3046 remove_proc_entry("fib_triestat", net->proc_net);
3047 out2:
3048 remove_proc_entry("fib_trie", net->proc_net);
3049 out1:
3050 return -ENOMEM;
3051 }
3052
3053 void __net_exit fib_proc_exit(struct net *net)
3054 {
3055 remove_proc_entry("fib_trie", net->proc_net);
3056 remove_proc_entry("fib_triestat", net->proc_net);
3057 remove_proc_entry("route", net->proc_net);
3058 }
3059
3060 #endif