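/* inet fragments management
 *
 * Shared reassembly infrastructure used by the IPv4, IPv6 and
 * netfilter conntrack defragmentation code.
 */
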
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/rhashtable.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/ipv6.h>

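/* Per-fragment reassembly state kept in skb->cb while the skb sits in a
 * fragment queue.  Fragments whose data is contiguous are chained into
 * "runs" via next_frag, so the rbtree holds one node per run instead of
 * one per fragment; frag_run_len is the total length of the run headed
 * by this skb.
 */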
struct ipfrag_skb_cb {
	union {
		struct inet_skb_parm h4;
		struct inet6_skb_parm h6;
	};
	struct sk_buff *next_frag;
	int frag_run_len;
};

#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))

static void fragcb_clear(struct sk_buff *skb)
{
	RB_CLEAR_NODE(&skb->rbnode);
	FRAG_CB(skb)->next_frag = NULL;
	FRAG_CB(skb)->frag_run_len = skb->len;
}

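/* Append skb to the last "run" of contiguous fragments. */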
static void fragrun_append_to_last(struct inet_frag_queue *q,
				   struct sk_buff *skb)
{
	fragcb_clear(skb);

	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
	FRAG_CB(q->fragments_tail)->next_frag = skb;
	q->fragments_tail = skb;
}

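/* Start a new "run" with skb and link it as the rightmost node of the tree. */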
static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
	fragcb_clear(skb);

	if (q->last_run_head)
		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
			     &q->last_run_head->rbnode.rb_right);
	else
		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
	rb_insert_color(&skb->rbnode, &q->rb_fragments);

	q->fragments_tail = skb;
	q->last_run_head = skb;
}

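/* Given the OR'ed IPFRAG_ECN_* values seen across all fragments, this table
 * yields the ECN codepoint of the reassembled datagram per RFC 3168:
 * 0xff means the datagram must be dropped, any other value is OR'ed into
 * the final TOS/traffic-class field.
 */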
const u8 ip_frag_ecn_table[16] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

int inet_frags_init(struct inet_frags *f)
{
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	refcount_set(&f->refcnt, 1);
	init_completion(&f->completion);
	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_fini(struct inet_frags *f)
{
	if (refcount_dec_and_test(&f->refcnt))
		complete(&f->completion);

	wait_for_completion(&f->completion);

	kmem_cache_destroy(f->frags_cachep);
	f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

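/* Called by rhashtable_free_and_destroy() for every queue still hashed when
 * an fqdir is torn down: mark the queue complete, stop its timer, and drop
 * the references that the timer and the hash table held on it.
 */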
static void inet_frags_free_cb(void *ptr, void *arg)
{
	struct inet_frag_queue *fq = ptr;
	int count;

	count = del_timer_sync(&fq->timer) ? 1 : 0;

	spin_lock_bh(&fq->lock);
	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq->flags |= INET_FRAG_COMPLETE;
		count++;
	} else if (fq->flags & INET_FRAG_HASH_DEAD) {
		count++;
	}
	spin_unlock_bh(&fq->lock);

	if (refcount_sub_and_test(count, &fq->refcnt))
		inet_frag_destroy(fq);
}

static LLIST_HEAD(fqdir_free_list);

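/* fqdir structures are queued on fqdir_free_list and freed in batches from a
 * work item, so a single rcu_barrier() call can cover many of them at once.
 */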
static void fqdir_free_fn(struct work_struct *work)
{
	struct llist_node *kill_list;
	struct fqdir *fqdir, *tmp;
	struct inet_frags *f;

	/* Atomically snapshot the list of fqdir to free */
	kill_list = llist_del_all(&fqdir_free_list);

	/* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
	 * have completed, since they need to iterate the hash table.
	 */
	rcu_barrier();

	llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) {
		f = fqdir->f;
		if (refcount_dec_and_test(&f->refcnt))
			complete(&f->completion);

		kfree(fqdir);
	}
}

static DECLARE_WORK(fqdir_free_work, fqdir_free_fn);

static void fqdir_work_fn(struct work_struct *work)
{
	struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work);

	rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);

	if (llist_add(&fqdir->free_list, &fqdir_free_list))
		queue_work(system_wq, &fqdir_free_work);
}

int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
{
	struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
	int res;

	if (!fqdir)
		return -ENOMEM;
	fqdir->f = f;
	fqdir->net = net;
	res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
	if (res < 0) {
		kfree(fqdir);
		return res;
	}
	refcount_inc(&f->refcnt);
	*fqdirp = fqdir;
	return 0;
}
EXPORT_SYMBOL(fqdir_init);

static struct workqueue_struct *inet_frag_wq;

static int __init inet_frag_wq_init(void)
{
	inet_frag_wq = create_workqueue("inet_frag_wq");
	if (!inet_frag_wq)
		panic("Could not create inet frag workq");
	return 0;
}

pure_initcall(inet_frag_wq_init);

void fqdir_exit(struct fqdir *fqdir)
{
	INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
	queue_work(inet_frag_wq, &fqdir->destroy_work);
}
EXPORT_SYMBOL(fqdir_exit);

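/* Stop a queue's timer and remove it from its hash table; the queue itself
 * is freed once the remaining references are dropped.
 */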
void inet_frag_kill(struct inet_frag_queue *fq)
{
	if (del_timer(&fq->timer))
		refcount_dec(&fq->refcnt);

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		struct fqdir *fqdir = fq->fqdir;

		fq->flags |= INET_FRAG_COMPLETE;
		rcu_read_lock();
		/* The RCU read lock provides a memory barrier
		 * guaranteeing that if fqdir->dead is false then
		 * the hash table destruction will not start until
		 * after we unlock.  Paired with fqdir_pre_exit().
		 */
		if (!READ_ONCE(fqdir->dead)) {
			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
					       fqdir->f->rhash_params);
			refcount_dec(&fq->refcnt);
		} else {
			fq->flags |= INET_FRAG_HASH_DEAD;
		}
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(inet_frag_kill);

static void inet_frag_destroy_rcu(struct rcu_head *head)
{
	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
						 rcu);
	struct inet_frags *f = q->fqdir->f;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);
}

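/* Free every fragment in the rbtree, including the per-run next_frag chains,
 * and return the total truesize released.
 */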
unsigned int inet_frag_rbtree_purge(struct rb_root *root)
{
	struct rb_node *p = rb_first(root);
	unsigned int sum = 0;

	while (p) {
		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

		p = rb_next(p);
		rb_erase(&skb->rbnode, root);
		while (skb) {
			struct sk_buff *next = FRAG_CB(skb)->next_frag;

			sum += skb->truesize;
			kfree_skb(skb);
			skb = next;
		}
	}
	return sum;
}
EXPORT_SYMBOL(inet_frag_rbtree_purge);

void inet_frag_destroy(struct inet_frag_queue *q)
{
	struct fqdir *fqdir;
	unsigned int sum, sum_truesize = 0;
	struct inet_frags *f;

	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
	WARN_ON(del_timer(&q->timer) != 0);

	/* Release all fragment data. */
	fqdir = q->fqdir;
	f = fqdir->f;
	sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
	sum = sum_truesize + f->qsize;

	call_rcu(&q->rcu, inet_frag_destroy_rcu);

	sub_frag_mem_limit(fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

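/* Allocate and initialize a new queue.  The initial reference count of 3
 * covers the expiration timer, the hash table entry and the reference that
 * inet_frag_find() hands back to its caller.
 */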
static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
					       struct inet_frags *f,
					       void *arg)
{
	struct inet_frag_queue *q;

	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
	if (!q)
		return NULL;

	q->fqdir = fqdir;
	f->constructor(q, arg);
	add_frag_mem_limit(fqdir, f->qsize);

	timer_setup(&q->timer, f->frag_expire, 0);
	spin_lock_init(&q->lock);
	refcount_set(&q->refcnt, 3);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
						void *arg,
						struct inet_frag_queue **prev)
{
	struct inet_frags *f = fqdir->f;
	struct inet_frag_queue *q;

	q = inet_frag_alloc(fqdir, f, arg);
	if (!q) {
		*prev = ERR_PTR(-ENOMEM);
		return NULL;
	}
	mod_timer(&q->timer, jiffies + fqdir->timeout);

	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
						 &q->node, f->rhash_params);
	if (*prev) {
		/* A queue with the same key was inserted concurrently;
		 * discard ours, the caller will use *prev instead.
		 */
		q->flags |= INET_FRAG_COMPLETE;
		inet_frag_kill(q);
		inet_frag_destroy(q);
		return NULL;
	}
	return q;
}

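/* Look up the queue matching @key, creating one if it does not exist yet.
 * Returns a referenced queue, or NULL when over the memory limit or when
 * allocation/insertion fails.
 */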
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
	/* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
	long high_thresh = READ_ONCE(fqdir->high_thresh);
	struct inet_frag_queue *fq = NULL, *prev;

	if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
		return NULL;

	rcu_read_lock();

	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
	if (!prev)
		fq = inet_frag_create(fqdir, key, &prev);
	if (!IS_ERR_OR_NULL(prev)) {
		fq = prev;
		if (!refcount_inc_not_zero(&fq->refcnt))
			fq = NULL;
	}
	rcu_read_unlock();
	return fq;
}
EXPORT_SYMBOL(inet_frag_find);

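/* Insert one fragment covering [offset, end) into the queue.  Returns
 * IPFRAG_OK on success, IPFRAG_DUP if the data is already fully present,
 * or IPFRAG_OVERLAP if it partially overlaps previously queued data.
 */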
int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
			   int offset, int end)
{
	struct sk_buff *last = q->fragments_tail;

	/* RFC5722, Section 4, amended by Errata ID : 3089
	 *                          When reassembling an IPv6 datagram, if
	 *   one or more its constituent fragments is determined to be an
	 *   overlapping fragment, the entire datagram (and any constituent
	 *   fragments) MUST be silently discarded.
	 *
	 * Duplicates, however, should be ignored (i.e. skb dropped, but the
	 * queue/fragments kept for later reassembly).
	 */
	if (!last)
		fragrun_create(q, skb);  /* First fragment. */
	else if (last->ip_defrag_offset + last->len < end) {
		/* This is the common case: skb goes to the end. */
		/* Detect and discard overlaps. */
		if (offset < last->ip_defrag_offset + last->len)
			return IPFRAG_OVERLAP;
		if (offset == last->ip_defrag_offset + last->len)
			fragrun_append_to_last(q, skb);
		else
			fragrun_create(q, skb);
	} else {
		/* Binary search. Note that skb can become the first fragment,
		 * but not the last (covered above).
		 */
		struct rb_node **rbn, *parent;

		rbn = &q->rb_fragments.rb_node;
		do {
			struct sk_buff *curr;
			int curr_run_end;

			parent = *rbn;
			curr = rb_to_skb(parent);
			curr_run_end = curr->ip_defrag_offset +
				       FRAG_CB(curr)->frag_run_len;
			if (end <= curr->ip_defrag_offset)
				rbn = &parent->rb_left;
			else if (offset >= curr_run_end)
				rbn = &parent->rb_right;
			else if (offset >= curr->ip_defrag_offset &&
				 end <= curr_run_end)
				return IPFRAG_DUP;
			else
				return IPFRAG_OVERLAP;
		} while (*rbn);
		/* Here we have parent properly set, and rbn pointing to
		 * one of its NULL left/right children. Insert skb.
		 */
		fragcb_clear(skb);
		rb_link_node(&skb->rbnode, parent, rbn);
		rb_insert_color(&skb->rbnode, &q->rb_fragments);
	}

	skb->ip_defrag_offset = offset;

	return IPFRAG_OK;
}
EXPORT_SYMBOL(inet_frag_queue_insert);

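/* Prepare reassembly around @skb: make @skb the head of the tree (morphing
 * it into the current head if necessary) and unclone it.  Returns an opaque
 * cursor marking where the frag_list chain continues, to be passed to
 * inet_frag_reasm_finish(), or NULL on allocation failure.
 */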
void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
			      struct sk_buff *parent)
{
	struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
	struct sk_buff **nextp;
	int delta;

	if (head != skb) {
		fp = skb_clone(skb, GFP_ATOMIC);
		if (!fp)
			return NULL;
		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
		if (RB_EMPTY_NODE(&skb->rbnode))
			FRAG_CB(parent)->next_frag = fp;
		else
			rb_replace_node(&skb->rbnode, &fp->rbnode,
					&q->rb_fragments);
		if (q->fragments_tail == skb)
			q->fragments_tail = fp;
		skb_morph(skb, head);
		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
		rb_replace_node(&head->rbnode, &skb->rbnode,
				&q->rb_fragments);
		consume_skb(head);
		head = skb;
	}
	WARN_ON(head->ip_defrag_offset != 0);

	delta = -head->truesize;

	/* Head of list must not be cloned. */
	if (skb_unclone(head, GFP_ATOMIC))
		return NULL;

	delta += head->truesize;
	if (delta)
		add_frag_mem_limit(q->fqdir, delta);

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments.
	 */
	if (skb_has_frag_list(head)) {
		struct sk_buff *clone;
		int i, plen = 0;

		clone = alloc_skb(0, GFP_ATOMIC);
		if (!clone)
			return NULL;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_frag_list_init(head);
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
		clone->data_len = head->data_len - plen;
		clone->len = clone->data_len;
		head->truesize += clone->truesize;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		add_frag_mem_limit(q->fqdir, clone->truesize);
		skb_shinfo(head)->frag_list = clone;
		nextp = &clone->next;
	} else {
		nextp = &skb_shinfo(head)->frag_list;
	}

	return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);

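/* Walk the tree in order, splicing or coalescing every remaining fragment
 * onto @head's frag_list, and fix up the aggregate skb accounting.
 */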
void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
			    void *reasm_data, bool try_coalesce)
{
	struct sk_buff **nextp = reasm_data;
	struct rb_node *rbn;
	struct sk_buff *fp;
	int sum_truesize;

	skb_push(head, head->data - skb_network_header(head));

	/* Traverse the tree in order, to build frag_list. */
	fp = FRAG_CB(head)->next_frag;
	rbn = rb_next(&head->rbnode);
	rb_erase(&head->rbnode, &q->rb_fragments);

	sum_truesize = head->truesize;
	while (rbn || fp) {
		/* fp points to the next sk_buff in the current run;
		 * rbn points to the next run.
		 */
		/* Go through the current run. */
		while (fp) {
			struct sk_buff *next_frag = FRAG_CB(fp)->next_frag;
			bool stolen;
			int delta;

			sum_truesize += fp->truesize;
			if (head->ip_summed != fp->ip_summed)
				head->ip_summed = CHECKSUM_NONE;
			else if (head->ip_summed == CHECKSUM_COMPLETE)
				head->csum = csum_add(head->csum, fp->csum);

			if (try_coalesce && skb_try_coalesce(head, fp, &stolen,
							     &delta)) {
				kfree_skb_partial(fp, stolen);
			} else {
				fp->prev = NULL;
				memset(&fp->rbnode, 0, sizeof(fp->rbnode));
				fp->sk = NULL;

				head->data_len += fp->len;
				head->len += fp->len;
				head->truesize += fp->truesize;

				*nextp = fp;
				nextp = &fp->next;
			}

			fp = next_frag;
		}
		/* Move to the next run. */
		if (rbn) {
			struct rb_node *rbnext = rb_next(rbn);

			fp = rb_to_skb(rbn);
			rb_erase(rbn, &q->rb_fragments);
			rbn = rbnext;
		}
	}
	sub_frag_mem_limit(q->fqdir, sum_truesize);

	*nextp = NULL;
	skb_mark_not_on_list(head);
	head->prev = NULL;
	head->tstamp = q->stamp;
	head->mono_delivery_time = q->mono_delivery_time;
}
EXPORT_SYMBOL(inet_frag_reasm_finish);

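/* Detach and return the first fragment of the queue, or NULL if the queue is
 * empty; used for instance when a queue expires and its original head is
 * needed to generate an ICMP error.
 */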
struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
{
	struct sk_buff *head, *skb;

	head = skb_rb_first(&q->rb_fragments);
	if (!head)
		return NULL;
	skb = FRAG_CB(head)->next_frag;
	if (skb)
		rb_replace_node(&head->rbnode, &skb->rbnode,
				&q->rb_fragments);
	else
		rb_erase(&head->rbnode, &q->rb_fragments);
	memset(&head->rbnode, 0, sizeof(head->rbnode));
	barrier();

	if (head == q->fragments_tail)
		q->fragments_tail = NULL;

	sub_frag_mem_limit(q->fqdir, head->truesize);

	return head;
}
EXPORT_SYMBOL(inet_frag_pull_head);