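/* net/sched/sch_hhf.c	Heavy-Hitter Filter (HHF) queueing discipline
 *
 * Splits traffic into two WDRR buckets -- heavy-hitter flows and everything
 * else -- so that a handful of high-rate flows cannot monopolize the queue
 * at the expense of the many low-rate ones.
 */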
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/siphash.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
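
/* How the filter works:
 *
 * Each packet is hashed to a flow id and run through a multi-stage filter:
 * HHF_ARRAYS_CNT counter arrays of HHF_ARRAYS_LEN entries, each indexed by
 * an independent HHF_BIT_MASK_LEN-bit chunk of the flow hash.  The minimum
 * of the per-stage counters is a conservative estimate of the bytes the flow
 * has sent since the arrays were last reset (every hhf_reset_timeout).  Once
 * that estimate exceeds hhf_admit_bytes, the flow is recorded in the
 * hh_flows table as a heavy-hitter; entries idle for hhf_evict_timeout are
 * evicted.
 *
 * Heavy-hitter packets go to one WDRR bucket, all other packets to a second
 * bucket.  Dequeue runs weighted deficit round robin between the two, with
 * weight 1 for heavy-hitters and hhf_non_hh_weight for the rest, so small
 * flows keep being served even when a few large flows dominate the load.
 */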

#define HH_FLOWS_CNT	 1024	/* number of entries in the heavy-hitter table */
#define HHF_ARRAYS_CNT	 4	/* number of arrays in the multi-stage filter */
#define HHF_ARRAYS_LEN	 1024	/* number of counters in each filter array */
#define HHF_BIT_MASK_LEN 10	/* width of one hash chunk, in bits */
#define HHF_BIT_MASK	 0x3FF	/* mask selecting one hash chunk */

#define WDRR_BUCKET_CNT  2	/* two buckets for Weighted DRR */
enum wdrr_bucket_idx {
	WDRR_BUCKET_FOR_HH	= 0, /* bucket id for heavy-hitters */
	WDRR_BUCKET_FOR_NON_HH	= 1  /* bucket id for non-heavy-hitters */
};

/* Wrap-safe "a is before b" comparison for u32 jiffies timestamps. */
#define hhf_time_before(a, b)	\
	(typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))

/* Per-flow state of a captured heavy-hitter. */
struct hh_flow_state {
	u32		 hash_id;	/* hash of the flow id */
	u32		 hit_timestamp;	/* last time the flow was seen */
	struct list_head flowchain;	/* chaining under a hh_flows slot */
};

/* One bucket of the Weighted Deficit Round Robin (WDRR) scheduler. */
struct wdrr_bucket {
	struct sk_buff	 *head;
	struct sk_buff	 *tail;
	struct list_head bucketchain;
	int		 deficit;
};

struct hhf_sched_data {
	struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
	siphash_key_t	   perturbation;   /* hash perturbation */
	u32		   quantum;	   /* psched_mtu(qdisc_dev(sch)) */
	u32		   drop_overlimit; /* number of times the qdisc packet
					    * limit was hit
					    */

	struct list_head   *hh_flows;	       /* table of captured heavy-hitters */
	u32		   hh_flows_limit;     /* max number of entries in the table */
	u32		   hh_flows_overlimit; /* number of disallowed entries */
	u32		   hh_flows_total_cnt; /* total captured heavy-hitters */
	u32		   hh_flows_current_cnt; /* entries currently in the table */
	u32		   *hhf_arrays[HHF_ARRAYS_CNT]; /* multi-stage filter counters */
	u32		   hhf_arrays_reset_timestamp;	/* last time the filter
							 * arrays were reset
							 */
	unsigned long	   *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits
							     * of the counters;
							     * cleared instead of
							     * zeroing the arrays
							     */

	/* Same "new" vs. "old" list scheme as the DRR in fq_codel. */
	struct list_head   new_buckets; /* list of new buckets */
	struct list_head   old_buckets; /* list of old buckets */

	/* Configurable HHF parameters */
	u32		   hhf_reset_timeout; /* interval to reset the counter
					       * arrays, in jiffies
					       * (default 40ms)
					       */
	u32		   hhf_admit_bytes;   /* counter threshold above which
					       * a flow is considered a
					       * heavy-hitter (default 128KB)
					       */
	u32		   hhf_evict_timeout; /* aging threshold to evict idle
					       * heavy-hitters, in jiffies; a
					       * flow not seen for this long is
					       * removed (default 1 sec)
					       */
	u32		   hhf_non_hh_weight; /* WDRR weight for non-heavy-
					       * hitters (default 2, i.e.
					       * non-HH : HH = 2 : 1)
					       */
};

static u32 hhf_time_stamp(void)
{
	return jiffies;
}
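
/* Looks up a heavy-hitter flow in the chaining list of a hh_flows slot,
 * evicting expired entries along the way.
 */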
static struct hh_flow_state *seek_list(const u32 hash,
				       struct list_head *head,
				       struct hhf_sched_data *q)
{
	struct hh_flow_state *flow, *next;
	u32 now = hhf_time_stamp();

	if (list_empty(head))
		return NULL;

	list_for_each_entry_safe(flow, next, head, flowchain) {
		u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

		if (hhf_time_before(prev, now)) {
			/* Delete expired heavy-hitters, but preserve one entry
			 * to avoid kzalloc() when this slot is hit next time.
			 */
			if (list_is_last(&flow->flowchain, head))
				return NULL;
			list_del(&flow->flowchain);
			kfree(flow);
			q->hh_flows_current_cnt--;
		} else if (flow->hash_id == hash) {
			return flow;
		}
	}
	return NULL;
}
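
/* Returns a flow state entry for a new heavy-hitter: either an expired entry
 * reused in place, or a freshly allocated one.  Returns NULL if the table is
 * full or the allocation fails.
 */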
static struct hh_flow_state *alloc_new_hh(struct list_head *head,
					  struct hhf_sched_data *q)
{
	struct hh_flow_state *flow;
	u32 now = hhf_time_stamp();

	if (!list_empty(head)) {
		/* Find an expired heavy-hitter flow entry to reuse. */
		list_for_each_entry(flow, head, flowchain) {
			u32 prev = flow->hit_timestamp + q->hhf_evict_timeout;

			if (hhf_time_before(prev, now))
				return flow;
		}
	}

	if (q->hh_flows_current_cnt >= q->hh_flows_limit) {
		q->hh_flows_overlimit++;
		return NULL;
	}
	/* Create a new entry. */
	flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC);
	if (!flow)
		return NULL;

	q->hh_flows_current_cnt++;
	INIT_LIST_HEAD(&flow->flowchain);
	list_add_tail(&flow->flowchain, head);

	return flow;
}
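
/* Assigns a packet to a WDRR bucket.  Implements the multi-stage filter that
 * detects heavy-hitters.
 */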
static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	u32 tmp_hash, hash;
	u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos;
	struct hh_flow_state *flow;
	u32 pkt_len, min_hhf_val;
	int i;
	u32 prev;
	u32 now = hhf_time_stamp();

	/* Reset the HHF counter arrays if it is time to do so. */
	prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout;
	if (hhf_time_before(prev, now)) {
		for (i = 0; i < HHF_ARRAYS_CNT; i++)
			bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN);
		q->hhf_arrays_reset_timestamp = now;
	}

	/* Get the hashed flow id of the skb. */
	hash = skb_get_hash_perturb(skb, &q->perturbation);

	/* Check if this packet belongs to an already established HH flow. */
	flow_pos = hash & HHF_BIT_MASK;
	flow = seek_list(hash, &q->hh_flows[flow_pos], q);
	if (flow) { /* found its HH flow */
		flow->hit_timestamp = now;
		return WDRR_BUCKET_FOR_HH;
	}

	/* Now pass the packet through the multi-stage filter. */
	tmp_hash = hash;
	xorsum = 0;
	for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) {
		/* Split the skb hash into three 10-bit chunks. */
		filter_pos[i] = tmp_hash & HHF_BIT_MASK;
		xorsum ^= filter_pos[i];
		tmp_hash >>= HHF_BIT_MASK_LEN;
	}
	/* The last chunk is computed as XOR of the first three chunks. */
	filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash;

	pkt_len = qdisc_pkt_len(skb);
	min_hhf_val = ~0U;
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		u32 val;

		if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) {
			/* Lazily zero counters invalidated by the last reset. */
			q->hhf_arrays[i][filter_pos[i]] = 0;
			__set_bit(filter_pos[i], q->hhf_valid_bits[i]);
		}

		val = q->hhf_arrays[i][filter_pos[i]] + pkt_len;
		if (min_hhf_val > val)
			min_hhf_val = val;
	}

	/* Found a new HH iff all counter values > HH admit threshold. */
	if (min_hhf_val > q->hhf_admit_bytes) {
		/* Just captured a new heavy-hitter. */
		flow = alloc_new_hh(&q->hh_flows[flow_pos], q);
		if (!flow) /* table full or memory alloc failure */
			return WDRR_BUCKET_FOR_NON_HH;
		flow->hash_id = hash;
		flow->hit_timestamp = now;
		q->hh_flows_total_cnt++;

		/* The counters in q->hhf_arrays are deliberately not updated
		 * here: subsequent packets of this flow will match the
		 * heavy-hitter table directly instead of going through the
		 * filter again.
		 */
		return WDRR_BUCKET_FOR_HH;
	}

	/* Conservative update: only raise counters up to the new minimum. */
	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val)
			q->hhf_arrays[i][filter_pos[i]] = min_hhf_val;
	}
	return WDRR_BUCKET_FOR_NON_HH;
}
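
/* Removes one skb from the head of a bucket. */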
static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
{
	struct sk_buff *skb = bucket->head;

	bucket->head = skb->next;
	skb_mark_not_on_list(skb);
	return skb;
}
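
/* Tail-adds an skb to a bucket. */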
static void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb)
{
	if (bucket->head == NULL)
		bucket->head = skb;
	else
		bucket->tail->next = skb;
	bucket->tail = skb;
	skb->next = NULL;
}
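
/* Drops one queued packet, preferring the heavy-hitter bucket, and returns
 * the index of the bucket that was dropped from.
 */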
static unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct wdrr_bucket *bucket;

	/* Always try to drop from the heavy-hitter bucket first. */
	bucket = &q->buckets[WDRR_BUCKET_FOR_HH];
	if (!bucket->head)
		bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH];

	if (bucket->head) {
		struct sk_buff *skb = dequeue_head(bucket);

		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, to_free);
	}

	/* Return the id of the bucket the packet was dropped from. */
	return bucket - q->buckets;
}
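
/* Classifies and queues the packet; when the backlog limit is exceeded,
 * drops a packet (heavy-hitters first) to make room.
 */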
static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	enum wdrr_bucket_idx idx;
	struct wdrr_bucket *bucket;
	unsigned int prev_backlog;

	idx = hhf_classify(skb, sch);

	bucket = &q->buckets[idx];
	bucket_add(bucket, skb);
	qdisc_qstats_backlog_inc(sch, skb);

	if (list_empty(&bucket->bucketchain)) {
		unsigned int weight;

		/* The new_buckets vs. old_buckets logic mirrors the
		 * new_flows vs. old_flows DRR scheme in fq_codel.
		 */
		if (idx == WDRR_BUCKET_FOR_HH) {
			/* Always move heavy-hitters to the old bucket list. */
			weight = 1;
			list_add_tail(&bucket->bucketchain, &q->old_buckets);
		} else {
			weight = q->hhf_non_hh_weight;
			list_add_tail(&bucket->bucketchain, &q->new_buckets);
		}
		bucket->deficit = weight * q->quantum;
	}
	if (++sch->q.qlen <= sch->limit)
		return NET_XMIT_SUCCESS;

	prev_backlog = sch->qstats.backlog;
	q->drop_overlimit++;
	/* Return Congestion Notification only if we dropped a packet from
	 * this bucket.
	 */
	if (hhf_drop(sch, to_free) == idx)
		return NET_XMIT_CN;

	/* We dropped a packet from another bucket, so let the upper stack
	 * know about the reduced backlog.
	 */
	qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
	return NET_XMIT_SUCCESS;
}
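
/* Weighted deficit round robin over the new/old bucket lists. */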
static struct sk_buff *hhf_dequeue(struct Qdisc *sch)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct wdrr_bucket *bucket;
	struct list_head *head;

begin:
	head = &q->new_buckets;
	if (list_empty(head)) {
		head = &q->old_buckets;
		if (list_empty(head))
			return NULL;
	}
	bucket = list_first_entry(head, struct wdrr_bucket, bucketchain);

	if (bucket->deficit <= 0) {
		int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ?
			      1 : q->hhf_non_hh_weight;

		bucket->deficit += weight * q->quantum;
		list_move_tail(&bucket->bucketchain, &q->old_buckets);
		goto begin;
	}

	if (bucket->head) {
		skb = dequeue_head(bucket);
		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
	}

	if (!skb) {
		/* Force a pass through old_buckets to prevent starvation. */
		if ((head == &q->new_buckets) && !list_empty(&q->old_buckets))
			list_move_tail(&bucket->bucketchain, &q->old_buckets);
		else
			list_del_init(&bucket->bucketchain);
		goto begin;
	}
	qdisc_bstats_update(sch, skb);
	bucket->deficit -= qdisc_pkt_len(skb);

	return skb;
}

static void hhf_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = hhf_dequeue(sch)) != NULL)
		rtnl_kfree_skbs(skb, skb);
}

static void hhf_destroy(struct Qdisc *sch)
{
	int i;
	struct hhf_sched_data *q = qdisc_priv(sch);

	for (i = 0; i < HHF_ARRAYS_CNT; i++) {
		kvfree(q->hhf_arrays[i]);
		kvfree(q->hhf_valid_bits[i]);
	}

	if (!q->hh_flows)
		return;

	for (i = 0; i < HH_FLOWS_CNT; i++) {
		struct hh_flow_state *flow, *next;
		struct list_head *head = &q->hh_flows[i];

		if (list_empty(head))
			continue;
		list_for_each_entry_safe(flow, next, head, flowchain) {
			list_del(&flow->flowchain);
			kfree(flow);
		}
	}
	kvfree(q->hh_flows);
}

static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
	[TCA_HHF_BACKLOG_LIMIT]	 = { .type = NLA_U32 },
	[TCA_HHF_QUANTUM]	 = { .type = NLA_U32 },
	[TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 },
	[TCA_HHF_RESET_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_ADMIT_BYTES]	 = { .type = NLA_U32 },
	[TCA_HHF_EVICT_TIMEOUT]	 = { .type = NLA_U32 },
	[TCA_HHF_NON_HH_WEIGHT]	 = { .type = NLA_U32 },
};
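
/* Applies netlink configuration under the qdisc tree lock and trims the
 * backlog down to the (possibly lowered) limit.
 */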
static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HHF_MAX + 1];
	unsigned int qlen, prev_backlog;
	int err;
	u64 non_hh_quantum;
	u32 new_quantum = q->quantum;
	u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_HHF_QUANTUM])
		new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]);

	if (tb[TCA_HHF_NON_HH_WEIGHT])
		new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]);

	non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight;
	if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX)
		return -EINVAL;

	sch_tree_lock(sch);

	if (tb[TCA_HHF_BACKLOG_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]);

	q->quantum = new_quantum;
	q->hhf_non_hh_weight = new_hhf_non_hh_weight;

	if (tb[TCA_HHF_HH_FLOWS_LIMIT])
		q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]);

	if (tb[TCA_HHF_RESET_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]);

		q->hhf_reset_timeout = usecs_to_jiffies(us);
	}

	if (tb[TCA_HHF_ADMIT_BYTES])
		q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]);

	if (tb[TCA_HHF_EVICT_TIMEOUT]) {
		u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]);

		q->hhf_evict_timeout = usecs_to_jiffies(us);
	}

	qlen = sch->q.qlen;
	prev_backlog = sch->qstats.backlog;
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = hhf_dequeue(sch);

		rtnl_kfree_skbs(skb, skb);
	}
	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
				  prev_backlog - sch->qstats.backlog);

	sch_tree_unlock(sch);
	return 0;
}
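
/* Sets defaults, applies any initial configuration, and allocates the
 * heavy-hitter table, the filter counter arrays and their valid-bit bitmaps.
 */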
static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	int i;

	sch->limit = 1000;
	q->quantum = psched_mtu(qdisc_dev(sch));
	get_random_bytes(&q->perturbation, sizeof(q->perturbation));
	INIT_LIST_HEAD(&q->new_buckets);
	INIT_LIST_HEAD(&q->old_buckets);

	/* Configurable HHF parameters */
	q->hhf_reset_timeout = HZ / 25; /* 40  ms */
	q->hhf_admit_bytes = 131072;    /* 128 KB */
	q->hhf_evict_timeout = HZ;      /* 1  sec */
	q->hhf_non_hh_weight = 2;

	if (opt) {
		int err = hhf_change(sch, opt, extack);

		if (err)
			return err;
	}

	if (!q->hh_flows) {
		/* Initialize the heavy-hitter flow table. */
		q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head),
				       GFP_KERNEL);
		if (!q->hh_flows)
			return -ENOMEM;
		for (i = 0; i < HH_FLOWS_CNT; i++)
			INIT_LIST_HEAD(&q->hh_flows[i]);

		/* Cap active heavy-hitters at twice the table length. */
		q->hh_flows_limit = 2 * HH_FLOWS_CNT;
		q->hh_flows_overlimit = 0;
		q->hh_flows_total_cnt = 0;
		q->hh_flows_current_cnt = 0;

		/* Initialize the heavy-hitter filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN,
						    sizeof(u32),
						    GFP_KERNEL);
			if (!q->hhf_arrays[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}
		q->hhf_arrays_reset_timestamp = hhf_time_stamp();

		/* Initialize the valid bits of the filter arrays. */
		for (i = 0; i < HHF_ARRAYS_CNT; i++) {
			q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN /
							BITS_PER_BYTE, GFP_KERNEL);
			if (!q->hhf_valid_bits[i]) {
				/* Note: hhf_destroy() will be called
				 * by our caller.
				 */
				return -ENOMEM;
			}
		}

		/* Initialize the WDRR buckets. */
		for (i = 0; i < WDRR_BUCKET_CNT; i++) {
			struct wdrr_bucket *bucket = q->buckets + i;

			INIT_LIST_HEAD(&bucket->bucketchain);
		}
	}

	return 0;
}

static int hhf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) ||
	    nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) ||
	    nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) ||
	    nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT,
			jiffies_to_usecs(q->hhf_reset_timeout)) ||
	    nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) ||
	    nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT,
			jiffies_to_usecs(q->hhf_evict_timeout)) ||
	    nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	return -1;
}

static int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct hhf_sched_data *q = qdisc_priv(sch);
	struct tc_hhf_xstats st = {
		.drop_overlimit = q->drop_overlimit,
		.hh_overlimit	= q->hh_flows_overlimit,
		.hh_tot_count	= q->hh_flows_total_cnt,
		.hh_cur_count	= q->hh_flows_current_cnt,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static struct Qdisc_ops hhf_qdisc_ops __read_mostly = {
	.id		= "hhf",
	.priv_size	= sizeof(struct hhf_sched_data),

	.enqueue	= hhf_enqueue,
	.dequeue	= hhf_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= hhf_init,
	.reset		= hhf_reset,
	.destroy	= hhf_destroy,
	.change		= hhf_change,
	.dump		= hhf_dump,
	.dump_stats	= hhf_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init hhf_module_init(void)
{
	return register_qdisc(&hhf_qdisc_ops);
}

static void __exit hhf_module_exit(void)
{
	unregister_qdisc(&hhf_qdisc_ops);
}

module_init(hhf_module_init)
module_exit(hhf_module_exit)
MODULE_AUTHOR("Terry Lam");
MODULE_AUTHOR("Nandita Dukkipati");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)");