0001 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
0002 
0003 /* COMMON Applications Kept Enhanced (CAKE) discipline
0004  *
0005  * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
0006  * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
0007  * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
0008  * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
0009  * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
0010  * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
0011  *
0012  * The CAKE Principles:
0013  *         (or, how to have your cake and eat it too)
0014  *
0015  * This is a combination of several shaping, AQM and FQ techniques into one
0016  * easy-to-use package:
0017  *
0018  * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
0019  *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
0020  *   eliminating the need for any sort of burst parameter (eg. token bucket
0021  *   depth).  Burst support is limited to that necessary to overcome scheduling
0022  *   latency.
0023  *
0024  * - A Diffserv-aware priority queue, giving more priority to certain classes,
0025  *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
0026  *   the priority is reduced to avoid starving other tins.
0027  *
0028  * - Each priority tin has a separate Flow Queue system, to isolate traffic
0029  *   flows from each other.  This prevents a burst on one flow from increasing
0030  *   the delay to another.  Flows are distributed to queues using a
0031  *   set-associative hash function.
0032  *
0033  * - Each queue is actively managed by Cobalt, which is a combination of the
0034  *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
0035  *   congestion early via ECN (if available) and/or packet drops, to keep
0036  *   latency low.  The codel parameters are auto-tuned based on the bandwidth
0037  *   setting, as is necessary at low bandwidths.
0038  *
0039  * The configuration parameters are kept deliberately simple for ease of use.
0040  * Everything has sane defaults.  Complete generality of configuration is *not*
0041  * a goal.
0042  *
0043  * The priority queue operates according to a weighted DRR scheme, combined with
0044  * a bandwidth tracker which reuses the shaper logic to detect which side of the
0045  * bandwidth sharing threshold the tin is operating.  This determines whether a
0046  * priority-based weight (high) or a bandwidth-based weight (low) is used for
0047  * that tin in the current pass.
0048  *
0049  * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
0050  * granted us permission to leverage.
0051  */
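
/* For illustration, assuming the usual iproute2 tc-cake(8) front-end syntax,
 * a typical configuration is:
 *
 *   tc qdisc replace dev eth0 root cake bandwidth 20Mbit diffserv4 nat
 *
 * which enables the shaper, four-tin Diffserv handling and NAT-aware flow
 * hashing, leaving everything else at its default.
 */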
0052 
0053 #include <linux/module.h>
0054 #include <linux/types.h>
0055 #include <linux/kernel.h>
0056 #include <linux/jiffies.h>
0057 #include <linux/string.h>
0058 #include <linux/in.h>
0059 #include <linux/errno.h>
0060 #include <linux/init.h>
0061 #include <linux/skbuff.h>
0062 #include <linux/jhash.h>
0063 #include <linux/slab.h>
0064 #include <linux/vmalloc.h>
0065 #include <linux/reciprocal_div.h>
0066 #include <net/netlink.h>
0067 #include <linux/if_vlan.h>
0068 #include <net/pkt_sched.h>
0069 #include <net/pkt_cls.h>
0070 #include <net/tcp.h>
0071 #include <net/flow_dissector.h>
0072 
0073 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
0074 #include <net/netfilter/nf_conntrack_core.h>
0075 #endif
0076 
0077 #define CAKE_SET_WAYS (8)
0078 #define CAKE_MAX_TINS (8)
0079 #define CAKE_QUEUES (1024)
0080 #define CAKE_FLOW_MASK 63
0081 #define CAKE_FLOW_NAT_FLAG 64
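
/* Each tin's CAKE_QUEUES (1024) flow queues are treated by cake_hash() below
 * as a set-associative hash table of 128 sets x CAKE_SET_WAYS (8) ways.
 * CAKE_FLOW_MASK and CAKE_FLOW_NAT_FLAG split the flow_mode value into the
 * flow-isolation mode proper and the NAT-lookup flag.
 */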
0082 
0083 /* struct cobalt_params - contains codel and blue parameters
0084  * @interval:   codel interval (initial spacing between congestion signals)
0085  * @target:     maximum persistent sojourn time & blue update rate
0086  * @mtu_time:   serialisation delay of maximum-size packet
0087  * @p_inc:      increment of blue drop probability (0.32 fxp)
0088  * @p_dec:      decrement of blue drop probability (0.32 fxp)
0089  */
0090 struct cobalt_params {
0091     u64 interval;
0092     u64 target;
0093     u64 mtu_time;
0094     u32 p_inc;
0095     u32 p_dec;
0096 };
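
/* p_inc and p_dec (like p_drop below) are probabilities in 0.32 fixed point:
 * e.g. a p_drop of 0x80000000 means a 50% drop probability, applied by
 * comparing p_drop against a random 32-bit value at dequeue time in
 * cobalt_should_drop().
 */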
0097 
0098 /* struct cobalt_vars - contains codel and blue variables
0099  * @count:      codel dropping frequency
0100  * @rec_inv_sqrt:   reciprocal value of sqrt(count) >> 1
0101  * @drop_next:      time to drop next packet, or when we dropped last
0102  * @blue_timer:     Blue time to next drop
0103  * @p_drop:     BLUE drop probability (0.32 fxp)
0104  * @dropping:       set if in dropping state
0105  * @ecn_marked:     set if marked
0106  */
0107 struct cobalt_vars {
0108     u32 count;
0109     u32 rec_inv_sqrt;
0110     ktime_t drop_next;
0111     ktime_t blue_timer;
0112     u32     p_drop;
0113     bool    dropping;
0114     bool    ecn_marked;
0115 };
0116 
0117 enum {
0118     CAKE_SET_NONE = 0,
0119     CAKE_SET_SPARSE,
0120     CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
0121     CAKE_SET_BULK,
0122     CAKE_SET_DECAYING
0123 };
0124 
0125 struct cake_flow {
0126     /* this stuff is all needed per-flow at dequeue time */
0127     struct sk_buff    *head;
0128     struct sk_buff    *tail;
0129     struct list_head  flowchain;
0130     s32       deficit;
0131     u32       dropped;
0132     struct cobalt_vars cvars;
0133     u16       srchost; /* index into cake_host table */
0134     u16       dsthost;
0135     u8        set;
0136 }; /* please try to keep this structure <= 64 bytes */
0137 
0138 struct cake_host {
0139     u32 srchost_tag;
0140     u32 dsthost_tag;
0141     u16 srchost_bulk_flow_count;
0142     u16 dsthost_bulk_flow_count;
0143 };
0144 
0145 struct cake_heap_entry {
0146     u16 t:3, b:10;
0147 };
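
/* t:3 indexes one of up to CAKE_MAX_TINS (8) tins and b:10 one of
 * CAKE_QUEUES (1024) flow queues, so a heap entry packs into a single u16.
 */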
0148 
0149 struct cake_tin_data {
0150     struct cake_flow flows[CAKE_QUEUES];
0151     u32 backlogs[CAKE_QUEUES];
0152     u32 tags[CAKE_QUEUES]; /* for set association */
0153     u16 overflow_idx[CAKE_QUEUES];
0154     struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
0155     u16 flow_quantum;
0156 
0157     struct cobalt_params cparams;
0158     u32 drop_overlimit;
0159     u16 bulk_flow_count;
0160     u16 sparse_flow_count;
0161     u16 decaying_flow_count;
0162     u16 unresponsive_flow_count;
0163 
0164     u32 max_skblen;
0165 
0166     struct list_head new_flows;
0167     struct list_head old_flows;
0168     struct list_head decaying_flows;
0169 
0170     /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
0171     ktime_t time_next_packet;
0172     u64 tin_rate_ns;
0173     u64 tin_rate_bps;
0174     u16 tin_rate_shft;
0175 
0176     u16 tin_quantum;
0177     s32 tin_deficit;
0178     u32 tin_backlog;
0179     u32 tin_dropped;
0180     u32 tin_ecn_mark;
0181 
0182     u32 packets;
0183     u64 bytes;
0184 
0185     u32 ack_drops;
0186 
0187     /* moving averages */
0188     u64 avge_delay;
0189     u64 peak_delay;
0190     u64 base_delay;
0191 
0192     /* hash function stats */
0193     u32 way_directs;
0194     u32 way_hits;
0195     u32 way_misses;
0196     u32 way_collisions;
0197 }; /* number of tins is small, so size of this struct doesn't matter much */
0198 
0199 struct cake_sched_data {
0200     struct tcf_proto __rcu *filter_list; /* optional external classifier */
0201     struct tcf_block *block;
0202     struct cake_tin_data *tins;
0203 
0204     struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
0205     u16     overflow_timeout;
0206 
0207     u16     tin_cnt;
0208     u8      tin_mode;
0209     u8      flow_mode;
0210     u8      ack_filter;
0211     u8      atm_mode;
0212 
0213     u32     fwmark_mask;
0214     u16     fwmark_shft;
0215 
0216     /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
0217     u16     rate_shft;
0218     ktime_t     time_next_packet;
0219     ktime_t     failsafe_next_packet;
0220     u64     rate_ns;
0221     u64     rate_bps;
0222     u16     rate_flags;
0223     s16     rate_overhead;
0224     u16     rate_mpu;
0225     u64     interval;
0226     u64     target;
0227 
0228     /* resource tracking */
0229     u32     buffer_used;
0230     u32     buffer_max_used;
0231     u32     buffer_limit;
0232     u32     buffer_config_limit;
0233 
0234     /* indices for dequeue */
0235     u16     cur_tin;
0236     u16     cur_flow;
0237 
0238     struct qdisc_watchdog watchdog;
0239     const u8    *tin_index;
0240     const u8    *tin_order;
0241 
0242     /* bandwidth capacity estimate */
0243     ktime_t     last_packet_time;
0244     ktime_t     avg_window_begin;
0245     u64     avg_packet_interval;
0246     u64     avg_window_bytes;
0247     u64     avg_peak_bandwidth;
0248     ktime_t     last_reconfig_time;
0249 
0250     /* packet length stats */
0251     u32     avg_netoff;
0252     u16     max_netlen;
0253     u16     max_adjlen;
0254     u16     min_netlen;
0255     u16     min_adjlen;
0256 };
0257 
0258 enum {
0259     CAKE_FLAG_OVERHEAD     = BIT(0),
0260     CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
0261     CAKE_FLAG_INGRESS      = BIT(2),
0262     CAKE_FLAG_WASH         = BIT(3),
0263     CAKE_FLAG_SPLIT_GSO    = BIT(4)
0264 };
0265 
0266 /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
0267  * obtain the best features of each.  Codel is excellent on flows which
0268  * respond to congestion signals in a TCP-like way.  BLUE is more effective on
0269  * unresponsive flows.
0270  */
0271 
0272 struct cobalt_skb_cb {
0273     ktime_t enqueue_time;
0274     u32     adjusted_len;
0275 };
0276 
0277 static u64 us_to_ns(u64 us)
0278 {
0279     return us * NSEC_PER_USEC;
0280 }
0281 
0282 static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
0283 {
0284     qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
0285     return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
0286 }
0287 
0288 static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
0289 {
0290     return get_cobalt_cb(skb)->enqueue_time;
0291 }
0292 
0293 static void cobalt_set_enqueue_time(struct sk_buff *skb,
0294                     ktime_t now)
0295 {
0296     get_cobalt_cb(skb)->enqueue_time = now;
0297 }
0298 
0299 static u16 quantum_div[CAKE_QUEUES + 1] = {0};
0300 
0301 /* Diffserv lookup tables */
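
/* Each table below is indexed by a packet's 6-bit DSCP value and yields the
 * tin number used in that Diffserv mode.  For example, in diffserv4 mode
 * tin 0 is best effort, tin 1 bulk/background (CS1, LE), tin 2 streaming
 * media and tin 3 latency-sensitive traffic (EF and the high CS codepoints);
 * the per-tin rates and thresholds are set up via cake_reconfigure().
 */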
0302 
0303 static const u8 precedence[] = {
0304     0, 0, 0, 0, 0, 0, 0, 0,
0305     1, 1, 1, 1, 1, 1, 1, 1,
0306     2, 2, 2, 2, 2, 2, 2, 2,
0307     3, 3, 3, 3, 3, 3, 3, 3,
0308     4, 4, 4, 4, 4, 4, 4, 4,
0309     5, 5, 5, 5, 5, 5, 5, 5,
0310     6, 6, 6, 6, 6, 6, 6, 6,
0311     7, 7, 7, 7, 7, 7, 7, 7,
0312 };
0313 
0314 static const u8 diffserv8[] = {
0315     2, 0, 1, 2, 4, 2, 2, 2,
0316     1, 2, 1, 2, 1, 2, 1, 2,
0317     5, 2, 4, 2, 4, 2, 4, 2,
0318     3, 2, 3, 2, 3, 2, 3, 2,
0319     6, 2, 3, 2, 3, 2, 3, 2,
0320     6, 2, 2, 2, 6, 2, 6, 2,
0321     7, 2, 2, 2, 2, 2, 2, 2,
0322     7, 2, 2, 2, 2, 2, 2, 2,
0323 };
0324 
0325 static const u8 diffserv4[] = {
0326     0, 1, 0, 0, 2, 0, 0, 0,
0327     1, 0, 0, 0, 0, 0, 0, 0,
0328     2, 0, 2, 0, 2, 0, 2, 0,
0329     2, 0, 2, 0, 2, 0, 2, 0,
0330     3, 0, 2, 0, 2, 0, 2, 0,
0331     3, 0, 0, 0, 3, 0, 3, 0,
0332     3, 0, 0, 0, 0, 0, 0, 0,
0333     3, 0, 0, 0, 0, 0, 0, 0,
0334 };
0335 
0336 static const u8 diffserv3[] = {
0337     0, 1, 0, 0, 2, 0, 0, 0,
0338     1, 0, 0, 0, 0, 0, 0, 0,
0339     0, 0, 0, 0, 0, 0, 0, 0,
0340     0, 0, 0, 0, 0, 0, 0, 0,
0341     0, 0, 0, 0, 0, 0, 0, 0,
0342     0, 0, 0, 0, 2, 0, 2, 0,
0343     2, 0, 0, 0, 0, 0, 0, 0,
0344     2, 0, 0, 0, 0, 0, 0, 0,
0345 };
0346 
0347 static const u8 besteffort[] = {
0348     0, 0, 0, 0, 0, 0, 0, 0,
0349     0, 0, 0, 0, 0, 0, 0, 0,
0350     0, 0, 0, 0, 0, 0, 0, 0,
0351     0, 0, 0, 0, 0, 0, 0, 0,
0352     0, 0, 0, 0, 0, 0, 0, 0,
0353     0, 0, 0, 0, 0, 0, 0, 0,
0354     0, 0, 0, 0, 0, 0, 0, 0,
0355     0, 0, 0, 0, 0, 0, 0, 0,
0356 };
0357 
0358 /* tin priority order for stats dumping */
0359 
0360 static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
0361 static const u8 bulk_order[] = {1, 0, 2, 3};
0362 
0363 #define REC_INV_SQRT_CACHE (16)
0364 static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
0365 
0366 /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
0367  * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
0368  *
0369  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
0370  */
0371 
0372 static void cobalt_newton_step(struct cobalt_vars *vars)
0373 {
0374     u32 invsqrt, invsqrt2;
0375     u64 val;
0376 
0377     invsqrt = vars->rec_inv_sqrt;
0378     invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
0379     val = (3LL << 32) - ((u64)vars->count * invsqrt2);
0380 
0381     val >>= 2; /* avoid overflow in following multiply */
0382     val = (val * invsqrt) >> (32 - 2 + 1);
0383 
0384     vars->rec_inv_sqrt = val;
0385 }
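
/* Worked example in Q0.32: with count == 4 the iteration converges on
 * roughly 0x80000000, i.e. 0.5, since 1/sqrt(4) = 1/2.  The >> 2 pre-shift
 * keeps the 64-bit multiply from overflowing, and the final shift of
 * (32 - 2 + 1) accounts for the Q0.32 scale (32), the two bits already
 * shifted off (-2), and the division by two in the formula above (+1).
 */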
0386 
0387 static void cobalt_invsqrt(struct cobalt_vars *vars)
0388 {
0389     if (vars->count < REC_INV_SQRT_CACHE)
0390         vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
0391     else
0392         cobalt_newton_step(vars);
0393 }
0394 
0395 /* There is a big difference in timing between the accurate values placed in
0396  * the cache and the approximations given by a single Newton step for small
0397  * count values, particularly when stepping from count 1 to 2 or vice versa.
0398  * Above 16, a single Newton step gives sufficient accuracy in either
0399  * direction, given the precision stored.
0400  *
0401  * The magnitude of the error when stepping up to count 2 is such as to give
0402  * the value that *should* have been produced at count 4.
0403  */
0404 
0405 static void cobalt_cache_init(void)
0406 {
0407     struct cobalt_vars v;
0408 
0409     memset(&v, 0, sizeof(v));
0410     v.rec_inv_sqrt = ~0U;
0411     cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
0412 
0413     for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
0414         cobalt_newton_step(&v);
0415         cobalt_newton_step(&v);
0416         cobalt_newton_step(&v);
0417         cobalt_newton_step(&v);
0418 
0419         cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
0420     }
0421 }
0422 
0423 static void cobalt_vars_init(struct cobalt_vars *vars)
0424 {
0425     memset(vars, 0, sizeof(*vars));
0426 
0427     if (!cobalt_rec_inv_sqrt_cache[0]) {
0428         cobalt_cache_init();
0429         cobalt_rec_inv_sqrt_cache[0] = ~0;
0430     }
0431 }
0432 
0433 /* CoDel control_law is t + interval/sqrt(count)
0434  * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
0435  * both sqrt() and divide operation.
0436  */
0437 static ktime_t cobalt_control(ktime_t t,
0438                   u64 interval,
0439                   u32 rec_inv_sqrt)
0440 {
0441     return ktime_add_ns(t, reciprocal_scale(interval,
0442                         rec_inv_sqrt));
0443 }
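
/* reciprocal_scale(interval, rec_inv_sqrt) evaluates
 * (interval * rec_inv_sqrt) >> 32, i.e. interval / sqrt(count) with
 * rec_inv_sqrt held in Q0.32.  For example, at the usual 100 ms interval and
 * count == 4 (rec_inv_sqrt of about 0.5), the next signal is scheduled
 * roughly 50 ms after t.
 */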
0444 
0445 /* Call this when a packet had to be dropped due to queue overflow.  Returns
0446  * true if the BLUE state was quiescent before but active after this call.
0447  */
0448 static bool cobalt_queue_full(struct cobalt_vars *vars,
0449                   struct cobalt_params *p,
0450                   ktime_t now)
0451 {
0452     bool up = false;
0453 
0454     if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
0455         up = !vars->p_drop;
0456         vars->p_drop += p->p_inc;
0457         if (vars->p_drop < p->p_inc)
0458             vars->p_drop = ~0;
0459         vars->blue_timer = now;
0460     }
0461     vars->dropping = true;
0462     vars->drop_next = now;
0463     if (!vars->count)
0464         vars->count = 1;
0465 
0466     return up;
0467 }
0468 
0469 /* Call this when the queue was serviced but turned out to be empty.  Returns
0470  * true if the BLUE state was active before but quiescent after this call.
0471  */
0472 static bool cobalt_queue_empty(struct cobalt_vars *vars,
0473                    struct cobalt_params *p,
0474                    ktime_t now)
0475 {
0476     bool down = false;
0477 
0478     if (vars->p_drop &&
0479         ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
0480         if (vars->p_drop < p->p_dec)
0481             vars->p_drop = 0;
0482         else
0483             vars->p_drop -= p->p_dec;
0484         vars->blue_timer = now;
0485         down = !vars->p_drop;
0486     }
0487     vars->dropping = false;
0488 
0489     if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
0490         vars->count--;
0491         cobalt_invsqrt(vars);
0492         vars->drop_next = cobalt_control(vars->drop_next,
0493                          p->interval,
0494                          vars->rec_inv_sqrt);
0495     }
0496 
0497     return down;
0498 }
0499 
0500 /* Call this with a freshly dequeued packet for possible congestion marking.
0501  * Returns true as an instruction to drop the packet, false for delivery.
0502  */
0503 static bool cobalt_should_drop(struct cobalt_vars *vars,
0504                    struct cobalt_params *p,
0505                    ktime_t now,
0506                    struct sk_buff *skb,
0507                    u32 bulk_flows)
0508 {
0509     bool next_due, over_target, drop = false;
0510     ktime_t schedule;
0511     u64 sojourn;
0512 
0513 /* The 'schedule' variable records, in its sign, whether 'now' is before or
0514  * after 'drop_next'.  This allows 'drop_next' to be updated before the next
0515  * scheduling decision is actually branched, without destroying that
0516  * information.  Similarly, the first 'schedule' value calculated is preserved
0517  * in the boolean 'next_due'.
0518  *
0519  * As for 'drop_next', we take advantage of the fact that 'interval' is both
0520  * the delay between first exceeding 'target' and the first signalling event,
0521  * *and* the scaling factor for the signalling frequency.  It's therefore very
0522  * natural to use a single mechanism for both purposes, and eliminates a
0523  * significant amount of reference Codel's spaghetti code.  To help with this,
0524  * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
0525  * as possible to 1.0 in fixed-point.
0526  */
0527 
0528     sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
0529     schedule = ktime_sub(now, vars->drop_next);
0530     over_target = sojourn > p->target &&
0531               sojourn > p->mtu_time * bulk_flows * 2 &&
0532               sojourn > p->mtu_time * 4;
0533     next_due = vars->count && ktime_to_ns(schedule) >= 0;
0534 
0535     vars->ecn_marked = false;
0536 
0537     if (over_target) {
0538         if (!vars->dropping) {
0539             vars->dropping = true;
0540             vars->drop_next = cobalt_control(now,
0541                              p->interval,
0542                              vars->rec_inv_sqrt);
0543         }
0544         if (!vars->count)
0545             vars->count = 1;
0546     } else if (vars->dropping) {
0547         vars->dropping = false;
0548     }
0549 
0550     if (next_due && vars->dropping) {
0551         /* Use ECN mark if possible, otherwise drop */
0552         drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
0553 
0554         vars->count++;
0555         if (!vars->count)
0556             vars->count--;
0557         cobalt_invsqrt(vars);
0558         vars->drop_next = cobalt_control(vars->drop_next,
0559                          p->interval,
0560                          vars->rec_inv_sqrt);
0561         schedule = ktime_sub(now, vars->drop_next);
0562     } else {
0563         while (next_due) {
0564             vars->count--;
0565             cobalt_invsqrt(vars);
0566             vars->drop_next = cobalt_control(vars->drop_next,
0567                              p->interval,
0568                              vars->rec_inv_sqrt);
0569             schedule = ktime_sub(now, vars->drop_next);
0570             next_due = vars->count && ktime_to_ns(schedule) >= 0;
0571         }
0572     }
0573 
0574     /* Simple BLUE implementation.  Lack of ECN is deliberate. */
0575     if (vars->p_drop)
0576         drop |= (prandom_u32() < vars->p_drop);
0577 
0578     /* Overload the drop_next field as an activity timeout */
0579     if (!vars->count)
0580         vars->drop_next = ktime_add_ns(now, p->interval);
0581     else if (ktime_to_ns(schedule) > 0 && !drop)
0582         vars->drop_next = now;
0583 
0584     return drop;
0585 }
0586 
0587 static bool cake_update_flowkeys(struct flow_keys *keys,
0588                  const struct sk_buff *skb)
0589 {
0590 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
0591     struct nf_conntrack_tuple tuple = {};
0592     bool rev = !skb->_nfct, upd = false;
0593     __be32 ip;
0594 
0595     if (skb_protocol(skb, true) != htons(ETH_P_IP))
0596         return false;
0597 
0598     if (!nf_ct_get_tuple_skb(&tuple, skb))
0599         return false;
0600 
0601     ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
0602     if (ip != keys->addrs.v4addrs.src) {
0603         keys->addrs.v4addrs.src = ip;
0604         upd = true;
0605     }
0606     ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
0607     if (ip != keys->addrs.v4addrs.dst) {
0608         keys->addrs.v4addrs.dst = ip;
0609         upd = true;
0610     }
0611 
0612     if (keys->ports.ports) {
0613         __be16 port;
0614 
0615         port = rev ? tuple.dst.u.all : tuple.src.u.all;
0616         if (port != keys->ports.src) {
0617             keys->ports.src = port;
0618             upd = true;
0619         }
0620         port = rev ? tuple.src.u.all : tuple.dst.u.all;
0621         if (port != keys->ports.dst) {
0622             keys->ports.dst = port;
0623             upd = true;
0624         }
0625     }
0626     return upd;
0627 #else
0628     return false;
0629 #endif
0630 }
0631 
0632 /* Cake's flow_mode values can set several bits at once; CAKE_FLOW_TRIPLE
0633  *  includes both DUAL_SRC and DUAL_DST, so these checks also match it.
0634  */
0635 
0636 static bool cake_dsrc(int flow_mode)
0637 {
0638     return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
0639 }
0640 
0641 static bool cake_ddst(int flow_mode)
0642 {
0643     return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
0644 }
0645 
0646 static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
0647              int flow_mode, u16 flow_override, u16 host_override)
0648 {
0649     bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
0650     bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
0651     bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
0652     u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
0653     u16 reduced_hash, srchost_idx, dsthost_idx;
0654     struct flow_keys keys, host_keys;
0655     bool use_skbhash = skb->l4_hash;
0656 
0657     if (unlikely(flow_mode == CAKE_FLOW_NONE))
0658         return 0;
0659 
0660     /* If both overrides are set, or we can use the SKB hash and nat mode is
0661      * disabled, we can skip packet dissection entirely. If nat mode is
0662      * enabled there's another check below after doing the conntrack lookup.
0663      */
0664     if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
0665         goto skip_hash;
0666 
0667     skb_flow_dissect_flow_keys(skb, &keys,
0668                    FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
0669 
0670     /* Don't use the SKB hash if we change the lookup keys from conntrack */
0671     if (nat_enabled && cake_update_flowkeys(&keys, skb))
0672         use_skbhash = false;
0673 
0674     /* If we can still use the SKB hash and don't need the host hash, we can
0675      * skip the rest of the hashing procedure
0676      */
0677     if (use_skbhash && !hash_hosts)
0678         goto skip_hash;
0679 
0680     /* flow_hash_from_keys() sorts the addresses by value, so we have
0681      * to preserve their order in a separate data structure to treat
0682      * src and dst host addresses as independently selectable.
0683      */
0684     host_keys = keys;
0685     host_keys.ports.ports     = 0;
0686     host_keys.basic.ip_proto  = 0;
0687     host_keys.keyid.keyid     = 0;
0688     host_keys.tags.flow_label = 0;
0689 
0690     switch (host_keys.control.addr_type) {
0691     case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
0692         host_keys.addrs.v4addrs.src = 0;
0693         dsthost_hash = flow_hash_from_keys(&host_keys);
0694         host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
0695         host_keys.addrs.v4addrs.dst = 0;
0696         srchost_hash = flow_hash_from_keys(&host_keys);
0697         break;
0698 
0699     case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
0700         memset(&host_keys.addrs.v6addrs.src, 0,
0701                sizeof(host_keys.addrs.v6addrs.src));
0702         dsthost_hash = flow_hash_from_keys(&host_keys);
0703         host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
0704         memset(&host_keys.addrs.v6addrs.dst, 0,
0705                sizeof(host_keys.addrs.v6addrs.dst));
0706         srchost_hash = flow_hash_from_keys(&host_keys);
0707         break;
0708 
0709     default:
0710         dsthost_hash = 0;
0711         srchost_hash = 0;
0712     }
0713 
0714     /* This *must* be after the above switch, since as a
0715      * side-effect it sorts the src and dst addresses.
0716      */
0717     if (hash_flows && !use_skbhash)
0718         flow_hash = flow_hash_from_keys(&keys);
0719 
0720 skip_hash:
0721     if (flow_override)
0722         flow_hash = flow_override - 1;
0723     else if (use_skbhash && (flow_mode & CAKE_FLOW_FLOWS))
0724         flow_hash = skb->hash;
0725     if (host_override) {
0726         dsthost_hash = host_override - 1;
0727         srchost_hash = host_override - 1;
0728     }
0729 
0730     if (!(flow_mode & CAKE_FLOW_FLOWS)) {
0731         if (flow_mode & CAKE_FLOW_SRC_IP)
0732             flow_hash ^= srchost_hash;
0733 
0734         if (flow_mode & CAKE_FLOW_DST_IP)
0735             flow_hash ^= dsthost_hash;
0736     }
0737 
0738     reduced_hash = flow_hash % CAKE_QUEUES;
0739 
0740     /* set-associative hashing */
0741     /* fast path if no hash collision (direct lookup succeeds) */
0742     if (likely(q->tags[reduced_hash] == flow_hash &&
0743            q->flows[reduced_hash].set)) {
0744         q->way_directs++;
0745     } else {
0746         u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
0747         u32 outer_hash = reduced_hash - inner_hash;
0748         bool allocate_src = false;
0749         bool allocate_dst = false;
0750         u32 i, k;
0751 
0752         /* check if any active queue in the set is reserved for
0753          * this flow.
0754          */
0755         for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
0756              i++, k = (k + 1) % CAKE_SET_WAYS) {
0757             if (q->tags[outer_hash + k] == flow_hash) {
0758                 if (i)
0759                     q->way_hits++;
0760 
0761                 if (!q->flows[outer_hash + k].set) {
0762                     /* need to increment host refcnts */
0763                     allocate_src = cake_dsrc(flow_mode);
0764                     allocate_dst = cake_ddst(flow_mode);
0765                 }
0766 
0767                 goto found;
0768             }
0769         }
0770 
0771         /* no queue is reserved for this flow, look for an
0772          * empty one.
0773          */
0774         for (i = 0; i < CAKE_SET_WAYS;
0775              i++, k = (k + 1) % CAKE_SET_WAYS) {
0776             if (!q->flows[outer_hash + k].set) {
0777                 q->way_misses++;
0778                 allocate_src = cake_dsrc(flow_mode);
0779                 allocate_dst = cake_ddst(flow_mode);
0780                 goto found;
0781             }
0782         }
0783 
0784         /* With no empty queues, default to the original
0785          * queue, accept the collision, update the host tags.
0786          */
0787         q->way_collisions++;
0788         if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
0789             q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
0790             q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
0791         }
0792         allocate_src = cake_dsrc(flow_mode);
0793         allocate_dst = cake_ddst(flow_mode);
0794 found:
0795         /* reserve queue for future packets in same flow */
0796         reduced_hash = outer_hash + k;
0797         q->tags[reduced_hash] = flow_hash;
0798 
0799         if (allocate_src) {
0800             srchost_idx = srchost_hash % CAKE_QUEUES;
0801             inner_hash = srchost_idx % CAKE_SET_WAYS;
0802             outer_hash = srchost_idx - inner_hash;
0803             for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
0804                 i++, k = (k + 1) % CAKE_SET_WAYS) {
0805                 if (q->hosts[outer_hash + k].srchost_tag ==
0806                     srchost_hash)
0807                     goto found_src;
0808             }
0809             for (i = 0; i < CAKE_SET_WAYS;
0810                 i++, k = (k + 1) % CAKE_SET_WAYS) {
0811                 if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
0812                     break;
0813             }
0814             q->hosts[outer_hash + k].srchost_tag = srchost_hash;
0815 found_src:
0816             srchost_idx = outer_hash + k;
0817             if (q->flows[reduced_hash].set == CAKE_SET_BULK)
0818                 q->hosts[srchost_idx].srchost_bulk_flow_count++;
0819             q->flows[reduced_hash].srchost = srchost_idx;
0820         }
0821 
0822         if (allocate_dst) {
0823             dsthost_idx = dsthost_hash % CAKE_QUEUES;
0824             inner_hash = dsthost_idx % CAKE_SET_WAYS;
0825             outer_hash = dsthost_idx - inner_hash;
0826             for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
0827                  i++, k = (k + 1) % CAKE_SET_WAYS) {
0828                 if (q->hosts[outer_hash + k].dsthost_tag ==
0829                     dsthost_hash)
0830                     goto found_dst;
0831             }
0832             for (i = 0; i < CAKE_SET_WAYS;
0833                  i++, k = (k + 1) % CAKE_SET_WAYS) {
0834                 if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
0835                     break;
0836             }
0837             q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
0838 found_dst:
0839             dsthost_idx = outer_hash + k;
0840             if (q->flows[reduced_hash].set == CAKE_SET_BULK)
0841                 q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
0842             q->flows[reduced_hash].dsthost = dsthost_idx;
0843         }
0844     }
0845 
0846     return reduced_hash;
0847 }
0848 
0849 /* helper functions: might be changed when/if skb uses a standard list_head */
0850 /* remove one skb from head of slot queue */
0851 
0852 static struct sk_buff *dequeue_head(struct cake_flow *flow)
0853 {
0854     struct sk_buff *skb = flow->head;
0855 
0856     if (skb) {
0857         flow->head = skb->next;
0858         skb_mark_not_on_list(skb);
0859     }
0860 
0861     return skb;
0862 }
0863 
0864 /* add skb to flow queue (tail add) */
0865 
0866 static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
0867 {
0868     if (!flow->head)
0869         flow->head = skb;
0870     else
0871         flow->tail->next = skb;
0872     flow->tail = skb;
0873     skb->next = NULL;
0874 }
0875 
0876 static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
0877                     struct ipv6hdr *buf)
0878 {
0879     unsigned int offset = skb_network_offset(skb);
0880     struct iphdr *iph;
0881 
0882     iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);
0883 
0884     if (!iph)
0885         return NULL;
0886 
0887     if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
0888         return skb_header_pointer(skb, offset + iph->ihl * 4,
0889                       sizeof(struct ipv6hdr), buf);
0890 
0891     else if (iph->version == 4)
0892         return iph;
0893 
0894     else if (iph->version == 6)
0895         return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
0896                       buf);
0897 
0898     return NULL;
0899 }
0900 
0901 static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
0902                       void *buf, unsigned int bufsize)
0903 {
0904     unsigned int offset = skb_network_offset(skb);
0905     const struct ipv6hdr *ipv6h;
0906     const struct tcphdr *tcph;
0907     const struct iphdr *iph;
0908     struct ipv6hdr _ipv6h;
0909     struct tcphdr _tcph;
0910 
0911     ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
0912 
0913     if (!ipv6h)
0914         return NULL;
0915 
0916     if (ipv6h->version == 4) {
0917         iph = (struct iphdr *)ipv6h;
0918         offset += iph->ihl * 4;
0919 
0920         /* special-case 6in4 tunnelling, as that is a common way to get
0921          * v6 connectivity in the home
0922          */
0923         if (iph->protocol == IPPROTO_IPV6) {
0924             ipv6h = skb_header_pointer(skb, offset,
0925                            sizeof(_ipv6h), &_ipv6h);
0926 
0927             if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
0928                 return NULL;
0929 
0930             offset += sizeof(struct ipv6hdr);
0931 
0932         } else if (iph->protocol != IPPROTO_TCP) {
0933             return NULL;
0934         }
0935 
0936     } else if (ipv6h->version == 6) {
0937         if (ipv6h->nexthdr != IPPROTO_TCP)
0938             return NULL;
0939 
0940         offset += sizeof(struct ipv6hdr);
0941     } else {
0942         return NULL;
0943     }
0944 
0945     tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
0946     if (!tcph || tcph->doff < 5)
0947         return NULL;
0948 
0949     return skb_header_pointer(skb, offset,
0950                   min(__tcp_hdrlen(tcph), bufsize), buf);
0951 }
0952 
0953 static const void *cake_get_tcpopt(const struct tcphdr *tcph,
0954                    int code, int *oplen)
0955 {
0956     /* inspired by tcp_parse_options in tcp_input.c */
0957     int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
0958     const u8 *ptr = (const u8 *)(tcph + 1);
0959 
0960     while (length > 0) {
0961         int opcode = *ptr++;
0962         int opsize;
0963 
0964         if (opcode == TCPOPT_EOL)
0965             break;
0966         if (opcode == TCPOPT_NOP) {
0967             length--;
0968             continue;
0969         }
0970         if (length < 2)
0971             break;
0972         opsize = *ptr++;
0973         if (opsize < 2 || opsize > length)
0974             break;
0975 
0976         if (opcode == code) {
0977             *oplen = opsize;
0978             return ptr;
0979         }
0980 
0981         ptr += opsize - 2;
0982         length -= opsize;
0983     }
0984 
0985     return NULL;
0986 }
0987 
0988 /* Compare two SACK sequences. A sequence is considered greater if it SACKs more
0989  * bytes than the other. In the case where both sequences ACK bytes that the
0990  * other doesn't, A is considered greater. DSACKs in A also make A be
0991  * considered greater.
0992  *
0993  * @return -1, 0 or 1 as normal compare functions
0994  */
0995 static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
0996                   const struct tcphdr *tcph_b)
0997 {
0998     const struct tcp_sack_block_wire *sack_a, *sack_b;
0999     u32 ack_seq_a = ntohl(tcph_a->ack_seq);
1000     u32 bytes_a = 0, bytes_b = 0;
1001     int oplen_a, oplen_b;
1002     bool first = true;
1003 
1004     sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
1005     sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);
1006 
1007     /* pointers point to option contents */
1008     oplen_a -= TCPOLEN_SACK_BASE;
1009     oplen_b -= TCPOLEN_SACK_BASE;
1010 
1011     if (sack_a && oplen_a >= sizeof(*sack_a) &&
1012         (!sack_b || oplen_b < sizeof(*sack_b)))
1013         return -1;
1014     else if (sack_b && oplen_b >= sizeof(*sack_b) &&
1015          (!sack_a || oplen_a < sizeof(*sack_a)))
1016         return 1;
1017     else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
1018          (!sack_b || oplen_b < sizeof(*sack_b)))
1019         return 0;
1020 
1021     while (oplen_a >= sizeof(*sack_a)) {
1022         const struct tcp_sack_block_wire *sack_tmp = sack_b;
1023         u32 start_a = get_unaligned_be32(&sack_a->start_seq);
1024         u32 end_a = get_unaligned_be32(&sack_a->end_seq);
1025         int oplen_tmp = oplen_b;
1026         bool found = false;
1027 
1028         /* DSACK; always considered greater to prevent dropping */
1029         if (before(start_a, ack_seq_a))
1030             return -1;
1031 
1032         bytes_a += end_a - start_a;
1033 
1034         while (oplen_tmp >= sizeof(*sack_tmp)) {
1035             u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
1036             u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);
1037 
1038             /* first time through we count the total size */
1039             if (first)
1040                 bytes_b += end_b - start_b;
1041 
1042             if (!after(start_b, start_a) && !before(end_b, end_a)) {
1043                 found = true;
1044                 if (!first)
1045                     break;
1046             }
1047             oplen_tmp -= sizeof(*sack_tmp);
1048             sack_tmp++;
1049         }
1050 
1051         if (!found)
1052             return -1;
1053 
1054         oplen_a -= sizeof(*sack_a);
1055         sack_a++;
1056         first = false;
1057     }
1058 
1059     /* If we made it this far, all ranges SACKed by A are covered by B, so
1060      * either the SACKs are equal, or B SACKs more bytes.
1061      */
1062     return bytes_b > bytes_a ? 1 : 0;
1063 }
1064 
1065 static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
1066                  u32 *tsval, u32 *tsecr)
1067 {
1068     const u8 *ptr;
1069     int opsize;
1070 
1071     ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);
1072 
1073     if (ptr && opsize == TCPOLEN_TIMESTAMP) {
1074         *tsval = get_unaligned_be32(ptr);
1075         *tsecr = get_unaligned_be32(ptr + 4);
1076     }
1077 }
1078 
1079 static bool cake_tcph_may_drop(const struct tcphdr *tcph,
1080                    u32 tstamp_new, u32 tsecr_new)
1081 {
1082     /* inspired by tcp_parse_options in tcp_input.c */
1083     int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
1084     const u8 *ptr = (const u8 *)(tcph + 1);
1085     u32 tstamp, tsecr;
1086 
1087     /* 3 reserved flags must be unset to avoid future breakage
1088      * ACK must be set
1089      * ECE/CWR are handled separately
1090      * All other flags URG/PSH/RST/SYN/FIN must be unset
1091      * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
1092      * 0x00C00000 = CWR/ECE (handled separately)
1093      * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
1094      */
1095     if (((tcp_flag_word(tcph) &
1096           cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
1097         return false;
1098 
1099     while (length > 0) {
1100         int opcode = *ptr++;
1101         int opsize;
1102 
1103         if (opcode == TCPOPT_EOL)
1104             break;
1105         if (opcode == TCPOPT_NOP) {
1106             length--;
1107             continue;
1108         }
1109         if (length < 2)
1110             break;
1111         opsize = *ptr++;
1112         if (opsize < 2 || opsize > length)
1113             break;
1114 
1115         switch (opcode) {
1116         case TCPOPT_MD5SIG: /* doesn't influence state */
1117             break;
1118 
1119         case TCPOPT_SACK: /* stricter checking performed later */
1120             if (opsize % 8 != 2)
1121                 return false;
1122             break;
1123 
1124         case TCPOPT_TIMESTAMP:
1125             /* only drop timestamps lower than new */
1126             if (opsize != TCPOLEN_TIMESTAMP)
1127                 return false;
1128             tstamp = get_unaligned_be32(ptr);
1129             tsecr = get_unaligned_be32(ptr + 4);
1130             if (after(tstamp, tstamp_new) ||
1131                 after(tsecr, tsecr_new))
1132                 return false;
1133             break;
1134 
1135         case TCPOPT_MSS:  /* these should only be set on SYN */
1136         case TCPOPT_WINDOW:
1137         case TCPOPT_SACK_PERM:
1138         case TCPOPT_FASTOPEN:
1139         case TCPOPT_EXP:
1140         default: /* don't drop if any unknown options are present */
1141             return false;
1142         }
1143 
1144         ptr += opsize - 2;
1145         length -= opsize;
1146     }
1147 
1148     return true;
1149 }
1150 
1151 static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
1152                        struct cake_flow *flow)
1153 {
1154     bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
1155     struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
1156     struct sk_buff *skb_check, *skb_prev = NULL;
1157     const struct ipv6hdr *ipv6h, *ipv6h_check;
1158     unsigned char _tcph[64], _tcph_check[64];
1159     const struct tcphdr *tcph, *tcph_check;
1160     const struct iphdr *iph, *iph_check;
1161     struct ipv6hdr _iph, _iph_check;
1162     const struct sk_buff *skb;
1163     int seglen, num_found = 0;
1164     u32 tstamp = 0, tsecr = 0;
1165     __be32 elig_flags = 0;
1166     int sack_comp;
1167 
1168     /* no other possible ACKs to filter */
1169     if (flow->head == flow->tail)
1170         return NULL;
1171 
1172     skb = flow->tail;
1173     tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
1174     iph = cake_get_iphdr(skb, &_iph);
1175     if (!tcph)
1176         return NULL;
1177 
1178     cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);
1179 
1180     /* the 'triggering' packet need only have the ACK flag set.
1181      * also check that SYN is not set, as there won't be any previous ACKs.
1182      */
1183     if ((tcp_flag_word(tcph) &
1184          (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
1185         return NULL;
1186 
1187     /* the 'triggering' ACK is at the tail of the queue, we have already
1188      * returned if it is the only packet in the flow. loop through the rest
1189      * of the queue looking for pure ACKs with the same 5-tuple as the
1190      * triggering one.
1191      */
1192     for (skb_check = flow->head;
1193          skb_check && skb_check != skb;
1194          skb_prev = skb_check, skb_check = skb_check->next) {
1195         iph_check = cake_get_iphdr(skb_check, &_iph_check);
1196         tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
1197                          sizeof(_tcph_check));
1198 
1199         /* only TCP packets with matching 5-tuple are eligible, and only
1200          * drop safe headers
1201          */
1202         if (!tcph_check || iph->version != iph_check->version ||
1203             tcph_check->source != tcph->source ||
1204             tcph_check->dest != tcph->dest)
1205             continue;
1206 
1207         if (iph_check->version == 4) {
1208             if (iph_check->saddr != iph->saddr ||
1209                 iph_check->daddr != iph->daddr)
1210                 continue;
1211 
1212             seglen = ntohs(iph_check->tot_len) -
1213                        (4 * iph_check->ihl);
1214         } else if (iph_check->version == 6) {
1215             ipv6h = (struct ipv6hdr *)iph;
1216             ipv6h_check = (struct ipv6hdr *)iph_check;
1217 
1218             if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
1219                 ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
1220                 continue;
1221 
1222             seglen = ntohs(ipv6h_check->payload_len);
1223         } else {
1224             WARN_ON(1);  /* shouldn't happen */
1225             continue;
1226         }
1227 
1228         /* If the ECE/CWR flags changed from the previous eligible
1229          * packet in the same flow, we should no longer be dropping that
1230          * previous packet as this would lose information.
1231          */
1232         if (elig_ack && (tcp_flag_word(tcph_check) &
1233                  (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
1234             elig_ack = NULL;
1235             elig_ack_prev = NULL;
1236             num_found--;
1237         }
1238 
1239         /* Check TCP options and flags, don't drop ACKs with segment
1240          * data, and don't drop ACKs with a higher cumulative ACK
1241          * counter than the triggering packet. Check ACK seqno here to
1242          * avoid parsing SACK options of packets we are going to exclude
1243          * anyway.
1244          */
1245         if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
1246             (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
1247             after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
1248             continue;
1249 
1250         /* Check SACK options. The triggering packet must SACK more data
1251          * than the ACK under consideration, or SACK the same range but
1252          * have a larger cumulative ACK counter. The latter is a
1253          * pathological case, but is contained in the following check
1254          * anyway, just to be safe.
1255          */
1256         sack_comp = cake_tcph_sack_compare(tcph_check, tcph);
1257 
1258         if (sack_comp < 0 ||
1259             (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
1260              sack_comp == 0))
1261             continue;
1262 
1263         /* At this point we have found an eligible pure ACK to drop; if
1264          * we are in aggressive mode, we are done. Otherwise, keep
1265          * searching unless this is the second eligible ACK we
1266          * found.
1267          *
1268          * Since we want to drop ACK closest to the head of the queue,
1269          * save the first eligible ACK we find, even if we need to loop
1270          * again.
1271          */
1272         if (!elig_ack) {
1273             elig_ack = skb_check;
1274             elig_ack_prev = skb_prev;
1275             elig_flags = (tcp_flag_word(tcph_check)
1276                       & (TCP_FLAG_ECE | TCP_FLAG_CWR));
1277         }
1278 
1279         if (num_found++ > 0)
1280             goto found;
1281     }
1282 
1283     /* We made it through the queue without finding two eligible ACKs. If
1284      * we found a single eligible ACK we can drop it in aggressive mode if
1285      * we can guarantee that this does not interfere with ECN flag
1286      * information. We ensure this by dropping it only if the enqueued
1287      * packet is consecutive with the eligible ACK, and their flags match.
1288      */
1289     if (elig_ack && aggressive && elig_ack->next == skb &&
1290         (elig_flags == (tcp_flag_word(tcph) &
1291                 (TCP_FLAG_ECE | TCP_FLAG_CWR))))
1292         goto found;
1293 
1294     return NULL;
1295 
1296 found:
1297     if (elig_ack_prev)
1298         elig_ack_prev->next = elig_ack->next;
1299     else
1300         flow->head = elig_ack->next;
1301 
1302     skb_mark_not_on_list(elig_ack);
1303 
1304     return elig_ack;
1305 }
1306 
1307 static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
1308 {
1309     avg -= avg >> shift;
1310     avg += sample >> shift;
1311     return avg;
1312 }
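
/* Exponentially weighted moving average with weight 1/2^shift:
 * avg' = avg - avg/2^shift + sample/2^shift.  Since the sample is shifted
 * before being added, callers pre-scale small inputs (e.g. avg_netoff is
 * kept as the network offset << 16) so the fractional part isn't lost.
 */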
1313 
1314 static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
1315 {
1316     if (q->rate_flags & CAKE_FLAG_OVERHEAD)
1317         len -= off;
1318 
1319     if (q->max_netlen < len)
1320         q->max_netlen = len;
1321     if (q->min_netlen > len)
1322         q->min_netlen = len;
1323 
1324     len += q->rate_overhead;
1325 
1326     if (len < q->rate_mpu)
1327         len = q->rate_mpu;
1328 
1329     if (q->atm_mode == CAKE_ATM_ATM) {
1330         len += 47;
1331         len /= 48;
1332         len *= 53;
1333     } else if (q->atm_mode == CAKE_ATM_PTM) {
1334         /* Add one byte per 64 bytes or part thereof.
1335          * This is conservative and easier to calculate than the
1336          * precise value.
1337          */
1338         len += (len + 63) / 64;
1339     }
1340 
1341     if (q->max_adjlen < len)
1342         q->max_adjlen = len;
1343     if (q->min_adjlen > len)
1344         q->min_adjlen = len;
1345 
1346     return len;
1347 }
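
/* Worked example: an adjusted length of 100 bytes becomes, in ATM mode,
 * (100 + 47) / 48 = 3 cells * 53 bytes = 159 bytes on the wire; in PTM mode
 * it becomes 100 + ceil(100 / 64) = 102 bytes, a conservative approximation
 * of 64b/65b encoding.
 */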
1348 
1349 static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
1350 {
1351     const struct skb_shared_info *shinfo = skb_shinfo(skb);
1352     unsigned int hdr_len, last_len = 0;
1353     u32 off = skb_network_offset(skb);
1354     u32 len = qdisc_pkt_len(skb);
1355     u16 segs = 1;
1356 
1357     q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
1358 
1359     if (!shinfo->gso_size)
1360         return cake_calc_overhead(q, len, off);
1361 
1362     /* borrowed from qdisc_pkt_len_init() */
1363     hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
1364 
1365     /* + transport layer */
1366     if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
1367                         SKB_GSO_TCPV6))) {
1368         const struct tcphdr *th;
1369         struct tcphdr _tcphdr;
1370 
1371         th = skb_header_pointer(skb, skb_transport_offset(skb),
1372                     sizeof(_tcphdr), &_tcphdr);
1373         if (likely(th))
1374             hdr_len += __tcp_hdrlen(th);
1375     } else {
1376         struct udphdr _udphdr;
1377 
1378         if (skb_header_pointer(skb, skb_transport_offset(skb),
1379                        sizeof(_udphdr), &_udphdr))
1380             hdr_len += sizeof(struct udphdr);
1381     }
1382 
1383     if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
1384         segs = DIV_ROUND_UP(skb->len - hdr_len,
1385                     shinfo->gso_size);
1386     else
1387         segs = shinfo->gso_segs;
1388 
1389     len = shinfo->gso_size + hdr_len;
1390     last_len = skb->len - shinfo->gso_size * (segs - 1);
1391 
1392     return (cake_calc_overhead(q, len, off) * (segs - 1) +
1393         cake_calc_overhead(q, last_len, off));
1394 }
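
/* A GSO super-packet is charged as (segs - 1) segments of gso_size + hdr_len
 * bytes plus one final segment carrying the remainder, each with per-packet
 * overhead applied, so the shaper accounts for what will actually appear on
 * the wire after segmentation.
 */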
1395 
1396 static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
1397 {
1398     struct cake_heap_entry ii = q->overflow_heap[i];
1399     struct cake_heap_entry jj = q->overflow_heap[j];
1400 
1401     q->overflow_heap[i] = jj;
1402     q->overflow_heap[j] = ii;
1403 
1404     q->tins[ii.t].overflow_idx[ii.b] = j;
1405     q->tins[jj.t].overflow_idx[jj.b] = i;
1406 }
1407 
1408 static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
1409 {
1410     struct cake_heap_entry ii = q->overflow_heap[i];
1411 
1412     return q->tins[ii.t].backlogs[ii.b];
1413 }
1414 
1415 static void cake_heapify(struct cake_sched_data *q, u16 i)
1416 {
1417     static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
1418     u32 mb = cake_heap_get_backlog(q, i);
1419     u32 m = i;
1420 
1421     while (m < a) {
1422         u32 l = m + m + 1;
1423         u32 r = l + 1;
1424 
1425         if (l < a) {
1426             u32 lb = cake_heap_get_backlog(q, l);
1427 
1428             if (lb > mb) {
1429                 m  = l;
1430                 mb = lb;
1431             }
1432         }
1433 
1434         if (r < a) {
1435             u32 rb = cake_heap_get_backlog(q, r);
1436 
1437             if (rb > mb) {
1438                 m  = r;
1439                 mb = rb;
1440             }
1441         }
1442 
1443         if (m != i) {
1444             cake_heap_swap(q, i, m);
1445             i = m;
1446         } else {
1447             break;
1448         }
1449     }
1450 }
1451 
1452 static void cake_heapify_up(struct cake_sched_data *q, u16 i)
1453 {
1454     while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
1455         u16 p = (i - 1) >> 1;
1456         u32 ib = cake_heap_get_backlog(q, i);
1457         u32 pb = cake_heap_get_backlog(q, p);
1458 
1459         if (ib > pb) {
1460             cake_heap_swap(q, i, p);
1461             i = p;
1462         } else {
1463             break;
1464         }
1465     }
1466 }
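
/* The overflow heap is an implicit binary max-heap over every (tin, queue)
 * pair, keyed on queue backlog; cake_drop() below uses its root to find the
 * longest queue in the whole qdisc when the buffer limit is exceeded.
 */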
1467 
1468 static int cake_advance_shaper(struct cake_sched_data *q,
1469                    struct cake_tin_data *b,
1470                    struct sk_buff *skb,
1471                    ktime_t now, bool drop)
1472 {
1473     u32 len = get_cobalt_cb(skb)->adjusted_len;
1474 
1475     /* charge packet bandwidth to this tin
1476      * and to the global shaper.
1477      */
1478     if (q->rate_ns) {
1479         u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
1480         u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
1481         u64 failsafe_dur = global_dur + (global_dur >> 1);
1482 
1483         if (ktime_before(b->time_next_packet, now))
1484             b->time_next_packet = ktime_add_ns(b->time_next_packet,
1485                                tin_dur);
1486 
1487         else if (ktime_before(b->time_next_packet,
1488                       ktime_add_ns(now, tin_dur)))
1489             b->time_next_packet = ktime_add_ns(now, tin_dur);
1490 
1491         q->time_next_packet = ktime_add_ns(q->time_next_packet,
1492                            global_dur);
1493         if (!drop)
1494             q->failsafe_next_packet = \
1495                 ktime_add_ns(q->failsafe_next_packet,
1496                          failsafe_dur);
1497     }
1498     return len;
1499 }
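
/* The shaped serialisation time of a packet is
 * (adjusted_len * rate_ns) >> rate_shft nanoseconds, charged both to the tin
 * and to the global shaper; for packets that are actually delivered the
 * failsafe deadline also advances, by 1.5x the global duration, and acts as
 * a backstop for the dequeue path's watchdog.
 */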
1500 
1501 static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
1502 {
1503     struct cake_sched_data *q = qdisc_priv(sch);
1504     ktime_t now = ktime_get();
1505     u32 idx = 0, tin = 0, len;
1506     struct cake_heap_entry qq;
1507     struct cake_tin_data *b;
1508     struct cake_flow *flow;
1509     struct sk_buff *skb;
1510 
1511     if (!q->overflow_timeout) {
1512         int i;
1513         /* Build fresh max-heap */
1514         for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
1515             cake_heapify(q, i);
1516     }
1517     q->overflow_timeout = 65535;
1518 
1519     /* select longest queue for pruning */
1520     qq  = q->overflow_heap[0];
1521     tin = qq.t;
1522     idx = qq.b;
1523 
1524     b = &q->tins[tin];
1525     flow = &b->flows[idx];
1526     skb = dequeue_head(flow);
1527     if (unlikely(!skb)) {
1528         /* heap has gone wrong, rebuild it next time */
1529         q->overflow_timeout = 0;
1530         return idx + (tin << 16);
1531     }
1532 
1533     if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
1534         b->unresponsive_flow_count++;
1535 
1536     len = qdisc_pkt_len(skb);
1537     q->buffer_used      -= skb->truesize;
1538     b->backlogs[idx]    -= len;
1539     b->tin_backlog      -= len;
1540     sch->qstats.backlog -= len;
1541     qdisc_tree_reduce_backlog(sch, 1, len);
1542 
1543     flow->dropped++;
1544     b->tin_dropped++;
1545     sch->qstats.drops++;
1546 
1547     if (q->rate_flags & CAKE_FLAG_INGRESS)
1548         cake_advance_shaper(q, b, skb, now, true);
1549 
1550     __qdisc_drop(skb, to_free);
1551     sch->q.qlen--;
1552 
1553     cake_heapify(q, 0);
1554 
1555     return idx + (tin << 16);
1556 }
1557 
1558 static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
1559 {
1560     const int offset = skb_network_offset(skb);
1561     u16 *buf, buf_;
1562     u8 dscp;
1563 
1564     switch (skb_protocol(skb, true)) {
1565     case htons(ETH_P_IP):
1566         buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
1567         if (unlikely(!buf))
1568             return 0;
1569 
1570         /* ToS is in the second byte of iphdr */
1571         dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
1572 
1573         if (wash && dscp) {
1574             const int wlen = offset + sizeof(struct iphdr);
1575 
1576             if (!pskb_may_pull(skb, wlen) ||
1577                 skb_try_make_writable(skb, wlen))
1578                 return 0;
1579 
1580             ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
1581         }
1582 
1583         return dscp;
1584 
1585     case htons(ETH_P_IPV6):
1586         buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
1587         if (unlikely(!buf))
1588             return 0;
1589 
1590         /* Traffic class is in the first and second bytes of ipv6hdr */
1591         dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
1592 
1593         if (wash && dscp) {
1594             const int wlen = offset + sizeof(struct ipv6hdr);
1595 
1596             if (!pskb_may_pull(skb, wlen) ||
1597                 skb_try_make_writable(skb, wlen))
1598                 return 0;
1599 
1600             ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
1601         }
1602 
1603         return dscp;
1604 
1605     case htons(ETH_P_ARP):
1606         return 0x38;  /* CS7 - Net Control */
1607 
1608     default:
1609         /* If there is no Diffserv field, treat as best-effort */
1610         return 0;
1611     }
1612 }
1613 
1614 static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
1615                          struct sk_buff *skb)
1616 {
1617     struct cake_sched_data *q = qdisc_priv(sch);
1618     u32 tin, mark;
1619     bool wash;
1620     u8 dscp;
1621 
1622     /* Tin selection: default to diffserv-based selection, allowing overrides
1623      * via firewall marks or skb->priority.  Parse DSCP early when washing is
1624      * enabled (so it is always rewritten); otherwise defer it until needed.
1625      */
1626     mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
1627     wash = !!(q->rate_flags & CAKE_FLAG_WASH);
1628     if (wash)
1629         dscp = cake_handle_diffserv(skb, wash);
1630 
1631     if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
1632         tin = 0;
1633 
1634     else if (mark && mark <= q->tin_cnt)
1635         tin = q->tin_order[mark - 1];
1636 
1637     else if (TC_H_MAJ(skb->priority) == sch->handle &&
1638          TC_H_MIN(skb->priority) > 0 &&
1639          TC_H_MIN(skb->priority) <= q->tin_cnt)
1640         tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
1641 
1642     else {
1643         if (!wash)
1644             dscp = cake_handle_diffserv(skb, wash);
1645         tin = q->tin_index[dscp];
1646 
1647         if (unlikely(tin >= q->tin_cnt))
1648             tin = 0;
1649     }
1650 
1651     return &q->tins[tin];
1652 }
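     /* Precedence implemented above: an explicit besteffort mode wins, then a
      * firewall mark selected by fwmark_mask, then a matching skb->priority
      * minor class id, and only then the DSCP-to-tin lookup table; a table
      * result outside the configured tin count falls back to tin 0.
      */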
1653 
1654 static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
1655              struct sk_buff *skb, int flow_mode, int *qerr)
1656 {
1657     struct cake_sched_data *q = qdisc_priv(sch);
1658     struct tcf_proto *filter;
1659     struct tcf_result res;
1660     u16 flow = 0, host = 0;
1661     int result;
1662 
1663     filter = rcu_dereference_bh(q->filter_list);
1664     if (!filter)
1665         goto hash;
1666 
1667     *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1668     result = tcf_classify(skb, NULL, filter, &res, false);
1669 
1670     if (result >= 0) {
1671 #ifdef CONFIG_NET_CLS_ACT
1672         switch (result) {
1673         case TC_ACT_STOLEN:
1674         case TC_ACT_QUEUED:
1675         case TC_ACT_TRAP:
1676             *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1677             fallthrough;
1678         case TC_ACT_SHOT:
1679             return 0;
1680         }
1681 #endif
1682         if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
1683             flow = TC_H_MIN(res.classid);
1684         if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16))
1685             host = TC_H_MAJ(res.classid) >> 16;
1686     }
1687 hash:
1688     *t = cake_select_tin(sch, skb);
1689     return cake_hash(*t, skb, flow_mode, flow, host) + 1;
1690 }
1691 
1692 static void cake_reconfigure(struct Qdisc *sch);
1693 
1694 static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1695             struct sk_buff **to_free)
1696 {
1697     struct cake_sched_data *q = qdisc_priv(sch);
1698     int len = qdisc_pkt_len(skb);
1699     int ret;
1700     struct sk_buff *ack = NULL;
1701     ktime_t now = ktime_get();
1702     struct cake_tin_data *b;
1703     struct cake_flow *flow;
1704     u32 idx;
1705 
1706     /* choose flow to insert into */
1707     idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
1708     if (idx == 0) {
1709         if (ret & __NET_XMIT_BYPASS)
1710             qdisc_qstats_drop(sch);
1711         __qdisc_drop(skb, to_free);
1712         return ret;
1713     }
1714     idx--;
1715     flow = &b->flows[idx];
1716 
1717     /* ensure shaper state isn't stale */
1718     if (!b->tin_backlog) {
1719         if (ktime_before(b->time_next_packet, now))
1720             b->time_next_packet = now;
1721 
1722         if (!sch->q.qlen) {
1723             if (ktime_before(q->time_next_packet, now)) {
1724                 q->failsafe_next_packet = now;
1725                 q->time_next_packet = now;
1726             } else if (ktime_after(q->time_next_packet, now) &&
1727                    ktime_after(q->failsafe_next_packet, now)) {
1728                 u64 next = \
1729                     min(ktime_to_ns(q->time_next_packet),
1730                         ktime_to_ns(
1731                            q->failsafe_next_packet));
1732                 sch->qstats.overlimits++;
1733                 qdisc_watchdog_schedule_ns(&q->watchdog, next);
1734             }
1735         }
1736     }
1737 
1738     if (unlikely(len > b->max_skblen))
1739         b->max_skblen = len;
1740 
1741     if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
1742         struct sk_buff *segs, *nskb;
1743         netdev_features_t features = netif_skb_features(skb);
1744         unsigned int slen = 0, numsegs = 0;
1745 
1746         segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
1747         if (IS_ERR_OR_NULL(segs))
1748             return qdisc_drop(skb, sch, to_free);
1749 
1750         skb_list_walk_safe(segs, segs, nskb) {
1751             skb_mark_not_on_list(segs);
1752             qdisc_skb_cb(segs)->pkt_len = segs->len;
1753             cobalt_set_enqueue_time(segs, now);
1754             get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
1755                                       segs);
1756             flow_queue_add(flow, segs);
1757 
1758             sch->q.qlen++;
1759             numsegs++;
1760             slen += segs->len;
1761             q->buffer_used += segs->truesize;
1762             b->packets++;
1763         }
1764 
1765         /* stats */
1766         b->bytes        += slen;
1767         b->backlogs[idx]    += slen;
1768         b->tin_backlog      += slen;
1769         sch->qstats.backlog += slen;
1770         q->avg_window_bytes += slen;
1771 
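             /* One skb became numsegs segments totalling slen bytes; passing
              * negative deltas to qdisc_tree_reduce_backlog() tells the
              * parent qdiscs to *increase* their packet and byte counts by
              * the difference.
              */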
1772         qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen);
1773         consume_skb(skb);
1774     } else {
1775         /* not splitting */
1776         cobalt_set_enqueue_time(skb, now);
1777         get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
1778         flow_queue_add(flow, skb);
1779 
1780         if (q->ack_filter)
1781             ack = cake_ack_filter(q, flow);
1782 
1783         if (ack) {
1784             b->ack_drops++;
1785             sch->qstats.drops++;
1786             b->bytes += qdisc_pkt_len(ack);
1787             len -= qdisc_pkt_len(ack);
1788             q->buffer_used += skb->truesize - ack->truesize;
1789             if (q->rate_flags & CAKE_FLAG_INGRESS)
1790                 cake_advance_shaper(q, b, ack, now, true);
1791 
1792             qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
1793             consume_skb(ack);
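                 /* len is reduced by the filtered ACK's size so the backlog
                  * accounting below records only the net change from
                  * replacing the old ACK with this packet.
                  */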
1794         } else {
1795             sch->q.qlen++;
1796             q->buffer_used      += skb->truesize;
1797         }
1798 
1799         /* stats */
1800         b->packets++;
1801         b->bytes        += len;
1802         b->backlogs[idx]    += len;
1803         b->tin_backlog      += len;
1804         sch->qstats.backlog += len;
1805         q->avg_window_bytes += len;
1806     }
1807 
1808     if (q->overflow_timeout)
1809         cake_heapify_up(q, b->overflow_idx[idx]);
1810 
1811     /* incoming bandwidth capacity estimate */
1812     if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
1813         u64 packet_interval = \
1814             ktime_to_ns(ktime_sub(now, q->last_packet_time));
1815 
1816         if (packet_interval > NSEC_PER_SEC)
1817             packet_interval = NSEC_PER_SEC;
1818 
1819         /* filter out short-term bursts, eg. wifi aggregation */
1820         q->avg_packet_interval = \
1821             cake_ewma(q->avg_packet_interval,
1822                   packet_interval,
1823                   (packet_interval > q->avg_packet_interval ?
1824                       2 : 8));
1825 
1826         q->last_packet_time = now;
1827 
1828         if (packet_interval > q->avg_packet_interval) {
1829             u64 window_interval = \
1830                 ktime_to_ns(ktime_sub(now,
1831                               q->avg_window_begin));
1832             u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
1833 
1834             b = div64_u64(b, window_interval);
1835             q->avg_peak_bandwidth =
1836                 cake_ewma(q->avg_peak_bandwidth, b,
1837                       b > q->avg_peak_bandwidth ? 2 : 8);
1838             q->avg_window_bytes = 0;
1839             q->avg_window_begin = now;
1840 
1841             if (ktime_after(now,
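                 /* At most once every 250ms, set the shaper rate to 15/16
                  * of the peak estimate so that this qdisc stays just below
                  * the measured capacity and remains the bottleneck.
                  */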
1842                     ktime_add_ms(q->last_reconfig_time,
1843                              250))) {
1844                 q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
1845                 cake_reconfigure(sch);
1846             }
1847         }
1848     } else {
1849         q->avg_window_bytes = 0;
1850         q->last_packet_time = now;
1851     }
1852 
1853     /* flowchain */
1854     if (!flow->set || flow->set == CAKE_SET_DECAYING) {
1855         struct cake_host *srchost = &b->hosts[flow->srchost];
1856         struct cake_host *dsthost = &b->hosts[flow->dsthost];
1857         u16 host_load = 1;
1858 
1859         if (!flow->set) {
1860             list_add_tail(&flow->flowchain, &b->new_flows);
1861         } else {
1862             b->decaying_flow_count--;
1863             list_move_tail(&flow->flowchain, &b->new_flows);
1864         }
1865         flow->set = CAKE_SET_SPARSE;
1866         b->sparse_flow_count++;
1867 
1868         if (cake_dsrc(q->flow_mode))
1869             host_load = max(host_load, srchost->srchost_bulk_flow_count);
1870 
1871         if (cake_ddst(q->flow_mode))
1872             host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
1873 
1874         flow->deficit = (b->flow_quantum *
1875                  quantum_div[host_load]) >> 16;
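             /* quantum_div[n] is roughly 65536/n, so this is
              * flow_quantum / host_load in 16.16 fixed point: a host that
              * already has n bulk flows starts each new sparse flow with
              * about 1/n of the normal quantum.
              */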
1876     } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
1877         struct cake_host *srchost = &b->hosts[flow->srchost];
1878         struct cake_host *dsthost = &b->hosts[flow->dsthost];
1879 
1880         /* this flow was empty, accounted as a sparse flow, but actually
1881          * in the bulk rotation.
1882          */
1883         flow->set = CAKE_SET_BULK;
1884         b->sparse_flow_count--;
1885         b->bulk_flow_count++;
1886 
1887         if (cake_dsrc(q->flow_mode))
1888             srchost->srchost_bulk_flow_count++;
1889 
1890         if (cake_ddst(q->flow_mode))
1891             dsthost->dsthost_bulk_flow_count++;
1892 
1893     }
1894 
1895     if (q->buffer_used > q->buffer_max_used)
1896         q->buffer_max_used = q->buffer_used;
1897 
1898     if (q->buffer_used > q->buffer_limit) {
1899         u32 dropped = 0;
1900 
1901         while (q->buffer_used > q->buffer_limit) {
1902             dropped++;
1903             cake_drop(sch, to_free);
1904         }
1905         b->drop_overlimit += dropped;
1906     }
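         /* cake_drop() prunes from the longest queue in the whole qdisc
          * (via the overflow heap), which may belong to a different tin or
          * flow than the packet just enqueued; only the overlimit count is
          * charged to this tin.
          */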
1907     return NET_XMIT_SUCCESS;
1908 }
1909 
1910 static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
1911 {
1912     struct cake_sched_data *q = qdisc_priv(sch);
1913     struct cake_tin_data *b = &q->tins[q->cur_tin];
1914     struct cake_flow *flow = &b->flows[q->cur_flow];
1915     struct sk_buff *skb = NULL;
1916     u32 len;
1917 
1918     if (flow->head) {
1919         skb = dequeue_head(flow);
1920         len = qdisc_pkt_len(skb);
1921         b->backlogs[q->cur_flow] -= len;
1922         b->tin_backlog       -= len;
1923         sch->qstats.backlog      -= len;
1924         q->buffer_used       -= skb->truesize;
1925         sch->q.qlen--;
1926 
1927         if (q->overflow_timeout)
1928             cake_heapify(q, b->overflow_idx[q->cur_flow]);
1929     }
1930     return skb;
1931 }
1932 
1933 /* Discard leftover packets from a tin no longer in use. */
1934 static void cake_clear_tin(struct Qdisc *sch, u16 tin)
1935 {
1936     struct cake_sched_data *q = qdisc_priv(sch);
1937     struct sk_buff *skb;
1938 
1939     q->cur_tin = tin;
1940     for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
1941         while (!!(skb = cake_dequeue_one(sch)))
1942             kfree_skb(skb);
1943 }
1944 
1945 static struct sk_buff *cake_dequeue(struct Qdisc *sch)
1946 {
1947     struct cake_sched_data *q = qdisc_priv(sch);
1948     struct cake_tin_data *b = &q->tins[q->cur_tin];
1949     struct cake_host *srchost, *dsthost;
1950     ktime_t now = ktime_get();
1951     struct cake_flow *flow;
1952     struct list_head *head;
1953     bool first_flow = true;
1954     struct sk_buff *skb;
1955     u16 host_load;
1956     u64 delay;
1957     u32 len;
1958 
1959 begin:
1960     if (!sch->q.qlen)
1961         return NULL;
1962 
1963     /* global hard shaper */
1964     if (ktime_after(q->time_next_packet, now) &&
1965         ktime_after(q->failsafe_next_packet, now)) {
1966         u64 next = min(ktime_to_ns(q->time_next_packet),
1967                    ktime_to_ns(q->failsafe_next_packet));
1968 
1969         sch->qstats.overlimits++;
1970         qdisc_watchdog_schedule_ns(&q->watchdog, next);
1971         return NULL;
1972     }
1973 
1974     /* Choose a class to work on. */
1975     if (!q->rate_ns) {
1976         /* In unlimited mode, can't rely on shaper timings, just balance
1977          * with DRR
1978          */
1979         bool wrapped = false, empty = true;
1980 
1981         while (b->tin_deficit < 0 ||
1982                !(b->sparse_flow_count + b->bulk_flow_count)) {
1983             if (b->tin_deficit <= 0)
1984                 b->tin_deficit += b->tin_quantum;
1985             if (b->sparse_flow_count + b->bulk_flow_count)
1986                 empty = false;
1987 
1988             q->cur_tin++;
1989             b++;
1990             if (q->cur_tin >= q->tin_cnt) {
1991                 q->cur_tin = 0;
1992                 b = q->tins;
1993 
1994                 if (wrapped) {
1995                     /* It's possible for q->qlen to be
1996                      * nonzero when we actually have no
1997                      * packets anywhere.
1998                      */
1999                     if (empty)
2000                         return NULL;
2001                 } else {
2002                     wrapped = true;
2003                 }
2004             }
2005         }
2006     } else {
2007         /* In shaped mode, choose:
2008          * - Highest-priority tin with queue and meeting schedule, or
2009          * - The earliest-scheduled tin with queue.
2010          */
2011         ktime_t best_time = KTIME_MAX;
2012         int tin, best_tin = 0;
2013 
2014         for (tin = 0; tin < q->tin_cnt; tin++) {
2015             b = q->tins + tin;
2016             if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
2017                 ktime_t time_to_pkt = \
2018                     ktime_sub(b->time_next_packet, now);
2019 
2020                 if (ktime_to_ns(time_to_pkt) <= 0 ||
2021                     ktime_compare(time_to_pkt,
2022                           best_time) <= 0) {
2023                     best_time = time_to_pkt;
2024                     best_tin = tin;
2025                 }
2026             }
2027         }
2028 
2029         q->cur_tin = best_tin;
2030         b = q->tins + best_tin;
2031 
2032         /* No point in going further if no packets to deliver. */
2033         if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
2034             return NULL;
2035     }
2036 
2037 retry:
2038     /* service this class */
2039     head = &b->decaying_flows;
2040     if (!first_flow || list_empty(head)) {
2041         head = &b->new_flows;
2042         if (list_empty(head)) {
2043             head = &b->old_flows;
2044             if (unlikely(list_empty(head))) {
2045                 head = &b->decaying_flows;
2046                 if (unlikely(list_empty(head)))
2047                     goto begin;
2048             }
2049         }
2050     }
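         /* Rotation order: decaying flows get one look on the first pass
          * (so their COBALT state can decay and empty queues be retired),
          * then sparse (new) flows are served ahead of bulk (old) flows,
          * much like fq_codel's new/old flow lists.
          */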
2051     flow = list_first_entry(head, struct cake_flow, flowchain);
2052     q->cur_flow = flow - b->flows;
2053     first_flow = false;
2054 
2055     /* triple isolation (modified DRR++) */
2056     srchost = &b->hosts[flow->srchost];
2057     dsthost = &b->hosts[flow->dsthost];
2058     host_load = 1;
2059 
2060     /* flow isolation (DRR++) */
2061     if (flow->deficit <= 0) {
2062         /* Keep any flow that has exhausted its deficit out of the sparse
2063          * and decaying rotations.  No non-empty flow can enter the decaying
2064          * rotation, so flows there never owe a deficit.
2065          */
2066         if (flow->set == CAKE_SET_SPARSE) {
2067             if (flow->head) {
2068                 b->sparse_flow_count--;
2069                 b->bulk_flow_count++;
2070 
2071                 if (cake_dsrc(q->flow_mode))
2072                     srchost->srchost_bulk_flow_count++;
2073 
2074                 if (cake_ddst(q->flow_mode))
2075                     dsthost->dsthost_bulk_flow_count++;
2076 
2077                 flow->set = CAKE_SET_BULK;
2078             } else {
2079                 /* we've moved it to the bulk rotation for
2080                  * correct deficit accounting but we still want
2081                  * to count it as a sparse flow, not a bulk one.
2082                  */
2083                 flow->set = CAKE_SET_SPARSE_WAIT;
2084             }
2085         }
2086 
2087         if (cake_dsrc(q->flow_mode))
2088             host_load = max(host_load, srchost->srchost_bulk_flow_count);
2089 
2090         if (cake_ddst(q->flow_mode))
2091             host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
2092 
2093         WARN_ON(host_load > CAKE_QUEUES);
2094 
2095         /* The shifted prandom_u32() is a way to apply dithering to
2096          * avoid accumulating roundoff errors
2097          */
2098         flow->deficit += (b->flow_quantum * quantum_div[host_load] +
2099                   (prandom_u32() >> 16)) >> 16;
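             /* Adding a uniform value in [0, 2^16) before the shift makes
              * the truncation round up with probability equal to the
              * discarded fraction, so the refill averages out to
              * flow_quantum * quantum_div[host_load] / 2^16 over time.
              */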
2100         list_move_tail(&flow->flowchain, &b->old_flows);
2101 
2102         goto retry;
2103     }
2104 
2105     /* Retrieve a packet via the AQM */
2106     while (1) {
2107         skb = cake_dequeue_one(sch);
2108         if (!skb) {
2109             /* this queue was actually empty */
2110             if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
2111                 b->unresponsive_flow_count--;
2112 
2113             if (flow->cvars.p_drop || flow->cvars.count ||
2114                 ktime_before(now, flow->cvars.drop_next)) {
2115                 /* keep in the flowchain until the state has
2116                  * decayed to rest
2117                  */
2118                 list_move_tail(&flow->flowchain,
2119                            &b->decaying_flows);
2120                 if (flow->set == CAKE_SET_BULK) {
2121                     b->bulk_flow_count--;
2122 
2123                     if (cake_dsrc(q->flow_mode))
2124                         srchost->srchost_bulk_flow_count--;
2125 
2126                     if (cake_ddst(q->flow_mode))
2127                         dsthost->dsthost_bulk_flow_count--;
2128 
2129                     b->decaying_flow_count++;
2130                 } else if (flow->set == CAKE_SET_SPARSE ||
2131                        flow->set == CAKE_SET_SPARSE_WAIT) {
2132                     b->sparse_flow_count--;
2133                     b->decaying_flow_count++;
2134                 }
2135                 flow->set = CAKE_SET_DECAYING;
2136             } else {
2137                 /* remove empty queue from the flowchain */
2138                 list_del_init(&flow->flowchain);
2139                 if (flow->set == CAKE_SET_SPARSE ||
2140                     flow->set == CAKE_SET_SPARSE_WAIT)
2141                     b->sparse_flow_count--;
2142                 else if (flow->set == CAKE_SET_BULK) {
2143                     b->bulk_flow_count--;
2144 
2145                     if (cake_dsrc(q->flow_mode))
2146                         srchost->srchost_bulk_flow_count--;
2147 
2148                     if (cake_ddst(q->flow_mode))
2149                         dsthost->dsthost_bulk_flow_count--;
2150 
2151                 } else
2152                     b->decaying_flow_count--;
2153 
2154                 flow->set = CAKE_SET_NONE;
2155             }
2156             goto begin;
2157         }
2158 
2159         /* Last packet in queue may be marked, shouldn't be dropped */
2160         if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
2161                     (b->bulk_flow_count *
2162                      !!(q->rate_flags &
2163                         CAKE_FLAG_INGRESS))) ||
2164             !flow->head)
2165             break;
2166 
2167         /* drop this packet, get another one */
2168         if (q->rate_flags & CAKE_FLAG_INGRESS) {
2169             len = cake_advance_shaper(q, b, skb,
2170                           now, true);
2171             flow->deficit -= len;
2172             b->tin_deficit -= len;
2173         }
2174         flow->dropped++;
2175         b->tin_dropped++;
2176         qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
2177         qdisc_qstats_drop(sch);
2178         kfree_skb(skb);
2179         if (q->rate_flags & CAKE_FLAG_INGRESS)
2180             goto retry;
2181     }
2182 
2183     b->tin_ecn_mark += !!flow->cvars.ecn_marked;
2184     qdisc_bstats_update(sch, skb);
2185 
2186     /* collect delay stats */
2187     delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
2188     b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
2189     b->peak_delay = cake_ewma(b->peak_delay, delay,
2190                   delay > b->peak_delay ? 2 : 8);
2191     b->base_delay = cake_ewma(b->base_delay, delay,
2192                   delay < b->base_delay ? 2 : 8);
2193 
2194     len = cake_advance_shaper(q, b, skb, now, false);
2195     flow->deficit -= len;
2196     b->tin_deficit -= len;
2197 
2198     if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
2199         u64 next = min(ktime_to_ns(q->time_next_packet),
2200                    ktime_to_ns(q->failsafe_next_packet));
2201 
2202         qdisc_watchdog_schedule_ns(&q->watchdog, next);
2203     } else if (!sch->q.qlen) {
2204         int i;
2205 
2206         for (i = 0; i < q->tin_cnt; i++) {
2207             if (q->tins[i].decaying_flow_count) {
2208                 ktime_t next = \
2209                     ktime_add_ns(now,
2210                              q->tins[i].cparams.target);
2211 
2212                 qdisc_watchdog_schedule_ns(&q->watchdog,
2213                                ktime_to_ns(next));
2214                 break;
2215             }
2216         }
2217     }
2218 
2219     if (q->overflow_timeout)
2220         q->overflow_timeout--;
2221 
2222     return skb;
2223 }
2224 
2225 static void cake_reset(struct Qdisc *sch)
2226 {
2227     u32 c;
2228 
2229     for (c = 0; c < CAKE_MAX_TINS; c++)
2230         cake_clear_tin(sch, c);
2231 }
2232 
2233 static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
2234     [TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
2235     [TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
2236     [TCA_CAKE_ATM]       = { .type = NLA_U32 },
2237     [TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
2238     [TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
2239     [TCA_CAKE_RTT]       = { .type = NLA_U32 },
2240     [TCA_CAKE_TARGET]    = { .type = NLA_U32 },
2241     [TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
2242     [TCA_CAKE_MEMORY]    = { .type = NLA_U32 },
2243     [TCA_CAKE_NAT]       = { .type = NLA_U32 },
2244     [TCA_CAKE_RAW]       = { .type = NLA_U32 },
2245     [TCA_CAKE_WASH]      = { .type = NLA_U32 },
2246     [TCA_CAKE_MPU]       = { .type = NLA_U32 },
2247     [TCA_CAKE_INGRESS]   = { .type = NLA_U32 },
2248     [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
2249     [TCA_CAKE_SPLIT_GSO]     = { .type = NLA_U32 },
2250     [TCA_CAKE_FWMARK]    = { .type = NLA_U32 },
2251 };
2252 
2253 static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
2254               u64 target_ns, u64 rtt_est_ns)
2255 {
2256     /* Convert byte-rate into time-per-byte, clamping the rate to MIN_RATE
2257      * so that even a tiny configured rate unwedges in reasonable time.
2258      */
2259     static const u64 MIN_RATE = 64;
2260     u32 byte_target = mtu;
2261     u64 byte_target_ns;
2262     u8  rate_shft = 0;
2263     u64 rate_ns = 0;
2264 
2265     b->flow_quantum = 1514;
2266     if (rate) {
2267         b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
2268         rate_shft = 34;
2269         rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
2270         rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
2271         while (!!(rate_ns >> 34)) {
2272             rate_ns >>= 1;
2273             rate_shft--;
2274         }
2275     } /* else unlimited, ie. zero delay */
2276 
2277     b->tin_rate_bps  = rate;
2278     b->tin_rate_ns   = rate_ns;
2279     b->tin_rate_shft = rate_shft;
2280 
2281     byte_target_ns = (byte_target * rate_ns) >> rate_shft;
2282 
2283     b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
2284     b->cparams.interval = max(rtt_est_ns +
2285                      b->cparams.target - target_ns,
2286                      b->cparams.target * 2);
2287     b->cparams.mtu_time = byte_target_ns;
2288     b->cparams.p_inc = 1 << 24; /* 1/256 */
2289     b->cparams.p_dec = 1 << 20; /* 1/4096 */
2290 }
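     /* Worked example of the conversion above: for rate = 12,500,000 bytes/s
      * (~100 Mbit/s) the ideal cost is 80 ns per byte.  rate_ns starts out as
      * 80 << 34 and is halved (with rate_shft decremented) until it fits in
      * 34 bits, leaving rate_ns >> rate_shft == 80, so the shaper charges
      * 80 ns of transmission time for every byte it releases.
      */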
2291 
2292 static int cake_config_besteffort(struct Qdisc *sch)
2293 {
2294     struct cake_sched_data *q = qdisc_priv(sch);
2295     struct cake_tin_data *b = &q->tins[0];
2296     u32 mtu = psched_mtu(qdisc_dev(sch));
2297     u64 rate = q->rate_bps;
2298 
2299     q->tin_cnt = 1;
2300 
2301     q->tin_index = besteffort;
2302     q->tin_order = normal_order;
2303 
2304     cake_set_rate(b, rate, mtu,
2305               us_to_ns(q->target), us_to_ns(q->interval));
2306     b->tin_quantum = 65535;
2307 
2308     return 0;
2309 }
2310 
2311 static int cake_config_precedence(struct Qdisc *sch)
2312 {
2313     /* convert high-level (user visible) parameters into internal format */
2314     struct cake_sched_data *q = qdisc_priv(sch);
2315     u32 mtu = psched_mtu(qdisc_dev(sch));
2316     u64 rate = q->rate_bps;
2317     u32 quantum = 256;
2318     u32 i;
2319 
2320     q->tin_cnt = 8;
2321     q->tin_index = precedence;
2322     q->tin_order = normal_order;
2323 
2324     for (i = 0; i < q->tin_cnt; i++) {
2325         struct cake_tin_data *b = &q->tins[i];
2326 
2327         cake_set_rate(b, rate, mtu, us_to_ns(q->target),
2328                   us_to_ns(q->interval));
2329 
2330         b->tin_quantum = max_t(u16, 1U, quantum);
2331 
2332         /* calculate next class's parameters */
2333         rate  *= 7;
2334         rate >>= 3;
2335 
2336         quantum  *= 7;
2337         quantum >>= 3;
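             /* i.e. each successive precedence tin receives 7/8 of the
              * previous tin's rate threshold and DRR weight, giving a
              * geometric taper across the eight tins.
              */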
2338     }
2339 
2340     return 0;
2341 }
2342 
2343 /*  List of known Diffserv codepoints:
2344  *
2345  *  Default Forwarding (DF/CS0) - Best Effort
2346  *  Max Throughput (TOS2)
2347  *  Min Delay (TOS4)
2348  *  LLT "La" (TOS5)
2349  *  Assured Forwarding 1 (AF1x) - x3
2350  *  Assured Forwarding 2 (AF2x) - x3
2351  *  Assured Forwarding 3 (AF3x) - x3
2352  *  Assured Forwarding 4 (AF4x) - x3
2353  *  Precedence Class 1 (CS1)
2354  *  Precedence Class 2 (CS2)
2355  *  Precedence Class 3 (CS3)
2356  *  Precedence Class 4 (CS4)
2357  *  Precedence Class 5 (CS5)
2358  *  Precedence Class 6 (CS6)
2359  *  Precedence Class 7 (CS7)
2360  *  Voice Admit (VA)
2361  *  Expedited Forwarding (EF)
2362  *  Lower Effort (LE)
2363  *
2364  *  Total 26 codepoints.
2365  */
2366 
2367 /*  List of traffic classes in RFC 4594, updated by RFC 8622:
2368  *      (roughly descending order of contended priority)
2369  *      (roughly ascending order of uncontended throughput)
2370  *
2371  *  Network Control (CS6,CS7)      - routing traffic
2372  *  Telephony (EF,VA)         - aka. VoIP streams
2373  *  Signalling (CS5)               - VoIP setup
2374  *  Multimedia Conferencing (AF4x) - aka. video calls
2375  *  Realtime Interactive (CS4)     - eg. games
2376  *  Multimedia Streaming (AF3x)    - eg. YouTube, NetFlix, Twitch
2377  *  Broadcast Video (CS3)
2378  *  Low-Latency Data (AF2x,TOS4)      - eg. database
2379  *  Ops, Admin, Management (CS2)      - eg. ssh
2380  *  Standard Service (DF & unrecognised codepoints)
2381  *  High-Throughput Data (AF1x,TOS2)  - eg. web traffic
2382  *  Low-Priority Data (LE,CS1)        - eg. BitTorrent
2383  *
2384  *  Total 12 traffic classes.
2385  */
2386 
2387 static int cake_config_diffserv8(struct Qdisc *sch)
2388 {
2389 /*  Pruned list of traffic classes for typical applications:
2390  *
2391  *      Network Control          (CS6, CS7)
2392  *      Minimum Latency          (EF, VA, CS5, CS4)
2393  *      Interactive Shell        (CS2)
2394  *      Low Latency Transactions (AF2x, TOS4)
2395  *      Video Streaming          (AF4x, AF3x, CS3)
2396  *      Bog Standard             (DF etc.)
2397  *      High Throughput          (AF1x, TOS2, CS1)
2398  *      Background Traffic       (LE)
2399  *
2400  *      Total 8 traffic classes.
2401  */
2402 
2403     struct cake_sched_data *q = qdisc_priv(sch);
2404     u32 mtu = psched_mtu(qdisc_dev(sch));
2405     u64 rate = q->rate_bps;
2406     u32 quantum = 256;
2407     u32 i;
2408 
2409     q->tin_cnt = 8;
2410 
2411     /* codepoint to class mapping */
2412     q->tin_index = diffserv8;
2413     q->tin_order = normal_order;
2414 
2415     /* class characteristics */
2416     for (i = 0; i < q->tin_cnt; i++) {
2417         struct cake_tin_data *b = &q->tins[i];
2418 
2419         cake_set_rate(b, rate, mtu, us_to_ns(q->target),
2420                   us_to_ns(q->interval));
2421 
2422         b->tin_quantum = max_t(u16, 1U, quantum);
2423 
2424         /* calculate next class's parameters */
2425         rate  *= 7;
2426         rate >>= 3;
2427 
2428         quantum  *= 7;
2429         quantum >>= 3;
2430     }
2431 
2432     return 0;
2433 }
2434 
2435 static int cake_config_diffserv4(struct Qdisc *sch)
2436 {
2437 /*  Further pruned list of traffic classes for four-class system:
2438  *
2439  *      Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
2440  *      Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2)
2441  *      Best Effort        (DF, AF1x, TOS2, and those not specified)
2442  *      Background Traffic (LE, CS1)
2443  *
2444  *      Total 4 traffic classes.
2445  */
2446 
2447     struct cake_sched_data *q = qdisc_priv(sch);
2448     u32 mtu = psched_mtu(qdisc_dev(sch));
2449     u64 rate = q->rate_bps;
2450     u32 quantum = 1024;
2451 
2452     q->tin_cnt = 4;
2453 
2454     /* codepoint to class mapping */
2455     q->tin_index = diffserv4;
2456     q->tin_order = bulk_order;
2457 
2458     /* class characteristics */
2459     cake_set_rate(&q->tins[0], rate, mtu,
2460               us_to_ns(q->target), us_to_ns(q->interval));
2461     cake_set_rate(&q->tins[1], rate >> 4, mtu,
2462               us_to_ns(q->target), us_to_ns(q->interval));
2463     cake_set_rate(&q->tins[2], rate >> 1, mtu,
2464               us_to_ns(q->target), us_to_ns(q->interval));
2465     cake_set_rate(&q->tins[3], rate >> 2, mtu,
2466               us_to_ns(q->target), us_to_ns(q->interval));
2467 
2468     /* bandwidth-sharing weights */
2469     q->tins[0].tin_quantum = quantum;
2470     q->tins[1].tin_quantum = quantum >> 4;
2471     q->tins[2].tin_quantum = quantum >> 1;
2472     q->tins[3].tin_quantum = quantum >> 2;
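         /* i.e. the four tins receive shaping thresholds of 100%, 6.25%,
          * 50% and 25% of the base rate respectively, with bandwidth-sharing
          * weights in the same proportions.
          */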
2473 
2474     return 0;
2475 }
2476 
2477 static int cake_config_diffserv3(struct Qdisc *sch)
2478 {
2479 /*  Simplified Diffserv structure with 3 tins.
2480  *      Latency Sensitive   (CS7, CS6, EF, VA, TOS4)
2481  *      Best Effort
2482  *      Low Priority        (LE, CS1)
2483  */
2484     struct cake_sched_data *q = qdisc_priv(sch);
2485     u32 mtu = psched_mtu(qdisc_dev(sch));
2486     u64 rate = q->rate_bps;
2487     u32 quantum = 1024;
2488 
2489     q->tin_cnt = 3;
2490 
2491     /* codepoint to class mapping */
2492     q->tin_index = diffserv3;
2493     q->tin_order = bulk_order;
2494 
2495     /* class characteristics */
2496     cake_set_rate(&q->tins[0], rate, mtu,
2497               us_to_ns(q->target), us_to_ns(q->interval));
2498     cake_set_rate(&q->tins[1], rate >> 4, mtu,
2499               us_to_ns(q->target), us_to_ns(q->interval));
2500     cake_set_rate(&q->tins[2], rate >> 2, mtu,
2501               us_to_ns(q->target), us_to_ns(q->interval));
2502 
2503     /* bandwidth-sharing weights */
2504     q->tins[0].tin_quantum = quantum;
2505     q->tins[1].tin_quantum = quantum >> 4;
2506     q->tins[2].tin_quantum = quantum >> 2;
2507 
2508     return 0;
2509 }
2510 
2511 static void cake_reconfigure(struct Qdisc *sch)
2512 {
2513     struct cake_sched_data *q = qdisc_priv(sch);
2514     int c, ft;
2515 
2516     switch (q->tin_mode) {
2517     case CAKE_DIFFSERV_BESTEFFORT:
2518         ft = cake_config_besteffort(sch);
2519         break;
2520 
2521     case CAKE_DIFFSERV_PRECEDENCE:
2522         ft = cake_config_precedence(sch);
2523         break;
2524 
2525     case CAKE_DIFFSERV_DIFFSERV8:
2526         ft = cake_config_diffserv8(sch);
2527         break;
2528 
2529     case CAKE_DIFFSERV_DIFFSERV4:
2530         ft = cake_config_diffserv4(sch);
2531         break;
2532 
2533     case CAKE_DIFFSERV_DIFFSERV3:
2534     default:
2535         ft = cake_config_diffserv3(sch);
2536         break;
2537     }
2538 
2539     for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
2540         cake_clear_tin(sch, c);
2541         q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
2542     }
2543 
2544     q->rate_ns   = q->tins[ft].tin_rate_ns;
2545     q->rate_shft = q->tins[ft].tin_rate_shft;
2546 
2547     if (q->buffer_config_limit) {
2548         q->buffer_limit = q->buffer_config_limit;
2549     } else if (q->rate_bps) {
2550         u64 t = q->rate_bps * q->interval;
2551 
2552         do_div(t, USEC_PER_SEC / 4);
2553         q->buffer_limit = max_t(u32, t, 4U << 20);
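             /* With rate_bps in bytes per second (see the byte-rate comment
              * in cake_set_rate()) and interval in microseconds, this works
              * out to four times the implied bandwidth-delay product,
              * floored at 4 MiB.
              */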
2554     } else {
2555         q->buffer_limit = ~0;
2556     }
2557 
2558     sch->flags &= ~TCQ_F_CAN_BYPASS;
2559 
2560     q->buffer_limit = min(q->buffer_limit,
2561                   max(sch->limit * psched_mtu(qdisc_dev(sch)),
2562                   q->buffer_config_limit));
2563 }
2564 
2565 static int cake_change(struct Qdisc *sch, struct nlattr *opt,
2566                struct netlink_ext_ack *extack)
2567 {
2568     struct cake_sched_data *q = qdisc_priv(sch);
2569     struct nlattr *tb[TCA_CAKE_MAX + 1];
2570     int err;
2571 
2572     if (!opt)
2573         return -EINVAL;
2574 
2575     err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy,
2576                       extack);
2577     if (err < 0)
2578         return err;
2579 
2580     if (tb[TCA_CAKE_NAT]) {
2581 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
2582         q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
2583         q->flow_mode |= CAKE_FLOW_NAT_FLAG *
2584             !!nla_get_u32(tb[TCA_CAKE_NAT]);
2585 #else
2586         NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
2587                     "No conntrack support in kernel");
2588         return -EOPNOTSUPP;
2589 #endif
2590     }
2591 
2592     if (tb[TCA_CAKE_BASE_RATE64])
2593         q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
2594 
2595     if (tb[TCA_CAKE_DIFFSERV_MODE])
2596         q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
2597 
2598     if (tb[TCA_CAKE_WASH]) {
2599         if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
2600             q->rate_flags |= CAKE_FLAG_WASH;
2601         else
2602             q->rate_flags &= ~CAKE_FLAG_WASH;
2603     }
2604 
2605     if (tb[TCA_CAKE_FLOW_MODE])
2606         q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
2607                 (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
2608                     CAKE_FLOW_MASK));
2609 
2610     if (tb[TCA_CAKE_ATM])
2611         q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
2612 
2613     if (tb[TCA_CAKE_OVERHEAD]) {
2614         q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
2615         q->rate_flags |= CAKE_FLAG_OVERHEAD;
2616 
2617         q->max_netlen = 0;
2618         q->max_adjlen = 0;
2619         q->min_netlen = ~0;
2620         q->min_adjlen = ~0;
2621     }
2622 
2623     if (tb[TCA_CAKE_RAW]) {
2624         q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
2625 
2626         q->max_netlen = 0;
2627         q->max_adjlen = 0;
2628         q->min_netlen = ~0;
2629         q->min_adjlen = ~0;
2630     }
2631 
2632     if (tb[TCA_CAKE_MPU])
2633         q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
2634 
2635     if (tb[TCA_CAKE_RTT]) {
2636         q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
2637 
2638         if (!q->interval)
2639             q->interval = 1;
2640     }
2641 
2642     if (tb[TCA_CAKE_TARGET]) {
2643         q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
2644 
2645         if (!q->target)
2646             q->target = 1;
2647     }
2648 
2649     if (tb[TCA_CAKE_AUTORATE]) {
2650         if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
2651             q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
2652         else
2653             q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
2654     }
2655 
2656     if (tb[TCA_CAKE_INGRESS]) {
2657         if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
2658             q->rate_flags |= CAKE_FLAG_INGRESS;
2659         else
2660             q->rate_flags &= ~CAKE_FLAG_INGRESS;
2661     }
2662 
2663     if (tb[TCA_CAKE_ACK_FILTER])
2664         q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
2665 
2666     if (tb[TCA_CAKE_MEMORY])
2667         q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
2668 
2669     if (tb[TCA_CAKE_SPLIT_GSO]) {
2670         if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
2671             q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
2672         else
2673             q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
2674     }
2675 
2676     if (tb[TCA_CAKE_FWMARK]) {
2677         q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
2678         q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
2679     }
2680 
2681     if (q->tins) {
2682         sch_tree_lock(sch);
2683         cake_reconfigure(sch);
2684         sch_tree_unlock(sch);
2685     }
2686 
2687     return 0;
2688 }
2689 
2690 static void cake_destroy(struct Qdisc *sch)
2691 {
2692     struct cake_sched_data *q = qdisc_priv(sch);
2693 
2694     qdisc_watchdog_cancel(&q->watchdog);
2695     tcf_block_put(q->block);
2696     kvfree(q->tins);
2697 }
2698 
2699 static int cake_init(struct Qdisc *sch, struct nlattr *opt,
2700              struct netlink_ext_ack *extack)
2701 {
2702     struct cake_sched_data *q = qdisc_priv(sch);
2703     int i, j, err;
2704 
2705     sch->limit = 10240;
2706     q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
2707     q->flow_mode  = CAKE_FLOW_TRIPLE;
2708 
2709     q->rate_bps = 0; /* unlimited by default */
2710 
2711     q->interval = 100000; /* 100ms default */
2712     q->target   =   5000; /* 5ms: codel RFC argues
2713                    * for 5 to 10% of interval
2714                    */
2715     q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
2716     q->cur_tin = 0;
2717     q->cur_flow  = 0;
2718 
2719     qdisc_watchdog_init(&q->watchdog, sch);
2720 
2721     if (opt) {
2722         err = cake_change(sch, opt, extack);
2723 
2724         if (err)
2725             return err;
2726     }
2727 
2728     err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
2729     if (err)
2730         return err;
2731 
2732     quantum_div[0] = ~0;
2733     for (i = 1; i <= CAKE_QUEUES; i++)
2734         quantum_div[i] = 65535 / i;
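         /* quantum_div[] is a 16.16 reciprocal table: multiplying by
          * quantum_div[n] and shifting right by 16 approximates dividing by
          * n without a divide on the fast path.  Index 0 is a guard value;
          * host_load is always at least 1.
          */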
2735 
2736     q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
2737                GFP_KERNEL);
2738     if (!q->tins)
2739         return -ENOMEM;
2740 
2741     for (i = 0; i < CAKE_MAX_TINS; i++) {
2742         struct cake_tin_data *b = q->tins + i;
2743 
2744         INIT_LIST_HEAD(&b->new_flows);
2745         INIT_LIST_HEAD(&b->old_flows);
2746         INIT_LIST_HEAD(&b->decaying_flows);
2747         b->sparse_flow_count = 0;
2748         b->bulk_flow_count = 0;
2749         b->decaying_flow_count = 0;
2750 
2751         for (j = 0; j < CAKE_QUEUES; j++) {
2752             struct cake_flow *flow = b->flows + j;
2753             u32 k = j * CAKE_MAX_TINS + i;
2754 
2755             INIT_LIST_HEAD(&flow->flowchain);
2756             cobalt_vars_init(&flow->cvars);
2757 
2758             q->overflow_heap[k].t = i;
2759             q->overflow_heap[k].b = j;
2760             b->overflow_idx[j] = k;
2761         }
2762     }
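         /* The overflow heap holds one entry per (tin, flow) pair, with k
          * interleaving flows across tins; overflow_idx[] is the back-pointer
          * that lets enqueue/dequeue re-heapify just the entry whose backlog
          * changed.
          */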
2763 
2764     cake_reconfigure(sch);
2765     q->avg_peak_bandwidth = q->rate_bps;
2766     q->min_netlen = ~0;
2767     q->min_adjlen = ~0;
2768     return 0;
2769 }
2770 
2771 static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
2772 {
2773     struct cake_sched_data *q = qdisc_priv(sch);
2774     struct nlattr *opts;
2775 
2776     opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
2777     if (!opts)
2778         goto nla_put_failure;
2779 
2780     if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
2781                   TCA_CAKE_PAD))
2782         goto nla_put_failure;
2783 
2784     if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
2785             q->flow_mode & CAKE_FLOW_MASK))
2786         goto nla_put_failure;
2787 
2788     if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
2789         goto nla_put_failure;
2790 
2791     if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
2792         goto nla_put_failure;
2793 
2794     if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
2795         goto nla_put_failure;
2796 
2797     if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
2798             !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
2799         goto nla_put_failure;
2800 
2801     if (nla_put_u32(skb, TCA_CAKE_INGRESS,
2802             !!(q->rate_flags & CAKE_FLAG_INGRESS)))
2803         goto nla_put_failure;
2804 
2805     if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
2806         goto nla_put_failure;
2807 
2808     if (nla_put_u32(skb, TCA_CAKE_NAT,
2809             !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
2810         goto nla_put_failure;
2811 
2812     if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
2813         goto nla_put_failure;
2814 
2815     if (nla_put_u32(skb, TCA_CAKE_WASH,
2816             !!(q->rate_flags & CAKE_FLAG_WASH)))
2817         goto nla_put_failure;
2818 
2819     if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
2820         goto nla_put_failure;
2821 
2822     if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
2823         if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
2824             goto nla_put_failure;
2825 
2826     if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
2827         goto nla_put_failure;
2828 
2829     if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
2830         goto nla_put_failure;
2831 
2832     if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
2833             !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
2834         goto nla_put_failure;
2835 
2836     if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
2837         goto nla_put_failure;
2838 
2839     return nla_nest_end(skb, opts);
2840 
2841 nla_put_failure:
2842     return -1;
2843 }
2844 
2845 static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
2846 {
2847     struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
2848     struct cake_sched_data *q = qdisc_priv(sch);
2849     struct nlattr *tstats, *ts;
2850     int i;
2851 
2852     if (!stats)
2853         return -1;
2854 
2855 #define PUT_STAT_U32(attr, data) do {                      \
2856         if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
2857             goto nla_put_failure;                  \
2858     } while (0)
2859 #define PUT_STAT_U64(attr, data) do {                      \
2860         if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
2861                     data, TCA_CAKE_STATS_PAD)) \
2862             goto nla_put_failure;                  \
2863     } while (0)
2864 
2865     PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
2866     PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
2867     PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
2868     PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
2869     PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
2870     PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
2871     PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
2872     PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
2873 
2874 #undef PUT_STAT_U32
2875 #undef PUT_STAT_U64
2876 
2877     tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS);
2878     if (!tstats)
2879         goto nla_put_failure;
2880 
2881 #define PUT_TSTAT_U32(attr, data) do {                  \
2882         if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
2883             goto nla_put_failure;               \
2884     } while (0)
2885 #define PUT_TSTAT_U64(attr, data) do {                  \
2886         if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
2887                     data, TCA_CAKE_TIN_STATS_PAD))  \
2888             goto nla_put_failure;               \
2889     } while (0)
2890 
2891     for (i = 0; i < q->tin_cnt; i++) {
2892         struct cake_tin_data *b = &q->tins[q->tin_order[i]];
2893 
2894         ts = nla_nest_start_noflag(d->skb, i + 1);
2895         if (!ts)
2896             goto nla_put_failure;
2897 
2898         PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
2899         PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
2900         PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
2901 
2902         PUT_TSTAT_U32(TARGET_US,
2903                   ktime_to_us(ns_to_ktime(b->cparams.target)));
2904         PUT_TSTAT_U32(INTERVAL_US,
2905                   ktime_to_us(ns_to_ktime(b->cparams.interval)));
2906 
2907         PUT_TSTAT_U32(SENT_PACKETS, b->packets);
2908         PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
2909         PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
2910         PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
2911 
2912         PUT_TSTAT_U32(PEAK_DELAY_US,
2913                   ktime_to_us(ns_to_ktime(b->peak_delay)));
2914         PUT_TSTAT_U32(AVG_DELAY_US,
2915                   ktime_to_us(ns_to_ktime(b->avge_delay)));
2916         PUT_TSTAT_U32(BASE_DELAY_US,
2917                   ktime_to_us(ns_to_ktime(b->base_delay)));
2918 
2919         PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
2920         PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
2921         PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
2922 
2923         PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
2924                         b->decaying_flow_count);
2925         PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
2926         PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
2927         PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
2928 
2929         PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
2930         nla_nest_end(d->skb, ts);
2931     }
2932 
2933 #undef PUT_TSTAT_U32
2934 #undef PUT_TSTAT_U64
2935 
2936     nla_nest_end(d->skb, tstats);
2937     return nla_nest_end(d->skb, stats);
2938 
2939 nla_put_failure:
2940     nla_nest_cancel(d->skb, stats);
2941     return -1;
2942 }
2943 
2944 static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
2945 {
2946     return NULL;
2947 }
2948 
2949 static unsigned long cake_find(struct Qdisc *sch, u32 classid)
2950 {
2951     return 0;
2952 }
2953 
2954 static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
2955                    u32 classid)
2956 {
2957     return 0;
2958 }
2959 
2960 static void cake_unbind(struct Qdisc *q, unsigned long cl)
2961 {
2962 }
2963 
2964 static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
2965                     struct netlink_ext_ack *extack)
2966 {
2967     struct cake_sched_data *q = qdisc_priv(sch);
2968 
2969     if (cl)
2970         return NULL;
2971     return q->block;
2972 }
2973 
2974 static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
2975                struct sk_buff *skb, struct tcmsg *tcm)
2976 {
2977     tcm->tcm_handle |= TC_H_MIN(cl);
2978     return 0;
2979 }
2980 
2981 static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
2982                  struct gnet_dump *d)
2983 {
2984     struct cake_sched_data *q = qdisc_priv(sch);
2985     const struct cake_flow *flow = NULL;
2986     struct gnet_stats_queue qs = { 0 };
2987     struct nlattr *stats;
2988     u32 idx = cl - 1;
2989 
2990     if (idx < CAKE_QUEUES * q->tin_cnt) {
2991         const struct cake_tin_data *b = \
2992             &q->tins[q->tin_order[idx / CAKE_QUEUES]];
2993         const struct sk_buff *skb;
2994 
2995         flow = &b->flows[idx % CAKE_QUEUES];
2996 
2997         if (flow->head) {
2998             sch_tree_lock(sch);
2999             skb = flow->head;
3000             while (skb) {
3001                 qs.qlen++;
3002                 skb = skb->next;
3003             }
3004             sch_tree_unlock(sch);
3005         }
3006         qs.backlog = b->backlogs[idx % CAKE_QUEUES];
3007         qs.drops = flow->dropped;
3008     }
3009     if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
3010         return -1;
3011     if (flow) {
3012         ktime_t now = ktime_get();
3013 
3014         stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP);
3015         if (!stats)
3016             return -1;
3017 
3018 #define PUT_STAT_U32(attr, data) do {                      \
3019         if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
3020             goto nla_put_failure;                  \
3021     } while (0)
3022 #define PUT_STAT_S32(attr, data) do {                      \
3023         if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
3024             goto nla_put_failure;                  \
3025     } while (0)
3026 
3027         PUT_STAT_S32(DEFICIT, flow->deficit);
3028         PUT_STAT_U32(DROPPING, flow->cvars.dropping);
3029         PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
3030         PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
3031         if (flow->cvars.p_drop) {
3032             PUT_STAT_S32(BLUE_TIMER_US,
3033                      ktime_to_us(
3034                          ktime_sub(now,
3035                                flow->cvars.blue_timer)));
3036         }
3037         if (flow->cvars.dropping) {
3038             PUT_STAT_S32(DROP_NEXT_US,
3039                      ktime_to_us(
3040                          ktime_sub(now,
3041                                flow->cvars.drop_next)));
3042         }
3043 
3044         if (nla_nest_end(d->skb, stats) < 0)
3045             return -1;
3046     }
3047 
3048     return 0;
3049 
3050 nla_put_failure:
3051     nla_nest_cancel(d->skb, stats);
3052     return -1;
3053 }
3054 
3055 static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
3056 {
3057     struct cake_sched_data *q = qdisc_priv(sch);
3058     unsigned int i, j;
3059 
3060     if (arg->stop)
3061         return;
3062 
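         /* Class ids are 1-based: class (i * CAKE_QUEUES + j + 1) denotes
          * flow j of the i-th tin in display order, matching the decoding
          * in cake_dump_class_stats().
          */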
3063     for (i = 0; i < q->tin_cnt; i++) {
3064         struct cake_tin_data *b = &q->tins[q->tin_order[i]];
3065 
3066         for (j = 0; j < CAKE_QUEUES; j++) {
3067             if (list_empty(&b->flows[j].flowchain) ||
3068                 arg->count < arg->skip) {
3069                 arg->count++;
3070                 continue;
3071             }
3072             if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
3073                 arg->stop = 1;
3074                 break;
3075             }
3076             arg->count++;
3077         }
3078     }
3079 }
3080 
3081 static const struct Qdisc_class_ops cake_class_ops = {
3082     .leaf       =   cake_leaf,
3083     .find       =   cake_find,
3084     .tcf_block  =   cake_tcf_block,
3085     .bind_tcf   =   cake_bind,
3086     .unbind_tcf =   cake_unbind,
3087     .dump       =   cake_dump_class,
3088     .dump_stats =   cake_dump_class_stats,
3089     .walk       =   cake_walk,
3090 };
3091 
3092 static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
3093     .cl_ops     =   &cake_class_ops,
3094     .id     =   "cake",
3095     .priv_size  =   sizeof(struct cake_sched_data),
3096     .enqueue    =   cake_enqueue,
3097     .dequeue    =   cake_dequeue,
3098     .peek       =   qdisc_peek_dequeued,
3099     .init       =   cake_init,
3100     .reset      =   cake_reset,
3101     .destroy    =   cake_destroy,
3102     .change     =   cake_change,
3103     .dump       =   cake_dump,
3104     .dump_stats =   cake_dump_stats,
3105     .owner      =   THIS_MODULE,
3106 };
3107 
3108 static int __init cake_module_init(void)
3109 {
3110     return register_qdisc(&cake_qdisc_ops);
3111 }
3112 
3113 static void __exit cake_module_exit(void)
3114 {
3115     unregister_qdisc(&cake_qdisc_ops);
3116 }
3117 
3118 module_init(cake_module_init)
3119 module_exit(cake_module_exit)
3120 MODULE_AUTHOR("Jonathan Morton");
3121 MODULE_LICENSE("Dual BSD/GPL");
3122 MODULE_DESCRIPTION("The CAKE shaper.");