// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
 *
 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/siphash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/red.h>


/*  Stochastic Fairness Queuing algorithm.
    =======================================

    Source:
    Paul E. McKenney "Stochastic Fairness Queuing",
    IEEE INFOCOM'90 Proceedings, San Francisco, 1990.

    Paul E. McKenney "Stochastic Fairness Queuing",
    "Internetworking: Research and Experience", v.2, 1991, p.113-131.


    See also:
    M. Shreedhar and George Varghese "Efficient Fair
    Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


    This is not the thing that is usually called (W)FQ nowadays.
    It does not use any timestamp mechanism, but instead
    processes queues in round-robin order.

    ADVANTAGE:

    - It is very cheap. Both CPU and memory requirements are minimal.

    DRAWBACKS:

    - "Stochastic" -> It is not 100% fair.
    When hash collisions occur, several flows are considered as one.

    - "Round-robin" -> It introduces larger delays than virtual clock
    based schemes, and should not be used for isolating interactive
    traffic from non-interactive traffic. This means that this scheduler
    should be used as a leaf of CBQ or P3, which put interactive traffic
    into a higher priority band.

    We still need true WFQ for the top-level CSZ, but using WFQ
    for the best effort traffic is absolutely pointless:
    SFQ is superior for this purpose.

    IMPLEMENTATION:
    This implementation limits:
    - maximal queue length per flow to 127 packets,
    - max mtu to 2^18-1,
    - max 65408 flows,
    - number of hash buckets to 65536.

    It is easy to increase these values, but not on the fly.  */
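
/* Example (illustrative, not part of this file): a typical SFQ setup
 * from user space with the iproute2 'tc' tool, assuming eth0 as the
 * interface:
 *
 *   tc qdisc add dev eth0 root sfq perturb 10 quantum 1514 limit 64
 *
 * 'quantum' should be at least one MTU (sfq_init() below defaults it to
 * psched_mtu() of the device), and 'perturb 10' re-keys the hash every
 * 10 seconds via sfq_perturbation() to break up long-lived collisions.
 */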

#define SFQ_MAX_DEPTH       127 /* max number of packets per flow */
#define SFQ_DEFAULT_FLOWS   128
#define SFQ_MAX_FLOWS       (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
#define SFQ_EMPTY_SLOT      0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024

/* We use 16 bits to store allot, and want to handle packets up to 64K.
 * Scale allot by 8 (1<<3) so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT     3
#define SFQ_ALLOT_SIZE(X)   DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
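
/* Worked example of the allot scaling (illustrative): with
 * SFQ_ALLOT_SHIFT == 3, SFQ_ALLOT_SIZE(1514) == DIV_ROUND_UP(1514, 8)
 * == 190, and SFQ_ALLOT_SIZE(65535) == 8192, which still fits easily
 * in the 16-bit signed 'allot' field (SHRT_MAX == 32767).
 */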

/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;

/*
 * We don't use pointers, to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
 * while the following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 * are 'pointers' to dep[] array
 */
struct sfq_head {
    sfq_index   next;
    sfq_index   prev;
};

struct sfq_slot {
    struct sk_buff  *skblist_next;
    struct sk_buff  *skblist_prev;
    sfq_index   qlen; /* number of skbs in skblist */
    sfq_index   next; /* next slot in sfq RR chain */
    struct sfq_head dep; /* anchor in dep[] chains */
    unsigned short  hash; /* hash value (index in ht[]) */
    short       allot; /* credit for this slot */

    unsigned int    backlog;
    struct red_vars vars;
};

struct sfq_sched_data {
/* frequently used fields */
    int     limit;      /* limit of total number of packets in this qdisc */
    unsigned int    divisor;    /* number of slots in hash table */
    u8      headdrop;
    u8      maxdepth;   /* limit of packets per flow */

    siphash_key_t   perturbation;
    u8      cur_depth;  /* depth of longest slot */
    u8      flags;
    unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
    struct tcf_proto __rcu *filter_list;
    struct tcf_block *block;
    sfq_index   *ht;        /* Hash table ('divisor' slots) */
    struct sfq_slot *slots;     /* Flows table ('maxflows' entries) */

    struct red_parms *red_parms;
    struct tc_sfqred_stats stats;
    struct sfq_slot *tail;      /* current slot in round */

    struct sfq_head dep[SFQ_MAX_DEPTH + 1];
                    /* Linked lists of slots, indexed by depth
                     * dep[0] : list of unused flows
                     * dep[1] : list of flows with 1 packet
                     * dep[X] : list of flows with X packets
                     */

    unsigned int    maxflows;   /* number of flows in flows array */
    int     perturb_period;
    unsigned int    quantum;    /* Allotment per round: MUST BE >= MTU */
    struct timer_list perturb_timer;
    struct Qdisc    *sch;
};

/*
 * sfq_head are either in a sfq_slot or in dep[] array
 */
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
    if (val < SFQ_MAX_FLOWS)
        return &q->slots[val].dep;
    return &q->dep[val - SFQ_MAX_FLOWS];
}
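
/* Example of the index encoding (illustrative): with SFQ_MAX_FLOWS ==
 * 65408, sfq_dep_head(q, 5) returns &q->slots[5].dep, whereas
 * sfq_dep_head(q, 65408 + 3) returns &q->dep[3], the anchor of the
 * list of flows currently holding exactly 3 packets.
 */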

static unsigned int sfq_hash(const struct sfq_sched_data *q,
                 const struct sk_buff *skb)
{
    return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
}

static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                 int *qerr)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct tcf_result res;
    struct tcf_proto *fl;
    int result;

    if (TC_H_MAJ(skb->priority) == sch->handle &&
        TC_H_MIN(skb->priority) > 0 &&
        TC_H_MIN(skb->priority) <= q->divisor)
        return TC_H_MIN(skb->priority);

    fl = rcu_dereference_bh(q->filter_list);
    if (!fl)
        return sfq_hash(q, skb) + 1;

    *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    result = tcf_classify(skb, NULL, fl, &res, false);
    if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
        switch (result) {
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
        case TC_ACT_TRAP:
            *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
            fallthrough;
        case TC_ACT_SHOT:
            return 0;
        }
#endif
        if (TC_H_MIN(res.classid) <= q->divisor)
            return TC_H_MIN(res.classid);
    }
    return 0;
}

/*
 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
    sfq_index p, n;
    struct sfq_slot *slot = &q->slots[x];
    int qlen = slot->qlen;

    p = qlen + SFQ_MAX_FLOWS;
    n = q->dep[qlen].next;

    slot->dep.next = n;
    slot->dep.prev = p;

    q->dep[qlen].next = x;      /* sfq_dep_head(q, p)->next = x */
    sfq_dep_head(q, n)->prev = x;
}

#define sfq_unlink(q, x, n, p)          \
    do {                    \
        n = q->slots[x].dep.next;   \
        p = q->slots[x].dep.prev;   \
        sfq_dep_head(q, p)->next = n;   \
        sfq_dep_head(q, n)->prev = p;   \
    } while (0)


static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
    sfq_index p, n;
    int d;

    sfq_unlink(q, x, n, p);

    d = q->slots[x].qlen--;
    if (n == p && q->cur_depth == d)
        q->cur_depth--;
    sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
    sfq_index p, n;
    int d;

    sfq_unlink(q, x, n, p);

    d = ++q->slots[x].qlen;
    if (q->cur_depth < d)
        q->cur_depth = d;
    sfq_link(q, x);
}
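
/* Sketch of the dep[] bookkeeping above (illustrative): when a slot
 * holding 2 packets receives a third, sfq_inc() unlinks it from the
 * dep[2] chain and sfq_link() re-inserts it at the head of dep[3],
 * bumping cur_depth if this slot became the longest. sfq_drop() relies
 * on this to find a longest slot in O(1) via q->dep[q->cur_depth].next.
 */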

/* helper functions: might be changed when/if skb uses a standard list_head */

/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
    struct sk_buff *skb = slot->skblist_prev;

    slot->skblist_prev = skb->prev;
    skb->prev->next = (struct sk_buff *)slot;
    skb->next = skb->prev = NULL;
    return skb;
}

/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
    struct sk_buff *skb = slot->skblist_next;

    slot->skblist_next = skb->next;
    skb->next->prev = (struct sk_buff *)slot;
    skb->next = skb->prev = NULL;
    return skb;
}

static inline void slot_queue_init(struct sfq_slot *slot)
{
    memset(slot, 0, sizeof(*slot));
    slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}

/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
    skb->prev = slot->skblist_prev;
    skb->next = (struct sk_buff *)slot;
    slot->skblist_prev->next = skb;
    slot->skblist_prev = skb;
}
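
/* Note on the casts above (an observation, not an original comment):
 * the slot itself serves as the list sentinel. This works because
 * skblist_next/skblist_prev are the first two fields of struct sfq_slot,
 * mirroring the next/prev pointers at the start of struct sk_buff, so
 * '(struct sk_buff *)slot' is a valid anchor for the circular list.
 */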

static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    sfq_index x, d = q->cur_depth;
    struct sk_buff *skb;
    unsigned int len;
    struct sfq_slot *slot;

    /* Queue is full! Find the longest slot and drop tail packet from it */
    if (d > 1) {
        x = q->dep[d].next;
        slot = &q->slots[x];
drop:
        skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
        len = qdisc_pkt_len(skb);
        slot->backlog -= len;
        sfq_dec(q, x);
        sch->q.qlen--;
        qdisc_qstats_backlog_dec(sch, skb);
        qdisc_drop(skb, sch, to_free);
        return len;
    }

    if (d == 1) {
        /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
        x = q->tail->next;
        slot = &q->slots[x];
        q->tail->next = slot->next;
        q->ht[slot->hash] = SFQ_EMPTY_SLOT;
        goto drop;
    }

    return 0;
}

/* Is the ECN parameter configured? */
static int sfq_prob_mark(const struct sfq_sched_data *q)
{
    return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold just be marked? */
static int sfq_hard_mark(const struct sfq_sched_data *q)
{
    return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
}

static int sfq_headdrop(const struct sfq_sched_data *q)
{
    return q->headdrop;
}

static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned int hash, dropped;
    sfq_index x, qlen;
    struct sfq_slot *slot;
    int ret;
    struct sk_buff *head;
    int delta;

    hash = sfq_classify(skb, sch, &ret);
    if (hash == 0) {
        if (ret & __NET_XMIT_BYPASS)
            qdisc_qstats_drop(sch);
        __qdisc_drop(skb, to_free);
        return ret;
    }
    hash--;

    x = q->ht[hash];
    slot = &q->slots[x];
    if (x == SFQ_EMPTY_SLOT) {
        x = q->dep[0].next; /* get a free slot */
        if (x >= SFQ_MAX_FLOWS)
            return qdisc_drop(skb, sch, to_free);
        q->ht[hash] = x;
        slot = &q->slots[x];
        slot->hash = hash;
        slot->backlog = 0; /* should already be 0 anyway... */
        red_set_vars(&slot->vars);
        goto enqueue;
    }
    if (q->red_parms) {
        slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
                            &slot->vars,
                            slot->backlog);
        switch (red_action(q->red_parms,
                   &slot->vars,
                   slot->vars.qavg)) {
        case RED_DONT_MARK:
            break;

        case RED_PROB_MARK:
            qdisc_qstats_overlimit(sch);
            if (sfq_prob_mark(q)) {
                /* We know we have at least one packet in queue */
                if (sfq_headdrop(q) &&
                    INET_ECN_set_ce(slot->skblist_next)) {
                    q->stats.prob_mark_head++;
                    break;
                }
                if (INET_ECN_set_ce(skb)) {
                    q->stats.prob_mark++;
                    break;
                }
            }
            q->stats.prob_drop++;
            goto congestion_drop;

        case RED_HARD_MARK:
            qdisc_qstats_overlimit(sch);
            if (sfq_hard_mark(q)) {
                /* We know we have at least one packet in queue */
                if (sfq_headdrop(q) &&
                    INET_ECN_set_ce(slot->skblist_next)) {
                    q->stats.forced_mark_head++;
                    break;
                }
                if (INET_ECN_set_ce(skb)) {
                    q->stats.forced_mark++;
                    break;
                }
            }
            q->stats.forced_drop++;
            goto congestion_drop;
        }
    }

    if (slot->qlen >= q->maxdepth) {
congestion_drop:
        if (!sfq_headdrop(q))
            return qdisc_drop(skb, sch, to_free);

        /* We know we have at least one packet in queue */
        head = slot_dequeue_head(slot);
        delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
        sch->qstats.backlog -= delta;
        slot->backlog -= delta;
        qdisc_drop(head, sch, to_free);

        slot_queue_add(slot, skb);
        qdisc_tree_reduce_backlog(sch, 0, delta);
        return NET_XMIT_CN;
    }

enqueue:
    qdisc_qstats_backlog_inc(sch, skb);
    slot->backlog += qdisc_pkt_len(skb);
    slot_queue_add(slot, skb);
    sfq_inc(q, x);
    if (slot->qlen == 1) {      /* The flow is new */
        if (q->tail == NULL) {  /* It is the first flow */
            slot->next = x;
        } else {
            slot->next = q->tail->next;
            q->tail->next = x;
        }
        /* We put this flow at the end of our flow list.
         * Making a new flow wait behind old ones might sound unfair,
         * but otherwise we could end up servicing new flows only and
         * freezing the old ones.
         */
        q->tail = slot;
        /* We could use a bigger initial quantum for new flows */
        slot->allot = q->scaled_quantum;
    }
    if (++sch->q.qlen <= q->limit)
        return NET_XMIT_SUCCESS;

    qlen = slot->qlen;
    dropped = sfq_drop(sch, to_free);
    /* Return Congestion Notification only if we dropped a packet
     * from this flow.
     */
    if (qlen != slot->qlen) {
        qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
        return NET_XMIT_CN;
    }

    /* As we dropped a packet, better let the upper stack know this */
    qdisc_tree_reduce_backlog(sch, 1, dropped);
    return NET_XMIT_SUCCESS;
}
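
/* Example (illustrative): the RED-style marking path in sfq_enqueue()
 * is only active once RED parameters are configured, e.g. with iproute2:
 *
 *   tc qdisc add dev eth0 root sfq limit 3000 flows 512 \
 *       redflowlimit 100000 min 8000 max 60000 probability 0.2 ecn headdrop
 *
 * With 'ecn' (TC_RED_ECN) set, packets between min and max are CE-marked
 * instead of dropped; 'headdrop' prefers marking at the head of the
 * flow's queue, which gets the congestion signal to the sender sooner.
 */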

static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;
    sfq_index a, next_a;
    struct sfq_slot *slot;

    /* No active slots */
    if (q->tail == NULL)
        return NULL;

next_slot:
    a = q->tail->next;
    slot = &q->slots[a];
    if (slot->allot <= 0) {
        q->tail = slot;
        slot->allot += q->scaled_quantum;
        goto next_slot;
    }
    skb = slot_dequeue_head(slot);
    sfq_dec(q, a);
    qdisc_bstats_update(sch, skb);
    sch->q.qlen--;
    qdisc_qstats_backlog_dec(sch, skb);
    slot->backlog -= qdisc_pkt_len(skb);
    /* Is the slot empty? */
    if (slot->qlen == 0) {
        q->ht[slot->hash] = SFQ_EMPTY_SLOT;
        next_a = slot->next;
        if (a == next_a) {
            q->tail = NULL; /* no more active slots */
            return skb;
        }
        q->tail->next = next_a;
    } else {
        slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
    }
    return skb;
}
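
/* Worked example (illustrative): with quantum 1514, scaled_quantum is
 * SFQ_ALLOT_SIZE(1514) == 190. A new slot starts with allot == 190, and
 * dequeuing a 1000-byte packet costs SFQ_ALLOT_SIZE(1000) == 125 credits.
 * Once allot drops to 0 or below, the slot is skipped and recharged with
 * another scaled_quantum on the next round, as in deficit round robin.
 */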

static void
sfq_reset(struct Qdisc *sch)
{
    struct sk_buff *skb;

    while ((skb = sfq_dequeue(sch)) != NULL)
        rtnl_kfree_skbs(skb, skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
 * counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct sk_buff *skb;
    int i;
    struct sfq_slot *slot;
    struct sk_buff_head list;
    int dropped = 0;
    unsigned int drop_len = 0;

    __skb_queue_head_init(&list);

    for (i = 0; i < q->maxflows; i++) {
        slot = &q->slots[i];
        if (!slot->qlen)
            continue;
        while (slot->qlen) {
            skb = slot_dequeue_head(slot);
            sfq_dec(q, i);
            __skb_queue_tail(&list, skb);
        }
        slot->backlog = 0;
        red_set_vars(&slot->vars);
        q->ht[slot->hash] = SFQ_EMPTY_SLOT;
    }
    q->tail = NULL;

    while ((skb = __skb_dequeue(&list)) != NULL) {
        unsigned int hash = sfq_hash(q, skb);
        sfq_index x = q->ht[hash];

        slot = &q->slots[x];
        if (x == SFQ_EMPTY_SLOT) {
            x = q->dep[0].next; /* get a free slot */
            if (x >= SFQ_MAX_FLOWS) {
drop:
                qdisc_qstats_backlog_dec(sch, skb);
                drop_len += qdisc_pkt_len(skb);
                kfree_skb(skb);
                dropped++;
                continue;
            }
            q->ht[hash] = x;
            slot = &q->slots[x];
            slot->hash = hash;
        }
        if (slot->qlen >= q->maxdepth)
            goto drop;
        slot_queue_add(slot, skb);
        if (q->red_parms)
            slot->vars.qavg = red_calc_qavg(q->red_parms,
                            &slot->vars,
                            slot->backlog);
        slot->backlog += qdisc_pkt_len(skb);
        sfq_inc(q, x);
        if (slot->qlen == 1) {      /* The flow is new */
            if (q->tail == NULL) {  /* It is the first flow */
                slot->next = x;
            } else {
                slot->next = q->tail->next;
                q->tail->next = x;
            }
            q->tail = slot;
            slot->allot = q->scaled_quantum;
        }
    }
    sch->q.qlen -= dropped;
    qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}

static void sfq_perturbation(struct timer_list *t)
{
    struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
    struct Qdisc *sch = q->sch;
    spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
    siphash_key_t nkey;

    get_random_bytes(&nkey, sizeof(nkey));
    spin_lock(root_lock);
    q->perturbation = nkey;
    if (!q->filter_list && q->tail)
        sfq_rehash(sch);
    spin_unlock(root_lock);

    if (q->perturb_period)
        mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    struct tc_sfq_qopt *ctl = nla_data(opt);
    struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
    unsigned int qlen, dropped = 0;
    struct red_parms *p = NULL;
    struct sk_buff *to_free = NULL;
    struct sk_buff *tail = NULL;

    if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
        return -EINVAL;
    if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
        ctl_v1 = nla_data(opt);
    if (ctl->divisor &&
        (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
        return -EINVAL;

    /* slot->allot is a short, make sure quantum is not too big. */
    if (ctl->quantum) {
        unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);

        if (scaled <= 0 || scaled > SHRT_MAX)
            return -EINVAL;
    }

    if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
                    ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
        return -EINVAL;
    if (ctl_v1 && ctl_v1->qth_min) {
        p = kmalloc(sizeof(*p), GFP_KERNEL);
        if (!p)
            return -ENOMEM;
    }
    sch_tree_lock(sch);
    if (ctl->quantum) {
        q->quantum = ctl->quantum;
        q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
    }
    q->perturb_period = ctl->perturb_period * HZ;
    if (ctl->flows)
        q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
    if (ctl->divisor) {
        q->divisor = ctl->divisor;
        q->maxflows = min_t(u32, q->maxflows, q->divisor);
    }
    if (ctl_v1) {
        if (ctl_v1->depth)
            q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
        if (p) {
            swap(q->red_parms, p);
            red_set_parms(q->red_parms,
                      ctl_v1->qth_min, ctl_v1->qth_max,
                      ctl_v1->Wlog,
                      ctl_v1->Plog, ctl_v1->Scell_log,
                      NULL,
                      ctl_v1->max_P);
        }
        q->flags = ctl_v1->flags;
        q->headdrop = ctl_v1->headdrop;
    }
    if (ctl->limit) {
        q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
        q->maxflows = min_t(u32, q->maxflows, q->limit);
    }

    qlen = sch->q.qlen;
    while (sch->q.qlen > q->limit) {
        dropped += sfq_drop(sch, &to_free);
        if (!tail)
            tail = to_free;
    }

    rtnl_kfree_skbs(to_free, tail);
    qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

    del_timer(&q->perturb_timer);
    if (q->perturb_period) {
        mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
        get_random_bytes(&q->perturbation, sizeof(q->perturbation));
    }
    sch_tree_unlock(sch);
    kfree(p);
    return 0;
}
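
/* Note (an observation, not an original comment): despite its name,
 * sfq_change() is only reached from sfq_init() at attach time, since
 * sfq_qdisc_ops below leaves .change NULL. Reconfiguring a live SFQ
 * therefore means replacing the qdisc, e.g. (illustrative command)
 * 'tc qdisc replace dev eth0 root sfq perturb 30'.
 */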

static void *sfq_alloc(size_t sz)
{
    return kvmalloc(sz, GFP_KERNEL);
}

static void sfq_free(void *addr)
{
    kvfree(addr);
}

static void sfq_destroy(struct Qdisc *sch)
{
    struct sfq_sched_data *q = qdisc_priv(sch);

    tcf_block_put(q->block);
    q->perturb_period = 0;
    del_timer_sync(&q->perturb_timer);
    sfq_free(q->ht);
    sfq_free(q->slots);
    kfree(q->red_parms);
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
            struct netlink_ext_ack *extack)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    int i;
    int err;

    q->sch = sch;
    timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);

    err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
    if (err)
        return err;

    for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
        q->dep[i].next = i + SFQ_MAX_FLOWS;
        q->dep[i].prev = i + SFQ_MAX_FLOWS;
    }

    q->limit = SFQ_MAX_DEPTH;
    q->maxdepth = SFQ_MAX_DEPTH;
    q->cur_depth = 0;
    q->tail = NULL;
    q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
    q->maxflows = SFQ_DEFAULT_FLOWS;
    q->quantum = psched_mtu(qdisc_dev(sch));
    q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
    q->perturb_period = 0;
    get_random_bytes(&q->perturbation, sizeof(q->perturbation));

    if (opt) {
        int err = sfq_change(sch, opt);

        if (err)
            return err;
    }

    q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
    q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
    if (!q->ht || !q->slots) {
        /* Note: sfq_destroy() will be called by our caller */
        return -ENOMEM;
    }

    for (i = 0; i < q->divisor; i++)
        q->ht[i] = SFQ_EMPTY_SLOT;

    for (i = 0; i < q->maxflows; i++) {
        slot_queue_init(&q->slots[i]);
        sfq_link(q, i);
    }
    if (q->limit >= 1)
        sch->flags |= TCQ_F_CAN_BYPASS;
    else
        sch->flags &= ~TCQ_F_CAN_BYPASS;
    return 0;
}
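
/* Defaults recap (illustrative): with no options, SFQ comes up with
 * divisor 1024, maxflows 128, maxdepth == limit == 127 packets, and
 * quantum equal to the device MTU via psched_mtu(); the hash table and
 * the slot array are then sized from divisor and maxflows respectively.
 */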

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned char *b = skb_tail_pointer(skb);
    struct tc_sfq_qopt_v1 opt;
    struct red_parms *p = q->red_parms;

    memset(&opt, 0, sizeof(opt));
    opt.v0.quantum  = q->quantum;
    opt.v0.perturb_period = q->perturb_period / HZ;
    opt.v0.limit    = q->limit;
    opt.v0.divisor  = q->divisor;
    opt.v0.flows    = q->maxflows;
    opt.depth   = q->maxdepth;
    opt.headdrop    = q->headdrop;

    if (p) {
        opt.qth_min = p->qth_min >> p->Wlog;
        opt.qth_max = p->qth_max >> p->Wlog;
        opt.Wlog    = p->Wlog;
        opt.Plog    = p->Plog;
        opt.Scell_log   = p->Scell_log;
        opt.max_P   = p->max_P;
    }
    memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
    opt.flags   = q->flags;

    if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
        goto nla_put_failure;

    return skb->len;

nla_put_failure:
    nlmsg_trim(skb, b);
    return -1;
}

static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
    return NULL;
}

static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
{
    return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
                  u32 classid)
{
    return 0;
}

static void sfq_unbind(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
                       struct netlink_ext_ack *extack)
{
    struct sfq_sched_data *q = qdisc_priv(sch);

    if (cl)
        return NULL;
    return q->block;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
              struct sk_buff *skb, struct tcmsg *tcm)
{
    tcm->tcm_handle |= TC_H_MIN(cl);
    return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                struct gnet_dump *d)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    sfq_index idx = q->ht[cl - 1];
    struct gnet_stats_queue qs = { 0 };
    struct tc_sfq_xstats xstats = { 0 };

    if (idx != SFQ_EMPTY_SLOT) {
        const struct sfq_slot *slot = &q->slots[idx];

        xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
        qs.qlen = slot->qlen;
        qs.backlog = slot->backlog;
    }
    if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
        return -1;
    return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
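
/* Example (illustrative): the per-flow statistics exported above can be
 * read from user space with:
 *
 *   tc -s class show dev eth0
 *
 * Each active hash bucket appears as a pseudo-class whose qlen, backlog
 * and allot come from the sfq_slot currently bound to that bucket.
 */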

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
    struct sfq_sched_data *q = qdisc_priv(sch);
    unsigned int i;

    if (arg->stop)
        return;

    for (i = 0; i < q->divisor; i++) {
        if (q->ht[i] == SFQ_EMPTY_SLOT ||
            arg->count < arg->skip) {
            arg->count++;
            continue;
        }
        if (arg->fn(sch, i + 1, arg) < 0) {
            arg->stop = 1;
            break;
        }
        arg->count++;
    }
}

static const struct Qdisc_class_ops sfq_class_ops = {
    .leaf       =   sfq_leaf,
    .find       =   sfq_find,
    .tcf_block  =   sfq_tcf_block,
    .bind_tcf   =   sfq_bind,
    .unbind_tcf =   sfq_unbind,
    .dump       =   sfq_dump_class,
    .dump_stats =   sfq_dump_class_stats,
    .walk       =   sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
    .cl_ops     =   &sfq_class_ops,
    .id     =   "sfq",
    .priv_size  =   sizeof(struct sfq_sched_data),
    .enqueue    =   sfq_enqueue,
    .dequeue    =   sfq_dequeue,
    .peek       =   qdisc_peek_dequeued,
    .init       =   sfq_init,
    .reset      =   sfq_reset,
    .destroy    =   sfq_destroy,
    .change     =   NULL,
    .dump       =   sfq_dump,
    .owner      =   THIS_MODULE,
};

static int __init sfq_module_init(void)
{
    return register_qdisc(&sfq_qdisc_ops);
}

static void __exit sfq_module_exit(void)
{
    unregister_qdisc(&sfq_qdisc_ops);
}

module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");