Back to home page

OSCL-LXR

 
 

    


0001 /* SPDX-License-Identifier: GPL-2.0
0002  *
0003  * Copyright (c) 2019 Facebook
0004  *
0005  * This program is free software; you can redistribute it and/or
0006  * modify it under the terms of version 2 of the GNU General Public
0007  * License as published by the Free Software Foundation.
0008  *
0009  * Include file for sample Host Bandwidth Manager (HBM) BPF programs
0010  */
0011 #define KBUILD_MODNAME "foo"
0012 #include <uapi/linux/bpf.h>
0013 #include <uapi/linux/if_ether.h>
0014 #include <uapi/linux/if_packet.h>
0015 #include <uapi/linux/ip.h>
0016 #include <uapi/linux/ipv6.h>
0017 #include <uapi/linux/in.h>
0018 #include <uapi/linux/tcp.h>
0019 #include <uapi/linux/filter.h>
0020 #include <uapi/linux/pkt_cls.h>
0021 #include <net/ipv6.h>
0022 #include <net/inet_ecn.h>
0023 #include <bpf/bpf_endian.h>
0024 #include <bpf/bpf_helpers.h>
0025 #include "hbm.h"
0026 
/*
 * Verdict values used by the HBM programs.
 *
 * DROP_PKT / ALLOW_PKT are the two base verdicts.  CWR (value 2) is the
 * extra bit that the statistics code ORs into the verdict when the
 * congestion-window-reduce flag is set.  TCP_ECN_OK is not referenced in
 * this header -- presumably consumed by the including programs.
 */
#define DROP_PKT	0
#define ALLOW_PKT	1
#define TCP_ECN_OK	1
#define CWR		2
0031 
/*
 * Debug tracing switch: unless HBM_DEBUG is defined, bpf_printk() is
 * replaced by an empty macro, so its arguments are never evaluated and
 * every call site collapses to an empty statement.
 */
#if !defined(HBM_DEBUG)
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif
0036 
/*
 * Byte-based (credit) thresholds.  Everything is expressed as a multiple
 * of MAX_BYTES_PER_PACKET.
 */
#define MAX_BYTES_PER_PACKET	1500
#define INITIAL_CREDIT_PACKETS	100

/* Credit a queue starts with, and the ceiling named MAX_CREDIT. */
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)

/* Packets longer than this many bytes use the "large packet" threshold. */
#define LARGE_PKT_THRESH	120

/* Marking starts at 40 packets worth of bytes, dropping at 400. */
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
/* The large-packet drop threshold sits 15 packets below DROP_THRESH. */
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
/* Width of the region between first mark and large-packet drop. */
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
0046 
/*
 * Time-based thresholds (nanoseconds), used for the accounting that goes
 * with fq's EDT.
 */
#define BURST_SIZE_NS		100000	/* 100us */
#define MARK_THRESH_NS		50000	/* 50us */
#define DROP_THRESH_NS		500000	/* 500us */
/*
 * Large packets are dropped 20us earlier than DROP_THRESH_NS; that 20us
 * of queuing is reserved for small packets (less than 120 bytes).
 */
#define LARGE_PKT_DROP_THRESH_NS	(DROP_THRESH_NS - 20000)
/* Width of the region between first mark and large-packet drop. */
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)
0054 
/*
 * Rate conversions.  "rate" is in bytes per nanosecond, scaled up by 2^20
 * (i.e. << 20), so multiplying by a time delta and shifting right by 20
 * gives bytes, and shifting a byte count left by 20 before dividing by the
 * rate gives nanoseconds.
 *
 * BYTES_PER_NS() is the same computation as CREDIT_PER_NS() under a second
 * name.  BYTES_TO_NS() divides by @rate, so @rate must be non-zero.
 */
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) CREDIT_PER_NS(delta, rate)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
0059 
0060 struct {
0061     __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
0062     __type(key, struct bpf_cgroup_storage_key);
0063     __type(value, struct hbm_vqueue);
0064 } queue_state SEC(".maps");
0065 
0066 struct {
0067     __uint(type, BPF_MAP_TYPE_ARRAY);
0068     __uint(max_entries, 1);
0069     __type(key, u32);
0070     __type(value, struct hbm_queue_stats);
0071 } queue_stats SEC(".maps");
0072 
/*
 * Per-packet facts gathered by hbm_get_pkt_info().  Field order is part
 * of the layout and must not be changed.
 */
struct hbm_pkt_info {
	int	cwnd;		/* copied from the TCP socket's snd_cwnd, else 0 */
	int	rtt;		/* TCP socket's srtt_us >> 3, else 0 */
	int	packets_out;	/* copied from the TCP socket's packets_out */
	bool	is_ip;		/* header version field is 4 or 6 */
	bool	is_tcp;		/* next protocol in the IP header is 6 (TCP) */
	short	ecn;		/* ECN bits extracted from the IP header */
};
0081 
0082 static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti)
0083 {
0084     struct bpf_sock *sk;
0085     struct bpf_tcp_sock *tp;
0086 
0087     sk = skb->sk;
0088     if (sk) {
0089         sk = bpf_sk_fullsock(sk);
0090         if (sk) {
0091             if (sk->protocol == IPPROTO_TCP) {
0092                 tp = bpf_tcp_sock(sk);
0093                 if (tp) {
0094                     pkti->cwnd = tp->snd_cwnd;
0095                     pkti->rtt = tp->srtt_us >> 3;
0096                     pkti->packets_out = tp->packets_out;
0097                     return 0;
0098                 }
0099             }
0100         }
0101     }
0102     pkti->cwnd = 0;
0103     pkti->rtt = 0;
0104     pkti->packets_out = 0;
0105     return 1;
0106 }
0107 
0108 static void hbm_get_pkt_info(struct __sk_buff *skb,
0109                  struct hbm_pkt_info *pkti)
0110 {
0111     struct iphdr iph;
0112     struct ipv6hdr *ip6h;
0113 
0114     pkti->cwnd = 0;
0115     pkti->rtt = 0;
0116     bpf_skb_load_bytes(skb, 0, &iph, 12);
0117     if (iph.version == 6) {
0118         ip6h = (struct ipv6hdr *)&iph;
0119         pkti->is_ip = true;
0120         pkti->is_tcp = (ip6h->nexthdr == 6);
0121         pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK;
0122     } else if (iph.version == 4) {
0123         pkti->is_ip = true;
0124         pkti->is_tcp = (iph.protocol == 6);
0125         pkti->ecn = iph.tos & INET_ECN_MASK;
0126     } else {
0127         pkti->is_ip = false;
0128         pkti->is_tcp = false;
0129         pkti->ecn = 0;
0130     }
0131     if (pkti->is_tcp)
0132         get_tcp_info(skb, pkti);
0133 }
0134 
0135 static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate)
0136 {
0137     bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
0138     qdp->lasttime = bpf_ktime_get_ns();
0139     qdp->credit = INIT_CREDIT;
0140     qdp->rate = rate * 128;
0141 }
0142 
0143 static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp,
0144                         int rate)
0145 {
0146     unsigned long long curtime;
0147 
0148     curtime = bpf_ktime_get_ns();
0149     bpf_printk("Initializing queue_state, rate:%d\n", rate * 128);
0150     qdp->lasttime = curtime - BURST_SIZE_NS;    // support initial burst
0151     qdp->credit = 0;                // not used
0152     qdp->rate = rate * 128;
0153 }
0154 
0155 static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp,
0156                          int len,
0157                          unsigned long long curtime,
0158                          bool congestion_flag,
0159                          bool drop_flag,
0160                          bool cwr_flag,
0161                          bool ecn_ce_flag,
0162                          struct hbm_pkt_info *pkti,
0163                          int credit)
0164 {
0165     int rv = ALLOW_PKT;
0166 
0167     if (qsp != NULL) {
0168         // Following is needed for work conserving
0169         __sync_add_and_fetch(&(qsp->bytes_total), len);
0170         if (qsp->stats) {
0171             // Optionally update statistics
0172             if (qsp->firstPacketTime == 0)
0173                 qsp->firstPacketTime = curtime;
0174             qsp->lastPacketTime = curtime;
0175             __sync_add_and_fetch(&(qsp->pkts_total), 1);
0176             if (congestion_flag) {
0177                 __sync_add_and_fetch(&(qsp->pkts_marked), 1);
0178                 __sync_add_and_fetch(&(qsp->bytes_marked), len);
0179             }
0180             if (drop_flag) {
0181                 __sync_add_and_fetch(&(qsp->pkts_dropped), 1);
0182                 __sync_add_and_fetch(&(qsp->bytes_dropped),
0183                              len);
0184             }
0185             if (ecn_ce_flag)
0186                 __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1);
0187             if (pkti->cwnd) {
0188                 __sync_add_and_fetch(&(qsp->sum_cwnd),
0189                              pkti->cwnd);
0190                 __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1);
0191             }
0192             if (pkti->rtt)
0193                 __sync_add_and_fetch(&(qsp->sum_rtt),
0194                              pkti->rtt);
0195             __sync_add_and_fetch(&(qsp->sum_credit), credit);
0196 
0197             if (drop_flag)
0198                 rv = DROP_PKT;
0199             if (cwr_flag)
0200                 rv |= 2;
0201             if (rv == DROP_PKT)
0202                 __sync_add_and_fetch(&(qsp->returnValCount[0]),
0203                              1);
0204             else if (rv == ALLOW_PKT)
0205                 __sync_add_and_fetch(&(qsp->returnValCount[1]),
0206                              1);
0207             else if (rv == 2)
0208                 __sync_add_and_fetch(&(qsp->returnValCount[2]),
0209                              1);
0210             else if (rv == 3)
0211                 __sync_add_and_fetch(&(qsp->returnValCount[3]),
0212                              1);
0213         }
0214     }
0215 }