Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0
0002 
0003 #include <linux/version.h>
0004 #include <linux/ptrace.h>
0005 #include <uapi/linux/bpf.h>
0006 #include <bpf/bpf_helpers.h>
0007 
/*
 * Platform description.
 *
 * The CPU count and the number of cstates and pstates below are based
 * on the 96boards Hikey with octa CA53 CPUs:
 *
 *   - every CPU has three idle states (cstate):
 *       WFI, CPU_OFF, CLUSTER_OFF
 *
 *   - every CPU has five operating points (pstate):
 *       208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
 *
 * The code relies on these assumptions; other platforms need to adjust
 * these definitions.
 */
#define MAX_CPU             8
#define MAX_PSTATE_ENTRIES  5
#define MAX_CSTATE_ENTRIES  3

/*
 * Operating point table; the position of a frequency in this table is
 * its pstate index. Values are the MHz figures above multiplied by 1000.
 */
static int cpu_opps[] = {
    208000,
    432000,
    729000,
    960000,
    1200000,
};
0026 
/*
 * my_map records, for cstate and pstate separately, the current state
 * value and the timestamp at which it was recorded: (Idx, Ts). When a
 * new event arrives the pair is replaced by the new one (Idx`, Ts`).
 *
 * From (Idx, Ts) and (Idx`, Ts`) the time spent in the previous state
 * follows as: Duration(Idx) = Ts` - Ts.
 *
 * Every CPU owns one group of MAP_OFF_NUM consecutive slots:
 *
 * +--------------------------+
 * | cstate timestamp         |
 * +--------------------------+
 * | cstate index             |
 * +--------------------------+
 * | pstate timestamp         |
 * +--------------------------+
 * | pstate index             |
 * +--------------------------+
 *
 * The slot for a given CPU is found at cpu * MAP_OFF_NUM + MAP_OFF_*.
 */
enum {
    MAP_OFF_CSTATE_TIME = 0,    /* timestamp of the last cpu_idle event */
    MAP_OFF_CSTATE_IDX  = 1,    /* state value of the last cpu_idle event */
    MAP_OFF_PSTATE_TIME = 2,    /* start time of the current pstate interval */
    MAP_OFF_PSTATE_IDX  = 3,    /* state value of the last cpu_frequency event */
    MAP_OFF_NUM         = 4,    /* number of slots per CPU */
};
0053 
0054 struct {
0055     __uint(type, BPF_MAP_TYPE_ARRAY);
0056     __type(key, u32);
0057     __type(value, u64);
0058     __uint(max_entries, MAX_CPU * MAP_OFF_NUM);
0059 } my_map SEC(".maps");
0060 
0061 /* cstate_duration records duration time for every idle state per CPU */
0062 struct {
0063     __uint(type, BPF_MAP_TYPE_ARRAY);
0064     __type(key, u32);
0065     __type(value, u64);
0066     __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
0067 } cstate_duration SEC(".maps");
0068 
0069 /* pstate_duration records duration time for every operating point per CPU */
0070 struct {
0071     __uint(type, BPF_MAP_TYPE_ARRAY);
0072     __type(key, u32);
0073     __type(value, u64);
0074     __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
0075 } pstate_duration SEC(".maps");
0076 
0077 /*
0078  * The trace events for cpu_idle and cpu_frequency are taken from:
0079  * /sys/kernel/debug/tracing/events/power/cpu_idle/format
0080  * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
0081  *
0082  * These two events have same format, so define one common structure.
0083  */
0084 struct cpu_args {
0085     u64 pad;
0086     u32 state;
0087     u32 cpu_id;
0088 };
0089 
0090 /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
0091 static u32 find_cpu_pstate_idx(u32 frequency)
0092 {
0093     u32 i;
0094 
0095     for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
0096         if (frequency == cpu_opps[i])
0097             return i;
0098     }
0099 
0100     return i;
0101 }
0102 
0103 SEC("tracepoint/power/cpu_idle")
0104 int bpf_prog1(struct cpu_args *ctx)
0105 {
0106     u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
0107     u32 key, cpu, pstate_idx;
0108     u64 *val;
0109 
0110     if (ctx->cpu_id > MAX_CPU)
0111         return 0;
0112 
0113     cpu = ctx->cpu_id;
0114 
0115     key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
0116     cts = bpf_map_lookup_elem(&my_map, &key);
0117     if (!cts)
0118         return 0;
0119 
0120     key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
0121     cstate = bpf_map_lookup_elem(&my_map, &key);
0122     if (!cstate)
0123         return 0;
0124 
0125     key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
0126     pts = bpf_map_lookup_elem(&my_map, &key);
0127     if (!pts)
0128         return 0;
0129 
0130     key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
0131     pstate = bpf_map_lookup_elem(&my_map, &key);
0132     if (!pstate)
0133         return 0;
0134 
0135     prev_state = *cstate;
0136     *cstate = ctx->state;
0137 
0138     if (!*cts) {
0139         *cts = bpf_ktime_get_ns();
0140         return 0;
0141     }
0142 
0143     cur_ts = bpf_ktime_get_ns();
0144     delta = cur_ts - *cts;
0145     *cts = cur_ts;
0146 
0147     /*
0148      * When state doesn't equal to (u32)-1, the cpu will enter
0149      * one idle state; for this case we need to record interval
0150      * for the pstate.
0151      *
0152      *                 OPP2
0153      *            +---------------------+
0154      *     OPP1   |                     |
0155      *   ---------+                     |
0156      *                                  |  Idle state
0157      *                                  +---------------
0158      *
0159      *            |<- pstate duration ->|
0160      *            ^                     ^
0161      *           pts                  cur_ts
0162      */
0163     if (ctx->state != (u32)-1) {
0164 
0165         /* record pstate after have first cpu_frequency event */
0166         if (!*pts)
0167             return 0;
0168 
0169         delta = cur_ts - *pts;
0170 
0171         pstate_idx = find_cpu_pstate_idx(*pstate);
0172         if (pstate_idx >= MAX_PSTATE_ENTRIES)
0173             return 0;
0174 
0175         key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
0176         val = bpf_map_lookup_elem(&pstate_duration, &key);
0177         if (val)
0178             __sync_fetch_and_add((long *)val, delta);
0179 
0180     /*
0181      * When state equal to (u32)-1, the cpu just exits from one
0182      * specific idle state; for this case we need to record
0183      * interval for the pstate.
0184      *
0185      *       OPP2
0186      *   -----------+
0187      *              |                          OPP1
0188      *              |                     +-----------
0189      *              |     Idle state      |
0190      *              +---------------------+
0191      *
0192      *              |<- cstate duration ->|
0193      *              ^                     ^
0194      *             cts                  cur_ts
0195      */
0196     } else {
0197 
0198         key = cpu * MAX_CSTATE_ENTRIES + prev_state;
0199         val = bpf_map_lookup_elem(&cstate_duration, &key);
0200         if (val)
0201             __sync_fetch_and_add((long *)val, delta);
0202     }
0203 
0204     /* Update timestamp for pstate as new start time */
0205     if (*pts)
0206         *pts = cur_ts;
0207 
0208     return 0;
0209 }
0210 
0211 SEC("tracepoint/power/cpu_frequency")
0212 int bpf_prog2(struct cpu_args *ctx)
0213 {
0214     u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
0215     u32 key, cpu, pstate_idx;
0216     u64 *val;
0217 
0218     cpu = ctx->cpu_id;
0219 
0220     key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
0221     pts = bpf_map_lookup_elem(&my_map, &key);
0222     if (!pts)
0223         return 0;
0224 
0225     key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
0226     pstate = bpf_map_lookup_elem(&my_map, &key);
0227     if (!pstate)
0228         return 0;
0229 
0230     key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
0231     cstate = bpf_map_lookup_elem(&my_map, &key);
0232     if (!cstate)
0233         return 0;
0234 
0235     prev_state = *pstate;
0236     *pstate = ctx->state;
0237 
0238     if (!*pts) {
0239         *pts = bpf_ktime_get_ns();
0240         return 0;
0241     }
0242 
0243     cur_ts = bpf_ktime_get_ns();
0244     delta = cur_ts - *pts;
0245     *pts = cur_ts;
0246 
0247     /* When CPU is in idle, bail out to skip pstate statistics */
0248     if (*cstate != (u32)(-1))
0249         return 0;
0250 
0251     /*
0252      * The cpu changes to another different OPP (in below diagram
0253      * change frequency from OPP3 to OPP1), need recording interval
0254      * for previous frequency OPP3 and update timestamp as start
0255      * time for new frequency OPP1.
0256      *
0257      *                 OPP3
0258      *            +---------------------+
0259      *     OPP2   |                     |
0260      *   ---------+                     |
0261      *                                  |    OPP1
0262      *                                  +---------------
0263      *
0264      *            |<- pstate duration ->|
0265      *            ^                     ^
0266      *           pts                  cur_ts
0267      */
0268     pstate_idx = find_cpu_pstate_idx(*pstate);
0269     if (pstate_idx >= MAX_PSTATE_ENTRIES)
0270         return 0;
0271 
0272     key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
0273     val = bpf_map_lookup_elem(&pstate_duration, &key);
0274     if (val)
0275         __sync_fetch_and_add((long *)val, delta);
0276 
0277     return 0;
0278 }
0279 
0280 char _license[] SEC("license") = "GPL";
0281 u32 _version SEC("version") = LINUX_VERSION_CODE;