Back to home page

OSCL-LXR

 
 

    


0001 // SPDX-License-Identifier: GPL-2.0-only
0002 /*
0003  * Copyright (c) 2016 Facebook
0004  */
0005 #define _GNU_SOURCE
0006 #include <linux/types.h>
0007 #include <stdio.h>
0008 #include <unistd.h>
0009 #include <linux/bpf.h>
0010 #include <errno.h>
0011 #include <string.h>
0012 #include <assert.h>
0013 #include <sched.h>
0014 #include <sys/wait.h>
0015 #include <sys/stat.h>
0016 #include <fcntl.h>
0017 #include <stdlib.h>
0018 #include <time.h>
0019 
0020 #include <bpf/bpf.h>
0021 #include "bpf_util.h"
0022 
0023 #define min(a, b) ((a) < (b) ? (a) : (b))
0024 #ifndef offsetof
0025 # define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
0026 #endif
0027 #define container_of(ptr, type, member) ({          \
0028     const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
0029     (type *)( (char *)__mptr - offsetof(type,member) );})
0030 
0031 static int nr_cpus;
0032 static unsigned long long *dist_keys;
0033 static unsigned int dist_key_counts;
0034 
0035 struct list_head {
0036     struct list_head *next, *prev;
0037 };
0038 
0039 static inline void INIT_LIST_HEAD(struct list_head *list)
0040 {
0041     list->next = list;
0042     list->prev = list;
0043 }
0044 
0045 static inline int list_empty(const struct list_head *head)
0046 {
0047     return head->next == head;
0048 }
0049 
0050 static inline void __list_add(struct list_head *new,
0051                   struct list_head *prev,
0052                   struct list_head *next)
0053 {
0054     next->prev = new;
0055     new->next = next;
0056     new->prev = prev;
0057     prev->next = new;
0058 }
0059 
0060 static inline void list_add(struct list_head *new, struct list_head *head)
0061 {
0062     __list_add(new, head, head->next);
0063 }
0064 
0065 static inline void __list_del(struct list_head *prev, struct list_head *next)
0066 {
0067     next->prev = prev;
0068     prev->next = next;
0069 }
0070 
0071 static inline void __list_del_entry(struct list_head *entry)
0072 {
0073     __list_del(entry->prev, entry->next);
0074 }
0075 
0076 static inline void list_move(struct list_head *list, struct list_head *head)
0077 {
0078     __list_del_entry(list);
0079     list_add(list, head);
0080 }
0081 
0082 #define list_entry(ptr, type, member) \
0083     container_of(ptr, type, member)
0084 
0085 #define list_last_entry(ptr, type, member) \
0086     list_entry((ptr)->prev, type, member)
0087 
0088 struct pfect_lru_node {
0089     struct list_head list;
0090     unsigned long long key;
0091 };
0092 
0093 struct pfect_lru {
0094     struct list_head list;
0095     struct pfect_lru_node *free_nodes;
0096     unsigned int cur_size;
0097     unsigned int lru_size;
0098     unsigned int nr_unique;
0099     unsigned int nr_misses;
0100     unsigned int total;
0101     int map_fd;
0102 };
0103 
0104 static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
0105                unsigned int nr_possible_elems)
0106 {
0107     lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
0108                      sizeof(unsigned long long),
0109                      sizeof(struct pfect_lru_node *),
0110                      nr_possible_elems, NULL);
0111     assert(lru->map_fd != -1);
0112 
0113     lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
0114     assert(lru->free_nodes);
0115 
0116     INIT_LIST_HEAD(&lru->list);
0117     lru->cur_size = 0;
0118     lru->lru_size = lru_size;
0119     lru->nr_unique = lru->nr_misses = lru->total = 0;
0120 }
0121 
0122 static void pfect_lru_destroy(struct pfect_lru *lru)
0123 {
0124     close(lru->map_fd);
0125     free(lru->free_nodes);
0126 }
0127 
0128 static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
0129                       unsigned long long key)
0130 {
0131     struct pfect_lru_node *node = NULL;
0132     int seen = 0;
0133 
0134     lru->total++;
0135     if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
0136         if (node) {
0137             list_move(&node->list, &lru->list);
0138             return 1;
0139         }
0140         seen = 1;
0141     }
0142 
0143     if (lru->cur_size < lru->lru_size) {
0144         node =  &lru->free_nodes[lru->cur_size++];
0145         INIT_LIST_HEAD(&node->list);
0146     } else {
0147         struct pfect_lru_node *null_node = NULL;
0148 
0149         node = list_last_entry(&lru->list,
0150                        struct pfect_lru_node,
0151                        list);
0152         bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
0153     }
0154 
0155     node->key = key;
0156     list_move(&node->list, &lru->list);
0157 
0158     lru->nr_misses++;
0159     if (seen) {
0160         assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
0161     } else {
0162         lru->nr_unique++;
0163         assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
0164     }
0165 
0166     return seen;
0167 }
0168 
0169 static unsigned int read_keys(const char *dist_file,
0170                   unsigned long long **keys)
0171 {
0172     struct stat fst;
0173     unsigned long long *retkeys;
0174     unsigned int counts = 0;
0175     int dist_fd;
0176     char *b, *l;
0177     int i;
0178 
0179     dist_fd = open(dist_file, 0);
0180     assert(dist_fd != -1);
0181 
0182     assert(fstat(dist_fd, &fst) == 0);
0183     b = malloc(fst.st_size);
0184     assert(b);
0185 
0186     assert(read(dist_fd, b, fst.st_size) == fst.st_size);
0187     close(dist_fd);
0188     for (i = 0; i < fst.st_size; i++) {
0189         if (b[i] == '\n')
0190             counts++;
0191     }
0192     counts++; /* in case the last line has no \n */
0193 
0194     retkeys = malloc(counts * sizeof(unsigned long long));
0195     assert(retkeys);
0196 
0197     counts = 0;
0198     for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
0199         retkeys[counts++] = strtoull(l, NULL, 10);
0200     free(b);
0201 
0202     *keys = retkeys;
0203 
0204     return counts;
0205 }
0206 
0207 static int create_map(int map_type, int map_flags, unsigned int size)
0208 {
0209     LIBBPF_OPTS(bpf_map_create_opts, opts,
0210         .map_flags = map_flags,
0211     );
0212     int map_fd;
0213 
0214     map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
0215                 sizeof(unsigned long long), size, &opts);
0216 
0217     if (map_fd == -1)
0218         perror("bpf_create_map");
0219 
0220     return map_fd;
0221 }
0222 
0223 static int sched_next_online(int pid, int next_to_try)
0224 {
0225     cpu_set_t cpuset;
0226 
0227     if (next_to_try == nr_cpus)
0228         return -1;
0229 
0230     while (next_to_try < nr_cpus) {
0231         CPU_ZERO(&cpuset);
0232         CPU_SET(next_to_try++, &cpuset);
0233         if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
0234             break;
0235     }
0236 
0237     return next_to_try;
0238 }
0239 
0240 static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
0241              void *data)
0242 {
0243     int next_sched_cpu = 0;
0244     pid_t pid[tasks];
0245     int i;
0246 
0247     for (i = 0; i < tasks; i++) {
0248         pid[i] = fork();
0249         if (pid[i] == 0) {
0250             next_sched_cpu = sched_next_online(0, next_sched_cpu);
0251             fn(i, data);
0252             exit(0);
0253         } else if (pid[i] == -1) {
0254             printf("couldn't spawn #%d process\n", i);
0255             exit(1);
0256         }
0257         /* It is mostly redundant and just allow the parent
0258          * process to update next_shced_cpu for the next child
0259          * process
0260          */
0261         next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
0262     }
0263     for (i = 0; i < tasks; i++) {
0264         int status;
0265 
0266         assert(waitpid(pid[i], &status, 0) == pid[i]);
0267         assert(status == 0);
0268     }
0269 }
0270 
0271 static void do_test_lru_dist(int task, void *data)
0272 {
0273     unsigned int nr_misses = 0;
0274     struct pfect_lru pfect_lru;
0275     unsigned long long key, value = 1234;
0276     unsigned int i;
0277 
0278     unsigned int lru_map_fd = ((unsigned int *)data)[0];
0279     unsigned int lru_size = ((unsigned int *)data)[1];
0280     unsigned long long key_offset = task * dist_key_counts;
0281 
0282     pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
0283 
0284     for (i = 0; i < dist_key_counts; i++) {
0285         key = dist_keys[i] + key_offset;
0286 
0287         pfect_lru_lookup_or_insert(&pfect_lru, key);
0288 
0289         if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
0290             continue;
0291 
0292         if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
0293             printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
0294                    key, errno);
0295             assert(0);
0296         }
0297 
0298         nr_misses++;
0299     }
0300 
0301     printf("    task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
0302            task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
0303            dist_key_counts);
0304     printf("    task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
0305            task, pfect_lru.nr_unique, pfect_lru.total,
0306            pfect_lru.nr_misses, pfect_lru.total);
0307 
0308     pfect_lru_destroy(&pfect_lru);
0309     close(lru_map_fd);
0310 }
0311 
0312 static void test_parallel_lru_dist(int map_type, int map_flags,
0313                    int nr_tasks, unsigned int lru_size)
0314 {
0315     int child_data[2];
0316     int lru_map_fd;
0317 
0318     printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
0319            map_flags);
0320 
0321     if (map_flags & BPF_F_NO_COMMON_LRU)
0322         lru_map_fd = create_map(map_type, map_flags,
0323                     nr_cpus * lru_size);
0324     else
0325         lru_map_fd = create_map(map_type, map_flags,
0326                     nr_tasks * lru_size);
0327     assert(lru_map_fd != -1);
0328 
0329     child_data[0] = lru_map_fd;
0330     child_data[1] = lru_size;
0331 
0332     run_parallel(nr_tasks, do_test_lru_dist, child_data);
0333 
0334     close(lru_map_fd);
0335 }
0336 
0337 static void test_lru_loss0(int map_type, int map_flags)
0338 {
0339     unsigned long long key, value[nr_cpus];
0340     unsigned int old_unused_losses = 0;
0341     unsigned int new_unused_losses = 0;
0342     unsigned int used_losses = 0;
0343     int map_fd;
0344 
0345     printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
0346            map_flags);
0347 
0348     assert(sched_next_online(0, 0) != -1);
0349 
0350     if (map_flags & BPF_F_NO_COMMON_LRU)
0351         map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
0352     else
0353         map_fd = create_map(map_type, map_flags, 900);
0354 
0355     assert(map_fd != -1);
0356 
0357     value[0] = 1234;
0358 
0359     for (key = 1; key <= 1000; key++) {
0360         int start_key, end_key;
0361 
0362         assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
0363 
0364         start_key = 101;
0365         end_key = min(key, 900);
0366 
0367         while (start_key <= end_key) {
0368             bpf_map_lookup_elem(map_fd, &start_key, value);
0369             start_key++;
0370         }
0371     }
0372 
0373     for (key = 1; key <= 1000; key++) {
0374         if (bpf_map_lookup_elem(map_fd, &key, value)) {
0375             if (key <= 100)
0376                 old_unused_losses++;
0377             else if (key <= 900)
0378                 used_losses++;
0379             else
0380                 new_unused_losses++;
0381         }
0382     }
0383 
0384     close(map_fd);
0385 
0386     printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
0387            "newer-elem-losses:%d(/100)\n",
0388            old_unused_losses, used_losses, new_unused_losses);
0389 }
0390 
0391 static void test_lru_loss1(int map_type, int map_flags)
0392 {
0393     unsigned long long key, value[nr_cpus];
0394     int map_fd;
0395     unsigned int nr_losses = 0;
0396 
0397     printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
0398            map_flags);
0399 
0400     assert(sched_next_online(0, 0) != -1);
0401 
0402     if (map_flags & BPF_F_NO_COMMON_LRU)
0403         map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
0404     else
0405         map_fd = create_map(map_type, map_flags, 1000);
0406 
0407     assert(map_fd != -1);
0408 
0409     value[0] = 1234;
0410 
0411     for (key = 1; key <= 1000; key++)
0412         assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
0413 
0414     for (key = 1; key <= 1000; key++) {
0415         if (bpf_map_lookup_elem(map_fd, &key, value))
0416             nr_losses++;
0417     }
0418 
0419     close(map_fd);
0420 
0421     printf("nr_losses:%d(/1000)\n", nr_losses);
0422 }
0423 
0424 static void do_test_parallel_lru_loss(int task, void *data)
0425 {
0426     const unsigned int nr_stable_elems = 1000;
0427     const unsigned int nr_repeats = 100000;
0428 
0429     int map_fd = *(int *)data;
0430     unsigned long long stable_base;
0431     unsigned long long key, value[nr_cpus];
0432     unsigned long long next_ins_key;
0433     unsigned int nr_losses = 0;
0434     unsigned int i;
0435 
0436     stable_base = task * nr_repeats * 2 + 1;
0437     next_ins_key = stable_base;
0438     value[0] = 1234;
0439     for (i = 0; i < nr_stable_elems; i++) {
0440         assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
0441                        BPF_NOEXIST) == 0);
0442         next_ins_key++;
0443     }
0444 
0445     for (i = 0; i < nr_repeats; i++) {
0446         int rn;
0447 
0448         rn = rand();
0449 
0450         if (rn % 10) {
0451             key = rn % nr_stable_elems + stable_base;
0452             bpf_map_lookup_elem(map_fd, &key, value);
0453         } else {
0454             bpf_map_update_elem(map_fd, &next_ins_key, value,
0455                     BPF_NOEXIST);
0456             next_ins_key++;
0457         }
0458     }
0459 
0460     key = stable_base;
0461     for (i = 0; i < nr_stable_elems; i++) {
0462         if (bpf_map_lookup_elem(map_fd, &key, value))
0463             nr_losses++;
0464         key++;
0465     }
0466 
0467     printf("    task:%d nr_losses:%u\n", task, nr_losses);
0468 }
0469 
0470 static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
0471 {
0472     int map_fd;
0473 
0474     printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
0475            map_flags);
0476 
0477     /* Give 20% more than the active working set */
0478     if (map_flags & BPF_F_NO_COMMON_LRU)
0479         map_fd = create_map(map_type, map_flags,
0480                     nr_cpus * (1000 + 200));
0481     else
0482         map_fd = create_map(map_type, map_flags,
0483                     nr_tasks * (1000 + 200));
0484 
0485     assert(map_fd != -1);
0486 
0487     run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
0488 
0489     close(map_fd);
0490 }
0491 
0492 int main(int argc, char **argv)
0493 {
0494     int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
0495     const char *dist_file;
0496     int nr_tasks = 1;
0497     int lru_size;
0498     int f;
0499 
0500     if (argc < 4) {
0501         printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
0502                argv[0]);
0503         return -1;
0504     }
0505 
0506     dist_file = argv[1];
0507     lru_size = atoi(argv[2]);
0508     nr_tasks = atoi(argv[3]);
0509 
0510     setbuf(stdout, NULL);
0511 
0512     srand(time(NULL));
0513 
0514     nr_cpus = bpf_num_possible_cpus();
0515     assert(nr_cpus != -1);
0516     printf("nr_cpus:%d\n\n", nr_cpus);
0517 
0518     nr_tasks = min(nr_tasks, nr_cpus);
0519 
0520     dist_key_counts = read_keys(dist_file, &dist_keys);
0521     if (!dist_key_counts) {
0522         printf("%s has no key\n", dist_file);
0523         return -1;
0524     }
0525 
0526     for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
0527         test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
0528         test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
0529         test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
0530                        nr_tasks);
0531         test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
0532                        nr_tasks, lru_size);
0533         printf("\n");
0534     }
0535 
0536     free(dist_keys);
0537 
0538     return 0;
0539 }