/* test_lru_dist: stress-test BPF LRU hash maps against a userspace
 * reference ("perfect") LRU implementation, replaying a key trace.
 */
0005 #define _GNU_SOURCE
0006 #include <linux/types.h>
0007 #include <stdio.h>
0008 #include <unistd.h>
0009 #include <linux/bpf.h>
0010 #include <errno.h>
0011 #include <string.h>
0012 #include <assert.h>
0013 #include <sched.h>
0014 #include <sys/wait.h>
0015 #include <sys/stat.h>
0016 #include <fcntl.h>
0017 #include <stdlib.h>
0018 #include <time.h>
0019
0020 #include <bpf/bpf.h>
0021 #include "bpf_util.h"
0022
/* Kernel-style generic helpers used throughout this test. */
#define min(a, b) ((a) < (b) ? (a) : (b))
#ifndef offsetof
/* Byte offset of MEMBER within TYPE (fallback when not already defined). */
# define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif
/* Map a pointer to a struct member back to its containing struct. */
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - offsetof(type,member) );})
0030
/* Number of possible CPUs, filled in main() via bpf_num_possible_cpus(). */
static int nr_cpus;
/* Key trace loaded from the <dist-file> argument by read_keys(). */
static unsigned long long *dist_keys;
static unsigned int dist_key_counts;
0034
/* Minimal userspace copy of the kernel's circular doubly-linked list. */
struct list_head {
	struct list_head *next, *prev;
};
0038
0039 static inline void INIT_LIST_HEAD(struct list_head *list)
0040 {
0041 list->next = list;
0042 list->prev = list;
0043 }
0044
0045 static inline int list_empty(const struct list_head *head)
0046 {
0047 return head->next == head;
0048 }
0049
0050 static inline void __list_add(struct list_head *new,
0051 struct list_head *prev,
0052 struct list_head *next)
0053 {
0054 next->prev = new;
0055 new->next = next;
0056 new->prev = prev;
0057 prev->next = new;
0058 }
0059
0060 static inline void list_add(struct list_head *new, struct list_head *head)
0061 {
0062 __list_add(new, head, head->next);
0063 }
0064
0065 static inline void __list_del(struct list_head *prev, struct list_head *next)
0066 {
0067 next->prev = prev;
0068 prev->next = next;
0069 }
0070
0071 static inline void __list_del_entry(struct list_head *entry)
0072 {
0073 __list_del(entry->prev, entry->next);
0074 }
0075
0076 static inline void list_move(struct list_head *list, struct list_head *head)
0077 {
0078 __list_del_entry(list);
0079 list_add(list, head);
0080 }
0081
/* Cast a list_head pointer back to its containing entry. */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

/* Tail entry of the list; with list_move-to-front usage below this is the
 * least-recently-used element.
 */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)
0087
/* One slot of the reference LRU: list position encodes recency. */
struct pfect_lru_node {
	struct list_head list;
	unsigned long long key;
};
0092
/*
 * Userspace reference ("perfect") LRU, used to compare hit/miss behavior
 * against the kernel's BPF_MAP_TYPE_LRU_HASH.
 */
struct pfect_lru {
	struct list_head list;		/* MRU at the head, LRU at the tail */
	struct pfect_lru_node *free_nodes;	/* preallocated node pool */
	unsigned int cur_size;		/* nodes handed out from the pool so far */
	unsigned int lru_size;		/* capacity before eviction kicks in */
	unsigned int nr_unique;		/* distinct keys ever inserted */
	unsigned int nr_misses;		/* lookups that missed */
	unsigned int total;		/* total lookups */
	int map_fd;			/* BPF hash map: key -> node pointer */
};
0103
0104 static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
0105 unsigned int nr_possible_elems)
0106 {
0107 lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
0108 sizeof(unsigned long long),
0109 sizeof(struct pfect_lru_node *),
0110 nr_possible_elems, NULL);
0111 assert(lru->map_fd != -1);
0112
0113 lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
0114 assert(lru->free_nodes);
0115
0116 INIT_LIST_HEAD(&lru->list);
0117 lru->cur_size = 0;
0118 lru->lru_size = lru_size;
0119 lru->nr_unique = lru->nr_misses = lru->total = 0;
0120 }
0121
0122 static void pfect_lru_destroy(struct pfect_lru *lru)
0123 {
0124 close(lru->map_fd);
0125 free(lru->free_nodes);
0126 }
0127
/*
 * Look @key up in the reference LRU, inserting it (and evicting the
 * least-recently-used entry when full) on a miss.
 *
 * Returns 1 if the key was seen before (a hit, or a re-insert of a
 * previously evicted key) and 0 if the key is brand new.  The counters
 * total/nr_misses/nr_unique are updated as a side effect.
 */
static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
				      unsigned long long key)
{
	struct pfect_lru_node *node = NULL;
	int seen = 0;

	lru->total++;
	if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
		if (node) {
			/* Hit: bump to the most-recently-used position. */
			list_move(&node->list, &lru->list);
			return 1;
		}
		/* Key is in the map but its node is NULL: it was evicted. */
		seen = 1;
	}

	if (lru->cur_size < lru->lru_size) {
		/* Free capacity left: take a fresh node from the pool. */
		node = &lru->free_nodes[lru->cur_size++];
		INIT_LIST_HEAD(&node->list);
	} else {
		struct pfect_lru_node *null_node = NULL;

		/* Full: recycle the tail (LRU) node, and mark its old key
		 * as evicted by storing a NULL node pointer in the map.
		 */
		node = list_last_entry(&lru->list,
				       struct pfect_lru_node,
				       list);
		bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
	}

	node->key = key;
	list_move(&node->list, &lru->list);

	lru->nr_misses++;
	if (seen) {
		/* Re-insertion of a previously evicted key: entry exists. */
		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
	} else {
		lru->nr_unique++;
		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
	}

	return seen;
}
0168
/*
 * Load newline-separated decimal keys from @dist_file into a heap array.
 *
 * On return, *@keys owns a malloc()ed array (caller frees) and the number
 * of parsed keys is returned (0 for an empty file).
 */
static unsigned int read_keys(const char *dist_file,
			      unsigned long long **keys)
{
	struct stat fst;
	unsigned long long *retkeys;
	unsigned int counts = 0;
	int dist_fd;
	char *b, *l;
	off_t i;

	dist_fd = open(dist_file, O_RDONLY);
	assert(dist_fd != -1);

	assert(fstat(dist_fd, &fst) == 0);
	/* +1 for the NUL terminator strtok() requires; without it strtok()
	 * would scan past the end of the file data (undefined behavior).
	 */
	b = malloc(fst.st_size + 1);
	assert(b);

	assert(read(dist_fd, b, fst.st_size) == fst.st_size);
	close(dist_fd);
	b[fst.st_size] = '\0';

	for (i = 0; i < fst.st_size; i++) {
		if (b[i] == '\n')
			counts++;
	}
	counts++; /* in case the last line has no trailing newline */

	retkeys = malloc(counts * sizeof(unsigned long long));
	assert(retkeys);

	/* Re-count while parsing: the final value may be smaller than the
	 * allocation (e.g. a trailing newline), which is fine.
	 */
	counts = 0;
	for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
		retkeys[counts++] = strtoull(l, NULL, 10);
	free(b);

	*keys = retkeys;

	return counts;
}
0206
0207 static int create_map(int map_type, int map_flags, unsigned int size)
0208 {
0209 LIBBPF_OPTS(bpf_map_create_opts, opts,
0210 .map_flags = map_flags,
0211 );
0212 int map_fd;
0213
0214 map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
0215 sizeof(unsigned long long), size, &opts);
0216
0217 if (map_fd == -1)
0218 perror("bpf_create_map");
0219
0220 return map_fd;
0221 }
0222
0223 static int sched_next_online(int pid, int next_to_try)
0224 {
0225 cpu_set_t cpuset;
0226
0227 if (next_to_try == nr_cpus)
0228 return -1;
0229
0230 while (next_to_try < nr_cpus) {
0231 CPU_ZERO(&cpuset);
0232 CPU_SET(next_to_try++, &cpuset);
0233 if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
0234 break;
0235 }
0236
0237 return next_to_try;
0238 }
0239
/*
 * fork() @tasks children; child i runs fn(i, data) pinned (best effort) to
 * its own CPU, then exits.  The parent waits for every child and asserts
 * that each one exited with status 0.
 */
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
			 void *data)
{
	pid_t child[tasks];
	int cpu_cursor = 0;
	int t;

	for (t = 0; t < tasks; t++) {
		child[t] = fork();
		if (child[t] == -1) {
			printf("couldn't spawn #%d process\n", t);
			exit(1);
		}
		if (child[t] == 0) {
			/* Child: pin to a CPU, run the work, and exit. */
			cpu_cursor = sched_next_online(0, cpu_cursor);
			fn(t, data);
			exit(0);
		}
		/* Parent: pin the child and advance the CPU cursor so the
		 * next child starts its search from a different CPU.
		 */
		cpu_cursor = sched_next_online(child[t], cpu_cursor);
	}

	for (t = 0; t < tasks; t++) {
		int status;

		assert(waitpid(child[t], &status, 0) == child[t]);
		assert(status == 0);
	}
}
0270
/*
 * Child body for test_parallel_lru_dist(): replay the key trace (offset so
 * each task works on a disjoint key range) against both the shared BPF LRU
 * map and a private reference LRU, then print the two miss counts.
 */
static void do_test_lru_dist(int task, void *data)
{
	unsigned int nr_misses = 0;
	struct pfect_lru pfect_lru;
	unsigned long long key, value = 1234;
	unsigned int i;

	/* data is {lru_map_fd, lru_size} packed by the parent. */
	unsigned int lru_map_fd = ((unsigned int *)data)[0];
	unsigned int lru_size = ((unsigned int *)data)[1];
	unsigned long long key_offset = task * dist_key_counts;

	pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);

	for (i = 0; i < dist_key_counts; i++) {
		key = dist_keys[i] + key_offset;

		pfect_lru_lookup_or_insert(&pfect_lru, key);

		/* Hit in the BPF LRU map: nothing else to do. */
		if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
			continue;

		/* Miss: insert the key and count the miss. */
		if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
			printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
			       key, errno);
			assert(0);
		}

		nr_misses++;
	}

	printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
	       dist_key_counts);
	printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
	       task, pfect_lru.nr_unique, pfect_lru.total,
	       pfect_lru.nr_misses, pfect_lru.total);

	pfect_lru_destroy(&pfect_lru);
	close(lru_map_fd);
}
0311
0312 static void test_parallel_lru_dist(int map_type, int map_flags,
0313 int nr_tasks, unsigned int lru_size)
0314 {
0315 int child_data[2];
0316 int lru_map_fd;
0317
0318 printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
0319 map_flags);
0320
0321 if (map_flags & BPF_F_NO_COMMON_LRU)
0322 lru_map_fd = create_map(map_type, map_flags,
0323 nr_cpus * lru_size);
0324 else
0325 lru_map_fd = create_map(map_type, map_flags,
0326 nr_tasks * lru_size);
0327 assert(lru_map_fd != -1);
0328
0329 child_data[0] = lru_map_fd;
0330 child_data[1] = lru_size;
0331
0332 run_parallel(nr_tasks, do_test_lru_dist, child_data);
0333
0334 close(lru_map_fd);
0335 }
0336
0337 static void test_lru_loss0(int map_type, int map_flags)
0338 {
0339 unsigned long long key, value[nr_cpus];
0340 unsigned int old_unused_losses = 0;
0341 unsigned int new_unused_losses = 0;
0342 unsigned int used_losses = 0;
0343 int map_fd;
0344
0345 printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
0346 map_flags);
0347
0348 assert(sched_next_online(0, 0) != -1);
0349
0350 if (map_flags & BPF_F_NO_COMMON_LRU)
0351 map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
0352 else
0353 map_fd = create_map(map_type, map_flags, 900);
0354
0355 assert(map_fd != -1);
0356
0357 value[0] = 1234;
0358
0359 for (key = 1; key <= 1000; key++) {
0360 int start_key, end_key;
0361
0362 assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
0363
0364 start_key = 101;
0365 end_key = min(key, 900);
0366
0367 while (start_key <= end_key) {
0368 bpf_map_lookup_elem(map_fd, &start_key, value);
0369 start_key++;
0370 }
0371 }
0372
0373 for (key = 1; key <= 1000; key++) {
0374 if (bpf_map_lookup_elem(map_fd, &key, value)) {
0375 if (key <= 100)
0376 old_unused_losses++;
0377 else if (key <= 900)
0378 used_losses++;
0379 else
0380 new_unused_losses++;
0381 }
0382 }
0383
0384 close(map_fd);
0385
0386 printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
0387 "newer-elem-losses:%d(/100)\n",
0388 old_unused_losses, used_losses, new_unused_losses);
0389 }
0390
0391 static void test_lru_loss1(int map_type, int map_flags)
0392 {
0393 unsigned long long key, value[nr_cpus];
0394 int map_fd;
0395 unsigned int nr_losses = 0;
0396
0397 printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
0398 map_flags);
0399
0400 assert(sched_next_online(0, 0) != -1);
0401
0402 if (map_flags & BPF_F_NO_COMMON_LRU)
0403 map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
0404 else
0405 map_fd = create_map(map_type, map_flags, 1000);
0406
0407 assert(map_fd != -1);
0408
0409 value[0] = 1234;
0410
0411 for (key = 1; key <= 1000; key++)
0412 assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
0413
0414 for (key = 1; key <= 1000; key++) {
0415 if (bpf_map_lookup_elem(map_fd, &key, value))
0416 nr_losses++;
0417 }
0418
0419 close(map_fd);
0420
0421 printf("nr_losses:%d(/1000)\n", nr_losses);
0422 }
0423
/*
 * Child body for test_parallel_lru_loss(): insert 1000 "stable" keys, keep
 * them hot with ~90% random lookups while ~10% of operations insert fresh
 * keys, then count how many stable keys the LRU evicted anyway.
 */
static void do_test_parallel_lru_loss(int task, void *data)
{
	const unsigned int nr_stable_elems = 1000;
	const unsigned int nr_repeats = 100000;

	int map_fd = *(int *)data;
	unsigned long long stable_base;
	unsigned long long key, value[nr_cpus];
	unsigned long long next_ins_key;
	unsigned int nr_losses = 0;
	unsigned int i;

	/* Disjoint key range per task: each task inserts at most
	 * nr_stable_elems + nr_repeats keys, so 2 * nr_repeats is enough.
	 */
	stable_base = task * nr_repeats * 2 + 1;
	next_ins_key = stable_base;
	value[0] = 1234;
	for (i = 0; i < nr_stable_elems; i++) {
		assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
					   BPF_NOEXIST) == 0);
		next_ins_key++;
	}

	for (i = 0; i < nr_repeats; i++) {
		int rn;

		rn = rand();

		if (rn % 10) {
			/* ~90%: touch a random stable key to keep it recent */
			key = rn % nr_stable_elems + stable_base;
			bpf_map_lookup_elem(map_fd, &key, value);
		} else {
			/* ~10%: churn the map with a brand-new key */
			bpf_map_update_elem(map_fd, &next_ins_key, value,
					    BPF_NOEXIST);
			next_ins_key++;
		}
	}

	/* Any stable key that no longer resolves was lost to eviction. */
	key = stable_base;
	for (i = 0; i < nr_stable_elems; i++) {
		if (bpf_map_lookup_elem(map_fd, &key, value))
			nr_losses++;
		key++;
	}

	printf(" task:%d nr_losses:%u\n", task, nr_losses);
}
0469
0470 static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
0471 {
0472 int map_fd;
0473
0474 printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
0475 map_flags);
0476
0477
0478 if (map_flags & BPF_F_NO_COMMON_LRU)
0479 map_fd = create_map(map_type, map_flags,
0480 nr_cpus * (1000 + 200));
0481 else
0482 map_fd = create_map(map_type, map_flags,
0483 nr_tasks * (1000 + 200));
0484
0485 assert(map_fd != -1);
0486
0487 run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
0488
0489 close(map_fd);
0490 }
0491
0492 int main(int argc, char **argv)
0493 {
0494 int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
0495 const char *dist_file;
0496 int nr_tasks = 1;
0497 int lru_size;
0498 int f;
0499
0500 if (argc < 4) {
0501 printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
0502 argv[0]);
0503 return -1;
0504 }
0505
0506 dist_file = argv[1];
0507 lru_size = atoi(argv[2]);
0508 nr_tasks = atoi(argv[3]);
0509
0510 setbuf(stdout, NULL);
0511
0512 srand(time(NULL));
0513
0514 nr_cpus = bpf_num_possible_cpus();
0515 assert(nr_cpus != -1);
0516 printf("nr_cpus:%d\n\n", nr_cpus);
0517
0518 nr_tasks = min(nr_tasks, nr_cpus);
0519
0520 dist_key_counts = read_keys(dist_file, &dist_keys);
0521 if (!dist_key_counts) {
0522 printf("%s has no key\n", dist_file);
0523 return -1;
0524 }
0525
0526 for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
0527 test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
0528 test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
0529 test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
0530 nr_tasks);
0531 test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
0532 nr_tasks, lru_size);
0533 printf("\n");
0534 }
0535
0536 free(dist_keys);
0537
0538 return 0;
0539 }