]>
Commit | Line | Data |
---|---|---|
5db58faf MKL |
1 | /* |
2 | * Copyright (c) 2016 Facebook | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of version 2 of the GNU General Public | |
6 | * License as published by the Free Software Foundation. | |
7 | */ | |
8 | #define _GNU_SOURCE | |
9 | #include <linux/types.h> | |
10 | #include <stdio.h> | |
11 | #include <unistd.h> | |
12 | #include <linux/bpf.h> | |
13 | #include <errno.h> | |
14 | #include <string.h> | |
15 | #include <assert.h> | |
16 | #include <sched.h> | |
17 | #include <sys/wait.h> | |
18 | #include <sys/stat.h> | |
e00c7b21 | 19 | #include <sys/resource.h> |
5db58faf MKL |
20 | #include <fcntl.h> |
21 | #include <stdlib.h> | |
22 | #include <time.h> | |
e00c7b21 | 23 | |
5db58faf | 24 | #include "libbpf.h" |
e00c7b21 | 25 | #include "bpf_util.h" |
5db58faf MKL |
26 | |
27 | #define min(a, b) ((a) < (b) ? (a) : (b)) | |
dfc5be0d AA |
28 | #ifndef offsetof |
29 | # define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) | |
30 | #endif | |
5db58faf MKL |
31 | #define container_of(ptr, type, member) ({ \ |
32 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ | |
33 | (type *)( (char *)__mptr - offsetof(type,member) );}) | |
34 | ||
35 | static int nr_cpus; | |
36 | static unsigned long long *dist_keys; | |
37 | static unsigned int dist_key_counts; | |
38 | ||
39 | struct list_head { | |
40 | struct list_head *next, *prev; | |
41 | }; | |
42 | ||
43 | static inline void INIT_LIST_HEAD(struct list_head *list) | |
44 | { | |
45 | list->next = list; | |
46 | list->prev = list; | |
47 | } | |
48 | ||
49 | static inline int list_empty(const struct list_head *head) | |
50 | { | |
51 | return head->next == head; | |
52 | } | |
53 | ||
54 | static inline void __list_add(struct list_head *new, | |
55 | struct list_head *prev, | |
56 | struct list_head *next) | |
57 | { | |
58 | next->prev = new; | |
59 | new->next = next; | |
60 | new->prev = prev; | |
61 | prev->next = new; | |
62 | } | |
63 | ||
64 | static inline void list_add(struct list_head *new, struct list_head *head) | |
65 | { | |
66 | __list_add(new, head, head->next); | |
67 | } | |
68 | ||
69 | static inline void __list_del(struct list_head *prev, struct list_head *next) | |
70 | { | |
71 | next->prev = prev; | |
72 | prev->next = next; | |
73 | } | |
74 | ||
75 | static inline void __list_del_entry(struct list_head *entry) | |
76 | { | |
77 | __list_del(entry->prev, entry->next); | |
78 | } | |
79 | ||
80 | static inline void list_move(struct list_head *list, struct list_head *head) | |
81 | { | |
82 | __list_del_entry(list); | |
83 | list_add(list, head); | |
84 | } | |
85 | ||
86 | #define list_entry(ptr, type, member) \ | |
87 | container_of(ptr, type, member) | |
88 | ||
89 | #define list_last_entry(ptr, type, member) \ | |
90 | list_entry((ptr)->prev, type, member) | |
91 | ||
92 | struct pfect_lru_node { | |
93 | struct list_head list; | |
94 | unsigned long long key; | |
95 | }; | |
96 | ||
97 | struct pfect_lru { | |
98 | struct list_head list; | |
99 | struct pfect_lru_node *free_nodes; | |
100 | unsigned int cur_size; | |
101 | unsigned int lru_size; | |
102 | unsigned int nr_unique; | |
103 | unsigned int nr_misses; | |
104 | unsigned int total; | |
105 | int map_fd; | |
106 | }; | |
107 | ||
108 | static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, | |
109 | unsigned int nr_possible_elems) | |
110 | { | |
111 | lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, | |
112 | sizeof(unsigned long long), | |
113 | sizeof(struct pfect_lru_node *), | |
114 | nr_possible_elems, 0); | |
115 | assert(lru->map_fd != -1); | |
116 | ||
117 | lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); | |
118 | assert(lru->free_nodes); | |
119 | ||
120 | INIT_LIST_HEAD(&lru->list); | |
121 | lru->cur_size = 0; | |
122 | lru->lru_size = lru_size; | |
123 | lru->nr_unique = lru->nr_misses = lru->total = 0; | |
124 | } | |
125 | ||
126 | static void pfect_lru_destroy(struct pfect_lru *lru) | |
127 | { | |
128 | close(lru->map_fd); | |
129 | free(lru->free_nodes); | |
130 | } | |
131 | ||
132 | static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, | |
133 | unsigned long long key) | |
134 | { | |
135 | struct pfect_lru_node *node = NULL; | |
136 | int seen = 0; | |
137 | ||
138 | lru->total++; | |
d40fc181 | 139 | if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) { |
5db58faf MKL |
140 | if (node) { |
141 | list_move(&node->list, &lru->list); | |
142 | return 1; | |
143 | } | |
144 | seen = 1; | |
145 | } | |
146 | ||
147 | if (lru->cur_size < lru->lru_size) { | |
148 | node = &lru->free_nodes[lru->cur_size++]; | |
149 | INIT_LIST_HEAD(&node->list); | |
150 | } else { | |
151 | struct pfect_lru_node *null_node = NULL; | |
152 | ||
153 | node = list_last_entry(&lru->list, | |
154 | struct pfect_lru_node, | |
155 | list); | |
d40fc181 | 156 | bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); |
5db58faf MKL |
157 | } |
158 | ||
159 | node->key = key; | |
160 | list_move(&node->list, &lru->list); | |
161 | ||
162 | lru->nr_misses++; | |
163 | if (seen) { | |
d40fc181 | 164 | assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); |
5db58faf MKL |
165 | } else { |
166 | lru->nr_unique++; | |
d40fc181 | 167 | assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); |
5db58faf MKL |
168 | } |
169 | ||
170 | return seen; | |
171 | } | |
172 | ||
173 | static unsigned int read_keys(const char *dist_file, | |
174 | unsigned long long **keys) | |
175 | { | |
176 | struct stat fst; | |
177 | unsigned long long *retkeys; | |
178 | unsigned int counts = 0; | |
179 | int dist_fd; | |
180 | char *b, *l; | |
181 | int i; | |
182 | ||
183 | dist_fd = open(dist_file, 0); | |
184 | assert(dist_fd != -1); | |
185 | ||
186 | assert(fstat(dist_fd, &fst) == 0); | |
187 | b = malloc(fst.st_size); | |
188 | assert(b); | |
189 | ||
190 | assert(read(dist_fd, b, fst.st_size) == fst.st_size); | |
191 | close(dist_fd); | |
192 | for (i = 0; i < fst.st_size; i++) { | |
193 | if (b[i] == '\n') | |
194 | counts++; | |
195 | } | |
196 | counts++; /* in case the last line has no \n */ | |
197 | ||
198 | retkeys = malloc(counts * sizeof(unsigned long long)); | |
199 | assert(retkeys); | |
200 | ||
201 | counts = 0; | |
202 | for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n")) | |
203 | retkeys[counts++] = strtoull(l, NULL, 10); | |
204 | free(b); | |
205 | ||
206 | *keys = retkeys; | |
207 | ||
208 | return counts; | |
209 | } | |
210 | ||
211 | static int create_map(int map_type, int map_flags, unsigned int size) | |
212 | { | |
213 | int map_fd; | |
214 | ||
215 | map_fd = bpf_create_map(map_type, sizeof(unsigned long long), | |
216 | sizeof(unsigned long long), size, map_flags); | |
217 | ||
218 | if (map_fd == -1) | |
219 | perror("bpf_create_map"); | |
220 | ||
221 | return map_fd; | |
222 | } | |
223 | ||
224 | static int sched_next_online(int pid, int next_to_try) | |
225 | { | |
226 | cpu_set_t cpuset; | |
227 | ||
228 | if (next_to_try == nr_cpus) | |
229 | return -1; | |
230 | ||
231 | while (next_to_try < nr_cpus) { | |
232 | CPU_ZERO(&cpuset); | |
233 | CPU_SET(next_to_try++, &cpuset); | |
234 | if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) | |
235 | break; | |
236 | } | |
237 | ||
238 | return next_to_try; | |
239 | } | |
240 | ||
241 | static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data), | |
242 | void *data) | |
243 | { | |
244 | int next_sched_cpu = 0; | |
245 | pid_t pid[tasks]; | |
246 | int i; | |
247 | ||
248 | for (i = 0; i < tasks; i++) { | |
249 | pid[i] = fork(); | |
250 | if (pid[i] == 0) { | |
251 | next_sched_cpu = sched_next_online(0, next_sched_cpu); | |
252 | fn(i, data); | |
253 | exit(0); | |
254 | } else if (pid[i] == -1) { | |
255 | printf("couldn't spawn #%d process\n", i); | |
256 | exit(1); | |
257 | } | |
258 | /* It is mostly redundant and just allow the parent | |
259 | * process to update next_shced_cpu for the next child | |
260 | * process | |
261 | */ | |
262 | next_sched_cpu = sched_next_online(pid[i], next_sched_cpu); | |
263 | } | |
264 | for (i = 0; i < tasks; i++) { | |
265 | int status; | |
266 | ||
267 | assert(waitpid(pid[i], &status, 0) == pid[i]); | |
268 | assert(status == 0); | |
269 | } | |
270 | } | |
271 | ||
272 | static void do_test_lru_dist(int task, void *data) | |
273 | { | |
274 | unsigned int nr_misses = 0; | |
275 | struct pfect_lru pfect_lru; | |
276 | unsigned long long key, value = 1234; | |
277 | unsigned int i; | |
278 | ||
279 | unsigned int lru_map_fd = ((unsigned int *)data)[0]; | |
280 | unsigned int lru_size = ((unsigned int *)data)[1]; | |
281 | unsigned long long key_offset = task * dist_key_counts; | |
282 | ||
283 | pfect_lru_init(&pfect_lru, lru_size, dist_key_counts); | |
284 | ||
285 | for (i = 0; i < dist_key_counts; i++) { | |
286 | key = dist_keys[i] + key_offset; | |
287 | ||
288 | pfect_lru_lookup_or_insert(&pfect_lru, key); | |
289 | ||
d40fc181 | 290 | if (!bpf_map_lookup_elem(lru_map_fd, &key, &value)) |
5db58faf MKL |
291 | continue; |
292 | ||
d40fc181 JS |
293 | if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { |
294 | printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n", | |
5db58faf MKL |
295 | key, errno); |
296 | assert(0); | |
297 | } | |
298 | ||
299 | nr_misses++; | |
300 | } | |
301 | ||
302 | printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", | |
303 | task, pfect_lru.nr_unique, dist_key_counts, nr_misses, | |
304 | dist_key_counts); | |
305 | printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", | |
306 | task, pfect_lru.nr_unique, pfect_lru.total, | |
307 | pfect_lru.nr_misses, pfect_lru.total); | |
308 | ||
309 | pfect_lru_destroy(&pfect_lru); | |
310 | close(lru_map_fd); | |
311 | } | |
312 | ||
313 | static void test_parallel_lru_dist(int map_type, int map_flags, | |
314 | int nr_tasks, unsigned int lru_size) | |
315 | { | |
316 | int child_data[2]; | |
317 | int lru_map_fd; | |
318 | ||
319 | printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, | |
320 | map_flags); | |
321 | ||
322 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
323 | lru_map_fd = create_map(map_type, map_flags, | |
324 | nr_cpus * lru_size); | |
325 | else | |
326 | lru_map_fd = create_map(map_type, map_flags, | |
327 | nr_tasks * lru_size); | |
328 | assert(lru_map_fd != -1); | |
329 | ||
330 | child_data[0] = lru_map_fd; | |
331 | child_data[1] = lru_size; | |
332 | ||
333 | run_parallel(nr_tasks, do_test_lru_dist, child_data); | |
334 | ||
335 | close(lru_map_fd); | |
336 | } | |
337 | ||
338 | static void test_lru_loss0(int map_type, int map_flags) | |
339 | { | |
340 | unsigned long long key, value[nr_cpus]; | |
341 | unsigned int old_unused_losses = 0; | |
342 | unsigned int new_unused_losses = 0; | |
343 | unsigned int used_losses = 0; | |
344 | int map_fd; | |
345 | ||
346 | printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, | |
347 | map_flags); | |
348 | ||
349 | assert(sched_next_online(0, 0) != -1); | |
350 | ||
351 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
352 | map_fd = create_map(map_type, map_flags, 900 * nr_cpus); | |
353 | else | |
354 | map_fd = create_map(map_type, map_flags, 900); | |
355 | ||
356 | assert(map_fd != -1); | |
357 | ||
358 | value[0] = 1234; | |
359 | ||
360 | for (key = 1; key <= 1000; key++) { | |
361 | int start_key, end_key; | |
362 | ||
d40fc181 | 363 | assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); |
5db58faf MKL |
364 | |
365 | start_key = 101; | |
366 | end_key = min(key, 900); | |
367 | ||
368 | while (start_key <= end_key) { | |
d40fc181 | 369 | bpf_map_lookup_elem(map_fd, &start_key, value); |
5db58faf MKL |
370 | start_key++; |
371 | } | |
372 | } | |
373 | ||
374 | for (key = 1; key <= 1000; key++) { | |
d40fc181 | 375 | if (bpf_map_lookup_elem(map_fd, &key, value)) { |
5db58faf MKL |
376 | if (key <= 100) |
377 | old_unused_losses++; | |
378 | else if (key <= 900) | |
379 | used_losses++; | |
380 | else | |
381 | new_unused_losses++; | |
382 | } | |
383 | } | |
384 | ||
385 | close(map_fd); | |
386 | ||
387 | printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) " | |
388 | "newer-elem-losses:%d(/100)\n", | |
389 | old_unused_losses, used_losses, new_unused_losses); | |
390 | } | |
391 | ||
392 | static void test_lru_loss1(int map_type, int map_flags) | |
393 | { | |
394 | unsigned long long key, value[nr_cpus]; | |
395 | int map_fd; | |
396 | unsigned int nr_losses = 0; | |
397 | ||
398 | printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, | |
399 | map_flags); | |
400 | ||
401 | assert(sched_next_online(0, 0) != -1); | |
402 | ||
403 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
404 | map_fd = create_map(map_type, map_flags, 1000 * nr_cpus); | |
405 | else | |
406 | map_fd = create_map(map_type, map_flags, 1000); | |
407 | ||
408 | assert(map_fd != -1); | |
409 | ||
410 | value[0] = 1234; | |
411 | ||
412 | for (key = 1; key <= 1000; key++) | |
d40fc181 | 413 | assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); |
5db58faf MKL |
414 | |
415 | for (key = 1; key <= 1000; key++) { | |
d40fc181 | 416 | if (bpf_map_lookup_elem(map_fd, &key, value)) |
5db58faf MKL |
417 | nr_losses++; |
418 | } | |
419 | ||
420 | close(map_fd); | |
421 | ||
422 | printf("nr_losses:%d(/1000)\n", nr_losses); | |
423 | } | |
424 | ||
425 | static void do_test_parallel_lru_loss(int task, void *data) | |
426 | { | |
427 | const unsigned int nr_stable_elems = 1000; | |
428 | const unsigned int nr_repeats = 100000; | |
429 | ||
430 | int map_fd = *(int *)data; | |
431 | unsigned long long stable_base; | |
432 | unsigned long long key, value[nr_cpus]; | |
433 | unsigned long long next_ins_key; | |
434 | unsigned int nr_losses = 0; | |
435 | unsigned int i; | |
436 | ||
437 | stable_base = task * nr_repeats * 2 + 1; | |
438 | next_ins_key = stable_base; | |
439 | value[0] = 1234; | |
440 | for (i = 0; i < nr_stable_elems; i++) { | |
d40fc181 | 441 | assert(bpf_map_update_elem(map_fd, &next_ins_key, value, |
5db58faf MKL |
442 | BPF_NOEXIST) == 0); |
443 | next_ins_key++; | |
444 | } | |
445 | ||
446 | for (i = 0; i < nr_repeats; i++) { | |
447 | int rn; | |
448 | ||
449 | rn = rand(); | |
450 | ||
451 | if (rn % 10) { | |
452 | key = rn % nr_stable_elems + stable_base; | |
d40fc181 | 453 | bpf_map_lookup_elem(map_fd, &key, value); |
5db58faf | 454 | } else { |
d40fc181 | 455 | bpf_map_update_elem(map_fd, &next_ins_key, value, |
5db58faf MKL |
456 | BPF_NOEXIST); |
457 | next_ins_key++; | |
458 | } | |
459 | } | |
460 | ||
461 | key = stable_base; | |
462 | for (i = 0; i < nr_stable_elems; i++) { | |
d40fc181 | 463 | if (bpf_map_lookup_elem(map_fd, &key, value)) |
5db58faf MKL |
464 | nr_losses++; |
465 | key++; | |
466 | } | |
467 | ||
468 | printf(" task:%d nr_losses:%u\n", task, nr_losses); | |
469 | } | |
470 | ||
471 | static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) | |
472 | { | |
473 | int map_fd; | |
474 | ||
475 | printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, | |
476 | map_flags); | |
477 | ||
478 | /* Give 20% more than the active working set */ | |
479 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
480 | map_fd = create_map(map_type, map_flags, | |
481 | nr_cpus * (1000 + 200)); | |
482 | else | |
483 | map_fd = create_map(map_type, map_flags, | |
484 | nr_tasks * (1000 + 200)); | |
485 | ||
486 | assert(map_fd != -1); | |
487 | ||
488 | run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd); | |
489 | ||
490 | close(map_fd); | |
491 | } | |
492 | ||
493 | int main(int argc, char **argv) | |
494 | { | |
495 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | |
496 | int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; | |
497 | const char *dist_file; | |
498 | int nr_tasks = 1; | |
499 | int lru_size; | |
500 | int f; | |
501 | ||
502 | if (argc < 4) { | |
503 | printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n", | |
504 | argv[0]); | |
505 | return -1; | |
506 | } | |
507 | ||
508 | dist_file = argv[1]; | |
509 | lru_size = atoi(argv[2]); | |
510 | nr_tasks = atoi(argv[3]); | |
511 | ||
512 | setbuf(stdout, NULL); | |
513 | ||
514 | assert(!setrlimit(RLIMIT_MEMLOCK, &r)); | |
515 | ||
516 | srand(time(NULL)); | |
517 | ||
e00c7b21 | 518 | nr_cpus = bpf_num_possible_cpus(); |
5db58faf MKL |
519 | assert(nr_cpus != -1); |
520 | printf("nr_cpus:%d\n\n", nr_cpus); | |
521 | ||
522 | nr_tasks = min(nr_tasks, nr_cpus); | |
523 | ||
524 | dist_key_counts = read_keys(dist_file, &dist_keys); | |
525 | if (!dist_key_counts) { | |
526 | printf("%s has no key\n", dist_file); | |
527 | return -1; | |
528 | } | |
529 | ||
530 | for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) { | |
531 | test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); | |
532 | test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); | |
533 | test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f], | |
534 | nr_tasks); | |
535 | test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f], | |
536 | nr_tasks, lru_size); | |
537 | printf("\n"); | |
538 | } | |
539 | ||
540 | free(dist_keys); | |
541 | ||
542 | return 0; | |
543 | } |