]>
Commit | Line | Data |
---|---|---|
fc44ef5a | 1 | // SPDX-License-Identifier: GPL-2.0-only |
77f65ebd WB |
2 | /* |
3 | * Copyright 2013 Google Inc. | |
4 | * Author: Willem de Bruijn (willemb@google.com) | |
5 | * | |
6 | * A basic test of packet socket fanout behavior. | |
7 | * | |
8 | * Control: | |
9 | * - create fanout fails as expected with illegal flag combinations | |
10 | * - join fanout fails as expected with diverging types or flags | |
11 | * | |
12 | * Datapath: | |
13 | * Open a pair of packet sockets and a pair of INET sockets, send a known | |
14 | * number of packets across the two INET sockets and count the number of | |
15 | * packets enqueued onto the two packet sockets. | |
16 | * | |
17 | * The test currently runs for | |
18 | * - PACKET_FANOUT_HASH | |
19 | * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER | |
23a9072e WB |
20 | * - PACKET_FANOUT_LB |
21 | * - PACKET_FANOUT_CPU | |
77f65ebd | 22 | * - PACKET_FANOUT_ROLLOVER |
95e22792 | 23 | * - PACKET_FANOUT_CBPF |
30da679e | 24 | * - PACKET_FANOUT_EBPF |
77f65ebd WB |
25 | * |
26 | * Todo: | |
77f65ebd | 27 | * - functionality: PACKET_FANOUT_FLAG_DEFRAG |
77f65ebd WB |
28 | */ |
29 | ||
23a9072e WB |
30 | #define _GNU_SOURCE /* for sched_setaffinity */ |
31 | ||
77f65ebd WB |
32 | #include <arpa/inet.h> |
33 | #include <errno.h> | |
23a9072e | 34 | #include <fcntl.h> |
30da679e | 35 | #include <linux/unistd.h> /* for __NR_bpf */ |
77f65ebd | 36 | #include <linux/filter.h> |
30da679e | 37 | #include <linux/bpf.h> |
77f65ebd | 38 | #include <linux/if_packet.h> |
cc30c93f | 39 | #include <net/if.h> |
77f65ebd WB |
40 | #include <net/ethernet.h> |
41 | #include <netinet/ip.h> | |
42 | #include <netinet/udp.h> | |
23a9072e WB |
43 | #include <poll.h> |
44 | #include <sched.h> | |
77f65ebd WB |
45 | #include <stdint.h> |
46 | #include <stdio.h> | |
47 | #include <stdlib.h> | |
48 | #include <string.h> | |
23a9072e | 49 | #include <sys/mman.h> |
77f65ebd WB |
50 | #include <sys/socket.h> |
51 | #include <sys/stat.h> | |
52 | #include <sys/types.h> | |
53 | #include <unistd.h> | |
54 | ||
23a95442 | 55 | #include "psock_lib.h" |
77f65ebd | 56 | |
23a95442 | 57 | #define RING_NUM_FRAMES 20 |
77f65ebd WB |
58 | |
/* Open a packet socket on loopback and try to join a fanout group.
 *
 * The socket gets the filter installed by pair_udp_setfilter(), is bound to
 * ETH_P_IP on interface "lo", and is then asked to join fanout group
 * @group_id using the PACKET_FANOUT mode and flags in @typeflags.
 *
 * Failing to create the socket, to resolve "lo" or to bind is fatal
 * (exit(1)); only a refused PACKET_FANOUT request is reported to the caller.
 *
 * @return -1 if the fanout request is refused, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
	struct sockaddr_ll addr = {0};
	uint32_t fanout_arg;
	int fd;

	fd = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd < 0) {
		perror("socket packet");
		exit(1);
	}

	pair_udp_setfilter(fd);

	addr.sll_family = AF_PACKET;
	addr.sll_protocol = htons(ETH_P_IP);
	addr.sll_ifindex = if_nametoindex("lo");
	if (addr.sll_ifindex == 0) {
		perror("if_nametoindex");
		exit(1);
	}
	if (bind(fd, (void *) &addr, sizeof(addr))) {
		perror("bind packet");
		exit(1);
	}

	/* Type and flags occupy the upper 16 bits, the group id the lower 16.
	 * Compose the word in unsigned arithmetic: a flag with bit 15 set
	 * (PACKET_FANOUT_FLAG_DEFRAG is 0x8000) would overflow a signed int
	 * when shifted left by 16, which is undefined behavior. */
	fanout_arg = ((uint32_t) typeflags << 16) | group_id;
	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &fanout_arg, sizeof(fanout_arg))) {
		/* Expected failure path for the control tests: release the
		 * socket and let the caller decide whether this is an error. */
		if (close(fd)) {
			perror("close packet");
			exit(1);
		}
		return -1;
	}

	return fd;
}
97 | ||
/* Attach a two-instruction classic BPF program to @fd via PACKET_FANOUT_DATA.
 *
 * The program loads the byte at absolute offset 80 and returns that byte.
 * Exits the process if setsockopt() fails. */
static void sock_fanout_set_cbpf(int fd)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),	/* ldb [80] */
		BPF_STMT(BPF_RET+BPF_A, 0),		/* ret A */
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA,
		       &prog, sizeof(prog)) == 0)
		return;

	perror("fanout data cbpf");
	exit(1);
}
115 | ||
/* Read back the PACKET_FANOUT option of @fd and split it into its halves.
 *
 * @typeflags receives the upper 16 bits of the option value and @group_id
 * the lower 16 bits. Exits the process if getsockopt() fails. */
static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
{
	int sockopt = 0;
	socklen_t sockopt_len = sizeof(sockopt);
	uint32_t raw;

	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &sockopt, &sockopt_len)) {
		perror("failed to getsockopt");
		exit(1);
	}

	/* Split as unsigned so the shift never operates on a negative value,
	 * and mask exactly the 16 bits that fit in *group_id. */
	raw = (uint32_t) sockopt;
	*typeflags = raw >> 16;
	*group_id = raw & 0xffff;
}
129 | ||
30da679e WB |
130 | static void sock_fanout_set_ebpf(int fd) |
131 | { | |
ddd00103 PB |
132 | static char log_buf[65536]; |
133 | ||
30da679e WB |
134 | const int len_off = __builtin_offsetof(struct __sk_buff, len); |
135 | struct bpf_insn prog[] = { | |
136 | { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, | |
137 | { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 }, | |
138 | { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, | |
139 | { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 }, | |
140 | { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 }, | |
141 | { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR }, | |
142 | { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, | |
143 | { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, | |
144 | { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } | |
145 | }; | |
30da679e WB |
146 | union bpf_attr attr; |
147 | int pfd; | |
148 | ||
149 | memset(&attr, 0, sizeof(attr)); | |
150 | attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; | |
151 | attr.insns = (unsigned long) prog; | |
152 | attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); | |
153 | attr.license = (unsigned long) "GPL"; | |
154 | attr.log_buf = (unsigned long) log_buf, | |
155 | attr.log_size = sizeof(log_buf), | |
156 | attr.log_level = 1, | |
157 | ||
158 | pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); | |
159 | if (pfd < 0) { | |
160 | perror("bpf"); | |
161 | fprintf(stderr, "bpf verifier:\n%s\n", log_buf); | |
162 | exit(1); | |
163 | } | |
164 | ||
165 | if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) { | |
166 | perror("fanout data ebpf"); | |
167 | exit(1); | |
168 | } | |
169 | ||
170 | if (close(pfd)) { | |
171 | perror("close ebpf"); | |
172 | exit(1); | |
173 | } | |
174 | } | |
175 | ||
23a9072e | 176 | static char *sock_fanout_open_ring(int fd) |
77f65ebd | 177 | { |
23a9072e WB |
178 | struct tpacket_req req = { |
179 | .tp_block_size = getpagesize(), | |
180 | .tp_frame_size = getpagesize(), | |
181 | .tp_block_nr = RING_NUM_FRAMES, | |
182 | .tp_frame_nr = RING_NUM_FRAMES, | |
183 | }; | |
184 | char *ring; | |
98e821a2 | 185 | int val = TPACKET_V2; |
77f65ebd | 186 | |
98e821a2 WB |
187 | if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val, |
188 | sizeof(val))) { | |
189 | perror("packetsock ring setsockopt version"); | |
190 | exit(1); | |
191 | } | |
23a9072e WB |
192 | if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, |
193 | sizeof(req))) { | |
194 | perror("packetsock ring setsockopt"); | |
77f65ebd WB |
195 | exit(1); |
196 | } | |
23a9072e WB |
197 | |
198 | ring = mmap(0, req.tp_block_size * req.tp_block_nr, | |
199 | PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); | |
95e22792 WB |
200 | if (ring == MAP_FAILED) { |
201 | perror("packetsock ring mmap"); | |
77f65ebd WB |
202 | exit(1); |
203 | } | |
23a9072e WB |
204 | |
205 | return ring; | |
206 | } | |
207 | ||
208 | static int sock_fanout_read_ring(int fd, void *ring) | |
209 | { | |
98e821a2 | 210 | struct tpacket2_hdr *header = ring; |
23a9072e WB |
211 | int count = 0; |
212 | ||
fbf8e721 | 213 | while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) { |
23a9072e WB |
214 | count++; |
215 | header = ring + (count * getpagesize()); | |
216 | } | |
217 | ||
218 | return count; | |
219 | } | |
220 | ||
/* Compare the fill level of both rings against an expected pair of counts.
 *
 * The counts are logged, and the check passes when they equal @expect in
 * either order.
 *
 * @return 0 on a match, 1 (after printing a warning) otherwise */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	const int first = sock_fanout_read_ring(fds[0], rings[0]);
	const int second = sock_fanout_read_ring(fds[1], rings[1]);
	int in_order, swapped;

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		first, second, expect[0], expect[1]);

	in_order = (first == expect[0]) && (second == expect[1]);
	swapped = (first == expect[1]) && (second == expect[0]);
	if (in_order || swapped)
		return 0;

	fprintf(stderr, "warning: incorrect queue lengths\n");
	return 1;
}
239 | ||
/* A single socket must not be able to combine the ROLLOVER mode with the
 * ROLLOVER flag; exit with an error if such a socket can be opened. */
static void test_control_single(void)
{
	int fd;

	fprintf(stderr, "test: control single socket\n");

	fd = sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			      PACKET_FANOUT_FLAG_ROLLOVER, 0);
	if (fd == -1)
		return;

	fprintf(stderr, "ERROR: opened socket with dual rollover\n");
	exit(1);
}
251 | ||
/* Joining an existing group must fail when the mode or flags differ from
 * those of the group, and succeed when they are the same. */
static void test_control_group(void)
{
	/* Join attempts on group 0 that are expected to be refused, in the
	 * order they are tried, with the message printed if one succeeds. */
	static const struct {
		uint16_t typeflags;
		const char *error;
	} rejected[] = {
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG,
		  "ERROR: joined group with wrong flag defrag\n" },
		{ PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
		  "ERROR: joined group with wrong flag ro\n" },
		{ PACKET_FANOUT_CPU,
		  "ERROR: joined group with wrong mode\n" },
	};
	int owner, member;
	size_t i;

	fprintf(stderr, "test: control multiple sockets\n");

	owner = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (owner == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}

	for (i = 0; i < sizeof(rejected) / sizeof(rejected[0]); i++) {
		if (sock_fanout_open(rejected[i].typeflags, 0) == -1)
			continue;
		fputs(rejected[i].error, stderr);
		exit(1);
	}

	member = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (member == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}

	if (close(member) != 0 || close(owner) != 0) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
288 | ||
/* Test creation of fanout groups with PACKET_FANOUT_FLAG_UNIQUEID.
 *
 * Checks that a unique-id group can be created, that the type/flags value
 * read back equals PACKET_FANOUT_HASH, that the group can only be joined
 * with a matching type, that a second unique-id group can be created, and
 * that asking for a unique id while also naming a group id is refused. */
static void test_unique_fanout_group_ids(void)
{
	uint16_t typeflags, first_group_id, second_group_id;
	int creator, joiner, other;

	fprintf(stderr, "test: unique ids\n");

	creator = sock_fanout_open(PACKET_FANOUT_HASH |
				   PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (creator == -1) {
		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(creator, &typeflags, &first_group_id);
	if (typeflags != PACKET_FANOUT_HASH) {
		fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
		exit(1);
	}

	/* A different mode must not be admitted to the new group. */
	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong type.\n");
		exit(1);
	}

	joiner = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
	if (joiner == -1) {
		fprintf(stderr,
			"ERROR: failed to join previously created group.\n");
		exit(1);
	}

	other = sock_fanout_open(PACKET_FANOUT_HASH |
				 PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (other == -1) {
		fprintf(stderr,
			"ERROR: failed to create a second unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(other, &typeflags, &second_group_id);
	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
			     second_group_id) != -1) {
		fprintf(stderr,
			"ERROR: specified a group id when requesting unique id\n");
		exit(1);
	}

	if (close(creator) != 0 || close(joiner) != 0 || close(other) != 0) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
343 | ||
23a9072e WB |
344 | static int test_datapath(uint16_t typeflags, int port_off, |
345 | const int expect1[], const int expect2[]) | |
77f65ebd WB |
346 | { |
347 | const int expect0[] = { 0, 0 }; | |
23a9072e | 348 | char *rings[2]; |
95e22792 | 349 | uint8_t type = typeflags & 0xFF; |
23a9072e | 350 | int fds[2], fds_udp[2][2], ret; |
77f65ebd | 351 | |
cc30c93f WB |
352 | fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n", |
353 | typeflags, PORT_BASE, PORT_BASE + port_off); | |
77f65ebd | 354 | |
28be04f5 MM |
355 | fds[0] = sock_fanout_open(typeflags, 0); |
356 | fds[1] = sock_fanout_open(typeflags, 0); | |
77f65ebd WB |
357 | if (fds[0] == -1 || fds[1] == -1) { |
358 | fprintf(stderr, "ERROR: failed open\n"); | |
359 | exit(1); | |
360 | } | |
95e22792 | 361 | if (type == PACKET_FANOUT_CBPF) |
c1f8d0f9 | 362 | sock_fanout_set_cbpf(fds[0]); |
30da679e WB |
363 | else if (type == PACKET_FANOUT_EBPF) |
364 | sock_fanout_set_ebpf(fds[0]); | |
95e22792 | 365 | |
23a9072e WB |
366 | rings[0] = sock_fanout_open_ring(fds[0]); |
367 | rings[1] = sock_fanout_open_ring(fds[1]); | |
368 | pair_udp_open(fds_udp[0], PORT_BASE); | |
369 | pair_udp_open(fds_udp[1], PORT_BASE + port_off); | |
370 | sock_fanout_read(fds, rings, expect0); | |
77f65ebd WB |
371 | |
372 | /* Send data, but not enough to overflow a queue */ | |
373 | pair_udp_send(fds_udp[0], 15); | |
95e22792 | 374 | pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1); |
23a9072e | 375 | ret = sock_fanout_read(fds, rings, expect1); |
77f65ebd WB |
376 | |
377 | /* Send more data, overflow the queue */ | |
95e22792 | 378 | pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1); |
77f65ebd | 379 | /* TODO: ensure consistent order between expect1 and expect2 */ |
23a9072e | 380 | ret |= sock_fanout_read(fds, rings, expect2); |
77f65ebd | 381 | |
23a9072e WB |
382 | if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || |
383 | munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { | |
384 | fprintf(stderr, "close rings\n"); | |
385 | exit(1); | |
386 | } | |
77f65ebd WB |
387 | if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || |
388 | close(fds_udp[0][1]) || close(fds_udp[0][0]) || | |
389 | close(fds[1]) || close(fds[0])) { | |
390 | fprintf(stderr, "close datapath\n"); | |
391 | exit(1); | |
392 | } | |
23a9072e WB |
393 | |
394 | return ret; | |
395 | } | |
396 | ||
/* Restrict the calling process to the single CPU @cpuid.
 *
 * @return 0 on success, 1 if sched_setaffinity() fails with EINVAL;
 *         any other failure terminates the process */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(cpuid, &mask);

	if (sched_setaffinity(0, sizeof(mask), &mask) == 0)
		return 0;

	if (errno == EINVAL)
		return 1;

	fprintf(stderr, "setaffinity %d\n", cpuid);
	exit(1);
}
413 | ||
/* Run the control tests, then the datapath test for every supported fanout
 * mode. Returns 0 and prints a success line only if every datapath test
 * matched its expected counts; returns 1 otherwise. */
int main(int argc, char **argv)
{
	/* Each table holds the two per-socket packet counts expected after
	 * the first and after the second batch sent by test_datapath(). */
	const int expect_hash[2][2]	= { { 15, 5 },  { 20, 5 } };
	const int expect_hash_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_lb[2][2]	= { { 10, 10 }, { 18, 17 } };
	const int expect_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
	const int expect_uniqueid[2][2]	= { { 20, 20},  { 20, 20 } };
	int port_off = 2, tries = 20, ret;

	test_control_single();
	test_control_group();
	test_unique_fanout_group_ids();

	/* find a set of ports that do not collide onto the same socket */
	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
			    expect_hash[0], expect_hash[1]);
	while (ret) {
		/* Give up only after a failed attempt with no retries left, so
		 * that a successful final retry is not reported as a failure. */
		if (!tries) {
			fprintf(stderr, "too many collisions\n");
			return 1;
		}
		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
		tries--;
		ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
				    expect_hash[0], expect_hash[1]);
	}

	ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
			     port_off, expect_hash_rb[0], expect_hash_rb[1]);
	ret |= test_datapath(PACKET_FANOUT_LB,
			     port_off, expect_lb[0], expect_lb[1]);
	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
			     port_off, expect_rb[0], expect_rb[1]);

	ret |= test_datapath(PACKET_FANOUT_CBPF,
			     port_off, expect_bpf[0], expect_bpf[1]);
	ret |= test_datapath(PACKET_FANOUT_EBPF,
			     port_off, expect_bpf[0], expect_bpf[1]);

	set_cpuaffinity(0);
	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
			     expect_cpu0[0], expect_cpu0[1]);
	/* The second CPU test only runs if pinning to CPU 1 succeeded. */
	if (!set_cpuaffinity(1))
		/* TODO: test that choice alternates with previous */
		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
				     expect_cpu1[0], expect_cpu1[1]);

	ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
			     expect_uniqueid[0], expect_uniqueid[1]);

	if (ret)
		return 1;

	printf("OK. All tests passed\n");
	return 0;
}