/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>

/**
 * sk_filter_trim_cap - run a packet through a socket filter
 * @sk: sock associated with &sk_buff
 * @skb: buffer to filter
 * @cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

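/*
 * Usage sketch (illustrative, not part of the original file): most callers
 * go through the sk_filter() wrapper from <linux/filter.h>, which is
 * equivalent to trimming with a one-byte cap:
 *
 *	if (sk_filter(sk, skb))		// i.e. sk_filter_trim_cap(sk, skb, 1)
 *		goto drop;
 */
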
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return *(u8 *)(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
					 offset);
}

BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u16 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return get_unaligned_be16(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u32 tmp, *ptr;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
		if (headlen - offset >= len)
			return get_unaligned_be32(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

BPF_CALL_0(bpf_get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= bpf_get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		break;
	case SKF_AD_VLAN_TAG_PRESENT:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
		if (PKT_VLAN_PRESENT_BIT)
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
		if (PKT_VLAN_PRESENT_BIT < 7)
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		break;
	}

	return insn - insn_buf;
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

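/*
 * Illustrative example (not part of the original file): a classic BPF
 * "load ancillary data" instruction such as
 *
 *	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, SKF_AD_OFF + SKF_AD_PROTOCOL)
 *
 * is rewritten by convert_bpf_extensions() above into two eBPF insns:
 * a BPF_LDX_MEM(BPF_H, ...) load of skb->protocol followed by a
 * BPF_ENDIAN(BPF_FROM_BE, ...) byte swap into host order.
 */
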
static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
{
	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
	bool endian = BPF_SIZE(fp->code) == BPF_H ||
		      BPF_SIZE(fp->code) == BPF_W;
	bool indirect = BPF_MODE(fp->code) == BPF_IND;
	const int ip_align = NET_IP_ALIGN;
	struct bpf_insn *insn = *insnp;
	int offset = fp->k;

	if (!indirect &&
	    ((unaligned_ok && offset >= 0) ||
	     (!unaligned_ok && offset >= 0 &&
	      offset + ip_align >= 0 &&
	      offset + ip_align % size == 0))) {
		bool ldx_off_ok = offset <= S16_MAX;

		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
		if (offset)
			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
				      size, 2 + endian + (!ldx_off_ok * 2));
		if (ldx_off_ok) {
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_D, offset);
		} else {
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_TMP, 0);
		}
		if (endian)
			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
		*insn++ = BPF_JMP_A(8);
	}

	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
	if (!indirect) {
		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
	} else {
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
		if (fp->k)
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
	}

	switch (BPF_SIZE(fp->code)) {
	case BPF_B:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
		break;
	case BPF_H:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
		break;
	case BPF_W:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
		break;
	default:
		return false;
	}

	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
	*insn = BPF_EXIT_INSN();

	*insnp = insn;
	return true;
}

/**
 * bpf_convert_filter - convert filter program
 * @prog: the user passed filter program
 * @len: the length of the user passed filter program
 * @new_prog: allocated 'struct bpf_prog' or NULL
 * @new_len: pointer to store length of converted program
 * @seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len,
			      bool *seen_ld_abs)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
		if (*seen_ld_abs) {
			/* For packet access in classic BPF, cache skb->data
			 * in callee-saved BPF R8 and skb->len - skb->data_len
			 * (headlen) in BPF R9. Since classic BPF is read-only
			 * on CTX, we only need to cache it once.
			 */
			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
						  BPF_REG_D, BPF_REG_CTX,
						  offsetof(struct sk_buff, data));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
						  offsetof(struct sk_buff, len));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
						  offsetof(struct sk_buff, data_len));
			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
		}
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[32] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    convert_bpf_ld_abs(fp, &insn)) {
				*seen_ld_abs = true;
				break;
			}

			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
				/* Error with exception code on div/mod by 0.
				 * For cBPF programs, this was always return 0.
				 */
				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
				*insn++ = BPF_EXIT_INSN();
			}

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		const s32 off_min = S16_MIN, off_max = S16_MAX;		\
		s32 off;						\
									\
		if (target >= len || target < 0)			\
			goto err;					\
		off = addrs ? addrs[target] - addrs[i] - 1 : 0;		\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		off -= insn - tmp_insns;				\
		/* Reject anything not fitting into insn->off. */	\
		if (off < off_min || off > off_max)			\
			goto err;					\
		insn->off = off;					\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B: {
			struct sock_filter tmp = {
				.code	= BPF_LD | BPF_ABS | BPF_B,
				.k	= fp->k,
			};

			*seen_ld_abs = true;

			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			convert_bpf_ld_abs(&tmp, &insn);
			insn++;
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* tmp = X */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;
		}
		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		if (*seen_ld_abs)
			*new_len += 4; /* Prologue bits. */
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by the user never tries
 * to read a cell if not previously written, and we check all branches to
 * be sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

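/*
 * Illustrative example (not part of the original file): check_load_and_stores()
 * rejects a classic program like the following, because M[0] is read before
 * any BPF_ST/BPF_STX instruction has written it:
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),	// A = mem[0], but mem[0] was never written
 *	BPF_STMT(BPF_RET | BPF_A, 0),
 */
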
static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 * bpf_check_classic - verify socket filter code
 * @filter: filter to verify
 * @flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 * sk_filter_release_rcu - Release a socket filter by rcu_head
 * @rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 * sk_filter_release - release a socket filter
 * @fp: filter to remove
 *
 * Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	if (!refcount_inc_not_zero(&fp->refcnt))
		return false;

	if (!__sk_filter_charge(sk, fp)) {
		sk_filter_release(fp);
		return false;
	}
	return true;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;
	bool seen_ld_abs = false;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
				 &seen_ld_abs);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
				 &seen_ld_abs);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err)
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 * bpf_prog_create - create an unattached filter
 * @pfp: the unattached filter that is created
 * @fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);

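/*
 * Usage sketch (illustrative, not part of the original file): an in-kernel
 * user builds a classic filter and hands it to bpf_prog_create(), e.g. an
 * "accept up to 0xffff bytes" program:
 *
 *	struct sock_filter code[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),
 *	};
 *	struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(code),
 *		.filter	= code,
 *	};
 *	struct bpf_prog *prog;
 *	int err = bpf_prog_create(&prog, &fprog);
 *	...
 *	bpf_prog_destroy(prog);
 */
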
/**
 * bpf_prog_create_from_user - create an unattached filter from user buffer
 * @pfp: the unattached filter that is created
 * @fprog: the filter program
 * @trans: post-classic verifier transformation handler
 * @save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}

/**
 * sk_attach_filter - attach a socket filter
 * @fprog: the filter program
 * @sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

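/*
 * Userspace counterpart (illustrative, not part of the original file):
 * sk_attach_filter() is reached via setsockopt(SO_ATTACH_FILTER), e.g.:
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },	// accept whole packet
 *	};
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 */
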
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		err = -ENOMEM;
	else
		err = reuseport_attach_prog(sk, prog);

	if (err)
		__bpf_prog_release(prog);

	return err;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
	if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL)
		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
		 * bpf prog (e.g. sockmap). It depends on the
		 * limitation imposed by bpf_prog_load().
		 * Hence, sysctl_optmem_max is not checked.
		 */
		if ((sk->sk_type != SOCK_STREAM &&
		     sk->sk_type != SOCK_DGRAM) ||
		    (sk->sk_protocol != IPPROTO_UDP &&
		     sk->sk_protocol != IPPROTO_TCP) ||
		    (sk->sk_family != AF_INET &&
		     sk->sk_family != AF_INET6)) {
			err = -ENOTSUPP;
			goto err_prog_put;
		}
	} else {
		/* BPF_PROG_TYPE_SOCKET_FILTER */
		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
			err = -ENOMEM;
			goto err_prog_put;
		}
	}

	err = reuseport_attach_prog(sk, prog);
err_prog_put:
	if (err)
		bpf_prog_put(prog);

	return err;
}

void sk_reuseport_prog_free(struct bpf_prog *prog)
{
	if (!prog)
		return;

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
		bpf_prog_put(prog);
	else
		bpf_prog_destroy(prog);
}

struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_pointers(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}

static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > 0xffff))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

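/*
 * Illustrative use of the bpf_skb_load_bytes() helper from an eBPF program
 * (assumes a TC classifier context; not part of the original file):
 *
 *	char eth[14];
 *	if (bpf_skb_load_bytes(skb, 0, eth, sizeof(eth)) < 0)
 *		return TC_ACT_OK;	// header not available
 */
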
4e1ec56c
DB
1732BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1733 u32, offset, void *, to, u32, len, u32, start_header)
1734{
3eee1f75
DB
1735 u8 *end = skb_tail_pointer(skb);
1736 u8 *net = skb_network_header(skb);
1737 u8 *mac = skb_mac_header(skb);
4e1ec56c
DB
1738 u8 *ptr;
1739
3eee1f75 1740 if (unlikely(offset > 0xffff || len > (end - mac)))
4e1ec56c
DB
1741 goto err_clear;
1742
1743 switch (start_header) {
1744 case BPF_HDR_START_MAC:
3eee1f75 1745 ptr = mac + offset;
4e1ec56c
DB
1746 break;
1747 case BPF_HDR_START_NET:
3eee1f75 1748 ptr = net + offset;
4e1ec56c
DB
1749 break;
1750 default:
1751 goto err_clear;
1752 }
1753
3eee1f75 1754 if (likely(ptr >= mac && ptr + len <= end)) {
4e1ec56c
DB
1755 memcpy(to, ptr, len);
1756 return 0;
1757 }
1758
1759err_clear:
1760 memset(to, 0, len);
1761 return -EFAULT;
1762}
1763
1764static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1765 .func = bpf_skb_load_bytes_relative,
1766 .gpl_only = false,
1767 .ret_type = RET_INTEGER,
1768 .arg1_type = ARG_PTR_TO_CTX,
1769 .arg2_type = ARG_ANYTHING,
1770 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1771 .arg4_type = ARG_CONST_SIZE,
1772 .arg5_type = ARG_ANYTHING,
1773};
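
The relative variant is meant for program types that cannot rely on a mac header being present (e.g. cgroup skb). A hedged sketch, assuming an IPv4 packet and the usual libbpf includes:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup_skb/ingress")
int drop_low_ttl(struct __sk_buff *skb)
{
        struct iphdr iph;

        if (skb->protocol != bpf_htons(ETH_P_IP))
                return 1;       /* let non-IPv4 pass */

        /* Offset is taken from the network header, so any (or no)
         * mac header length works the same way.
         */
        if (bpf_skb_load_bytes_relative(skb, 0, &iph, sizeof(iph),
                                        BPF_HDR_START_NET) < 0)
                return 1;
        return iph.ttl > 1;     /* 0 == drop for cgroup_skb programs */
}

char _license[] SEC("license") = "GPL";
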
1774
36bbef52
DB
1775BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1776{
1777 /* Idea is the following: should the needed direct read/write
1778 * test fail during runtime, we can pull in more data and redo
1779 * the test, since implicitly we invalidate previous checks here.
1780 *
1781 * Or, since we know how much we need to make read/writeable,
1782 * this can be done once at the program beginning for direct
1783 * access case. By this we overcome limitations of only current
1784 * headroom being accessible.
1785 */
1786 return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1787}
1788
1789static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1790 .func = bpf_skb_pull_data,
1791 .gpl_only = false,
1792 .ret_type = RET_INTEGER,
1793 .arg1_type = ARG_PTR_TO_CTX,
1794 .arg2_type = ARG_ANYTHING,
1795};
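
Following the comment above, a hypothetical tc program would typically call this once up front so that later direct packet access passes its bounds check; the 64-byte figure is an arbitrary example:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int parse_linear(struct __sk_buff *skb)
{
        void *data, *data_end;

        /* Fails for packets shorter than 64 bytes; just pass those on. */
        if (bpf_skb_pull_data(skb, 64) < 0)
                return TC_ACT_OK;

        data     = (void *)(long)skb->data;
        data_end = (void *)(long)skb->data_end;
        if (data + 64 > data_end)
                return TC_ACT_OK;

        /* ... the first 64 bytes can now be read/written directly ... */
        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
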
1796
46f8bc92
MKL
1797BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1798{
46f8bc92
MKL
1799 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1800}
1801
1802static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1803 .func = bpf_sk_fullsock,
1804 .gpl_only = false,
1805 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
1806 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
1807};
1808
0ea488ff
JF
1809static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1810 unsigned int write_len)
1811{
1812 int err = __bpf_try_make_writable(skb, write_len);
1813
1814 bpf_compute_data_end_sk_skb(skb);
1815 return err;
1816}
1817
1818BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1819{
1820 /* Idea is the following: should the needed direct read/write
1821 * test fail during runtime, we can pull in more data and redo
1822 * the test, since implicitly we invalidate previous checks here.
1823 *
1824 * Or, since we know how much we need to make read/writeable,
1825 * this can be done once at the program beginning for direct
1826 * access case. By this we overcome limitations of only current
1827 * headroom being accessible.
1828 */
1829 return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1830}
1831
1832static const struct bpf_func_proto sk_skb_pull_data_proto = {
1833 .func = sk_skb_pull_data,
1834 .gpl_only = false,
1835 .ret_type = RET_INTEGER,
1836 .arg1_type = ARG_PTR_TO_CTX,
1837 .arg2_type = ARG_ANYTHING,
1838};
1839
f3694e00
DB
1840BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1841 u64, from, u64, to, u64, flags)
91bc4822 1842{
0ed661d5 1843 __sum16 *ptr;
91bc4822 1844
781c53bc
DB
1845 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1846 return -EINVAL;
0ed661d5 1847 if (unlikely(offset > 0xffff || offset & 1))
91bc4822 1848 return -EFAULT;
0ed661d5 1849 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
91bc4822
AS
1850 return -EFAULT;
1851
0ed661d5 1852 ptr = (__sum16 *)(skb->data + offset);
781c53bc 1853 switch (flags & BPF_F_HDR_FIELD_MASK) {
8050c0f0
DB
1854 case 0:
1855 if (unlikely(from != 0))
1856 return -EINVAL;
1857
1858 csum_replace_by_diff(ptr, to);
1859 break;
91bc4822
AS
1860 case 2:
1861 csum_replace2(ptr, from, to);
1862 break;
1863 case 4:
1864 csum_replace4(ptr, from, to);
1865 break;
1866 default:
1867 return -EINVAL;
1868 }
1869
91bc4822
AS
1870 return 0;
1871}
1872
577c50aa 1873static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
91bc4822
AS
1874 .func = bpf_l3_csum_replace,
1875 .gpl_only = false,
1876 .ret_type = RET_INTEGER,
1877 .arg1_type = ARG_PTR_TO_CTX,
1878 .arg2_type = ARG_ANYTHING,
1879 .arg3_type = ARG_ANYTHING,
1880 .arg4_type = ARG_ANYTHING,
1881 .arg5_type = ARG_ANYTHING,
1882};
1883
f3694e00
DB
1884BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1885 u64, from, u64, to, u64, flags)
91bc4822 1886{
781c53bc 1887 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
2f72959a 1888 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
d1b662ad 1889 bool do_mforce = flags & BPF_F_MARK_ENFORCE;
0ed661d5 1890 __sum16 *ptr;
91bc4822 1891
d1b662ad
DB
1892 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1893 BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
781c53bc 1894 return -EINVAL;
0ed661d5 1895 if (unlikely(offset > 0xffff || offset & 1))
91bc4822 1896 return -EFAULT;
0ed661d5 1897 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
91bc4822
AS
1898 return -EFAULT;
1899
0ed661d5 1900 ptr = (__sum16 *)(skb->data + offset);
d1b662ad 1901 if (is_mmzero && !do_mforce && !*ptr)
2f72959a 1902 return 0;
91bc4822 1903
781c53bc 1904 switch (flags & BPF_F_HDR_FIELD_MASK) {
7d672345
DB
1905 case 0:
1906 if (unlikely(from != 0))
1907 return -EINVAL;
1908
1909 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1910 break;
91bc4822
AS
1911 case 2:
1912 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1913 break;
1914 case 4:
1915 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1916 break;
1917 default:
1918 return -EINVAL;
1919 }
1920
2f72959a
DB
1921 if (is_mmzero && !*ptr)
1922 *ptr = CSUM_MANGLED_0;
91bc4822
AS
1923 return 0;
1924}
1925
577c50aa 1926static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
91bc4822
AS
1927 .func = bpf_l4_csum_replace,
1928 .gpl_only = false,
1929 .ret_type = RET_INTEGER,
1930 .arg1_type = ARG_PTR_TO_CTX,
1931 .arg2_type = ARG_ANYTHING,
1932 .arg3_type = ARG_ANYTHING,
1933 .arg4_type = ARG_ANYTHING,
1934 .arg5_type = ARG_ANYTHING,
608cd71a
AS
1935};
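
The two csum_replace helpers are usually paired with bpf_skb_store_bytes() for NAT-style rewrites. A sketch under the assumption of an IPv4/TCP packet with no IP options; offsets and the replacement address are made up and sanity checks are omitted:

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define IP_SRC_OFF      (ETH_HLEN + offsetof(struct iphdr, saddr))
#define IP_CSUM_OFF     (ETH_HLEN + offsetof(struct iphdr, check))
#define TCP_CSUM_OFF    (ETH_HLEN + sizeof(struct iphdr) + \
                         offsetof(struct tcphdr, check))

SEC("tc")
int snat_saddr(struct __sk_buff *skb)
{
        __be32 new_ip = bpf_htonl(0x0a000001);  /* 10.0.0.1, made up */
        __be32 old_ip;

        if (bpf_skb_load_bytes(skb, IP_SRC_OFF, &old_ip, sizeof(old_ip)) < 0)
                return TC_ACT_OK;

        /* TCP csum covers a pseudo header, hence BPF_F_PSEUDO_HDR; the
         * low flag bits carry the field size (4 bytes here).
         */
        bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip,
                            BPF_F_PSEUDO_HDR | sizeof(new_ip));
        bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
        bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
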
1936
f3694e00
DB
1937BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1938 __be32 *, to, u32, to_size, __wsum, seed)
7d672345 1939{
21cafc1d 1940 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
f3694e00 1941 u32 diff_size = from_size + to_size;
7d672345
DB
1942 int i, j = 0;
1943
1944 /* This is quite flexible, some examples:
1945 *
1946 * from_size == 0, to_size > 0, seed := csum --> pushing data
1947 * from_size > 0, to_size == 0, seed := csum --> pulling data
1948 * from_size > 0, to_size > 0, seed := 0 --> diffing data
1949 *
1950 * Even for diffing, from_size and to_size don't need to be equal.
1951 */
1952 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1953 diff_size > sizeof(sp->diff)))
1954 return -EINVAL;
1955
1956 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1957 sp->diff[j] = ~from[i];
1958 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
1959 sp->diff[j] = to[i];
1960
1961 return csum_partial(sp->diff, diff_size, seed);
1962}
1963
577c50aa 1964static const struct bpf_func_proto bpf_csum_diff_proto = {
7d672345
DB
1965 .func = bpf_csum_diff,
1966 .gpl_only = false,
36bbef52 1967 .pkt_access = true,
7d672345 1968 .ret_type = RET_INTEGER,
db1ac496 1969 .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
39f19ebb 1970 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
db1ac496 1971 .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
39f19ebb 1972 .arg4_type = ARG_CONST_SIZE_OR_ZERO,
7d672345
DB
1973 .arg5_type = ARG_ANYTHING,
1974};
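
For larger or variable-sized changes, bpf_csum_diff() computes the checksum delta, which can then be folded in through the size-0 mode of bpf_l4_csum_replace(). A sketch assuming IPv6/TCP, with made-up offsets and a documentation-prefix address:

#include <stddef.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define IP6_DST_OFF     (ETH_HLEN + offsetof(struct ipv6hdr, daddr))
#define TCP6_CSUM_OFF   (ETH_HLEN + sizeof(struct ipv6hdr) + \
                         offsetof(struct tcphdr, check))

SEC("tc")
int rewrite_v6_daddr(struct __sk_buff *skb)
{
        __be32 new_dst[4] = { bpf_htonl(0x20010db8), 0, 0, bpf_htonl(1) };
        __be32 old_dst[4];
        __s64 diff;

        if (bpf_skb_load_bytes(skb, IP6_DST_OFF, old_dst, sizeof(old_dst)) < 0)
                return TC_ACT_OK;

        /* from_size and to_size happen to match here; seed is 0. */
        diff = bpf_csum_diff(old_dst, sizeof(old_dst),
                             new_dst, sizeof(new_dst), 0);
        if (diff < 0)
                return TC_ACT_OK;

        bpf_skb_store_bytes(skb, IP6_DST_OFF, new_dst, sizeof(new_dst), 0);
        /* Field size 0: 'to' is a precomputed diff and 'from' must be 0.
         * IPv6 has no L3 checksum, only the pseudo-header based TCP one.
         */
        bpf_l4_csum_replace(skb, TCP6_CSUM_OFF, 0, diff, BPF_F_PSEUDO_HDR);
        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
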
1975
36bbef52
DB
1976BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
1977{
1978 /* The interface is to be used in combination with bpf_csum_diff()
1979 * for direct packet writes. csum rotation for alignment as well
1980 * as emulating csum_sub() can be done from the eBPF program.
1981 */
1982 if (skb->ip_summed == CHECKSUM_COMPLETE)
1983 return (skb->csum = csum_add(skb->csum, csum));
1984
1985 return -ENOTSUPP;
1986}
1987
1988static const struct bpf_func_proto bpf_csum_update_proto = {
1989 .func = bpf_csum_update,
1990 .gpl_only = false,
1991 .ret_type = RET_INTEGER,
1992 .arg1_type = ARG_PTR_TO_CTX,
1993 .arg2_type = ARG_ANYTHING,
1994};
1995
a70b506e
DB
1996static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
1997{
a70b506e
DB
1998 return dev_forward_skb(dev, skb);
1999}
2000
4e3264d2
MKL
2001static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2002 struct sk_buff *skb)
2003{
2004 int ret = ____dev_forward_skb(dev, skb);
2005
2006 if (likely(!ret)) {
2007 skb->dev = dev;
2008 ret = netif_rx(skb);
2009 }
2010
2011 return ret;
2012}
2013
a70b506e
DB
2014static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2015{
2016 int ret;
2017
2018 if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
2019 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2020 kfree_skb(skb);
2021 return -ENETDOWN;
2022 }
2023
2024 skb->dev = dev;
2025
2026 __this_cpu_inc(xmit_recursion);
2027 ret = dev_queue_xmit(skb);
2028 __this_cpu_dec(xmit_recursion);
2029
2030 return ret;
2031}
2032
4e3264d2
MKL
2033static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2034 u32 flags)
2035{
e7c87bd6 2036 unsigned int mlen = skb_network_offset(skb);
4e3264d2 2037
e7c87bd6
WB
2038 if (mlen) {
2039 __skb_pull(skb, mlen);
4e3264d2 2040
e7c87bd6
WB
2041 /* At ingress, the mac header has already been pulled once.
2042 * At egress, skb_postpull_rcsum has to be done in case the
2043 * skb originated from ingress (i.e. a forwarded skb)
2044 * to ensure that rcsum starts at net header.
2045 */
2046 if (!skb_at_tc_ingress(skb))
2047 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2048 }
4e3264d2
MKL
2049 skb_pop_mac_header(skb);
2050 skb_reset_mac_len(skb);
2051 return flags & BPF_F_INGRESS ?
2052 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2053}
2054
2055static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2056 u32 flags)
2057{
3a0af8fd
TG
2058 /* Verify that a link layer header is carried */
2059 if (unlikely(skb->mac_header >= skb->network_header)) {
2060 kfree_skb(skb);
2061 return -ERANGE;
2062 }
2063
4e3264d2
MKL
2064 bpf_push_mac_rcsum(skb);
2065 return flags & BPF_F_INGRESS ?
2066 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2067}
2068
2069static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2070 u32 flags)
2071{
c491680f 2072 if (dev_is_mac_header_xmit(dev))
4e3264d2 2073 return __bpf_redirect_common(skb, dev, flags);
c491680f
DB
2074 else
2075 return __bpf_redirect_no_mac(skb, dev, flags);
4e3264d2
MKL
2076}
2077
f3694e00 2078BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
3896d655 2079{
3896d655 2080 struct net_device *dev;
36bbef52
DB
2081 struct sk_buff *clone;
2082 int ret;
3896d655 2083
781c53bc
DB
2084 if (unlikely(flags & ~(BPF_F_INGRESS)))
2085 return -EINVAL;
2086
3896d655
AS
2087 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2088 if (unlikely(!dev))
2089 return -EINVAL;
2090
36bbef52
DB
2091 clone = skb_clone(skb, GFP_ATOMIC);
2092 if (unlikely(!clone))
3896d655
AS
2093 return -ENOMEM;
2094
36bbef52
DB
2095 /* For direct write, we need to keep the invariant that the skbs
2096 * we're dealing with need to be uncloned. Should uncloning fail
2097 * here, we need to free the just generated clone to unclone once
2098 * again.
2099 */
2100 ret = bpf_try_make_head_writable(skb);
2101 if (unlikely(ret)) {
2102 kfree_skb(clone);
2103 return -ENOMEM;
2104 }
2105
4e3264d2 2106 return __bpf_redirect(clone, dev, flags);
3896d655
AS
2107}
2108
577c50aa 2109static const struct bpf_func_proto bpf_clone_redirect_proto = {
3896d655
AS
2110 .func = bpf_clone_redirect,
2111 .gpl_only = false,
2112 .ret_type = RET_INTEGER,
2113 .arg1_type = ARG_PTR_TO_CTX,
2114 .arg2_type = ARG_ANYTHING,
2115 .arg3_type = ARG_ANYTHING,
2116};
2117
0b19cc0a
TM
2118DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2119EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
781c53bc 2120
f3694e00 2121BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
27b29f63 2122{
0b19cc0a 2123 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
27b29f63 2124
781c53bc
DB
2125 if (unlikely(flags & ~(BPF_F_INGRESS)))
2126 return TC_ACT_SHOT;
2127
27b29f63
AS
2128 ri->ifindex = ifindex;
2129 ri->flags = flags;
781c53bc 2130
27b29f63
AS
2131 return TC_ACT_REDIRECT;
2132}
2133
2134int skb_do_redirect(struct sk_buff *skb)
2135{
0b19cc0a 2136 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
27b29f63
AS
2137 struct net_device *dev;
2138
2139 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
2140 ri->ifindex = 0;
2141 if (unlikely(!dev)) {
2142 kfree_skb(skb);
2143 return -EINVAL;
2144 }
2145
4e3264d2 2146 return __bpf_redirect(skb, dev, ri->flags);
27b29f63
AS
2147}
2148
577c50aa 2149static const struct bpf_func_proto bpf_redirect_proto = {
27b29f63
AS
2150 .func = bpf_redirect,
2151 .gpl_only = false,
2152 .ret_type = RET_INTEGER,
2153 .arg1_type = ARG_ANYTHING,
2154 .arg2_type = ARG_ANYTHING,
2155};
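
The difference between the two redirect flavours in a hypothetical tc program: bpf_clone_redirect() sends a copy right away, while bpf_redirect() only records the target and relies on the TC_ACT_REDIRECT return value being handled by skb_do_redirect() above. Interface indexes are made up:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define MIRROR_IFINDEX  4       /* made up */
#define TARGET_IFINDEX  5       /* made up */

SEC("tc")
int mirror_and_redirect(struct __sk_buff *skb)
{
        /* A clone goes out the mirror port, the original continues here. */
        bpf_clone_redirect(skb, MIRROR_IFINDEX, 0);

        /* Redirect the original; flags could be BPF_F_INGRESS instead. */
        return bpf_redirect(TARGET_IFINDEX, 0);
}

char _license[] SEC("license") = "GPL";
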
2156
604326b4 2157BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2a100317
JF
2158{
2159 msg->apply_bytes = bytes;
2160 return 0;
2161}
2162
2163static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2164 .func = bpf_msg_apply_bytes,
2165 .gpl_only = false,
2166 .ret_type = RET_INTEGER,
2167 .arg1_type = ARG_PTR_TO_CTX,
2168 .arg2_type = ARG_ANYTHING,
2169};
2170
604326b4 2171BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
91843d54
JF
2172{
2173 msg->cork_bytes = bytes;
2174 return 0;
2175}
2176
2177static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2178 .func = bpf_msg_cork_bytes,
2179 .gpl_only = false,
2180 .ret_type = RET_INTEGER,
2181 .arg1_type = ARG_PTR_TO_CTX,
2182 .arg2_type = ARG_ANYTHING,
2183};
2184
604326b4
DB
2185BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2186 u32, end, u64, flags)
015632bb 2187{
604326b4
DB
2188 u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
2189 u32 first_sge, last_sge, i, shift, bytes_sg_total;
2190 struct scatterlist *sge;
2191 u8 *raw, *to, *from;
015632bb
JF
2192 struct page *page;
2193
2194 if (unlikely(flags || end <= start))
2195 return -EINVAL;
2196
2197 /* First find the starting scatterlist element */
604326b4 2198 i = msg->sg.start;
015632bb 2199 do {
604326b4 2200 len = sk_msg_elem(msg, i)->length;
015632bb
JF
2201 if (start < offset + len)
2202 break;
5b24109b 2203 offset += len;
604326b4
DB
2204 sk_msg_iter_var_next(i);
2205 } while (i != msg->sg.end);
015632bb
JF
2206
2207 if (unlikely(start >= offset + len))
2208 return -EINVAL;
2209
604326b4 2210 first_sge = i;
5b24109b
DB
2211 /* The start may point into the sg element so we need to also
2212 * account for the headroom.
2213 */
2214 bytes_sg_total = start - offset + bytes;
604326b4 2215 if (!msg->sg.copy[i] && bytes_sg_total <= len)
015632bb 2216 goto out;
015632bb
JF
2217
2218 /* At this point we need to linearize multiple scatterlist
2219 * elements or a single shared page. Either way we need to
2220 * copy into a linear buffer exclusively owned by BPF. Then
2221 * place the buffer in the scatterlist and fixup the original
2222 * entries by removing the entries now in the linear buffer
2223 * and shifting the remaining entries. For now we do not try
2224 * to copy partial entries to avoid complexity of running out
2225 * of sg_entry slots. The downside is reading a single byte
2226 * will copy the entire sg entry.
2227 */
2228 do {
604326b4
DB
2229 copy += sk_msg_elem(msg, i)->length;
2230 sk_msg_iter_var_next(i);
5b24109b 2231 if (bytes_sg_total <= copy)
015632bb 2232 break;
604326b4
DB
2233 } while (i != msg->sg.end);
2234 last_sge = i;
015632bb 2235
5b24109b 2236 if (unlikely(bytes_sg_total > copy))
015632bb
JF
2237 return -EINVAL;
2238
4c3d795c
TD
2239 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2240 get_order(copy));
015632bb
JF
2241 if (unlikely(!page))
2242 return -ENOMEM;
015632bb 2243
604326b4
DB
2244 raw = page_address(page);
2245 i = first_sge;
015632bb 2246 do {
604326b4
DB
2247 sge = sk_msg_elem(msg, i);
2248 from = sg_virt(sge);
2249 len = sge->length;
2250 to = raw + poffset;
015632bb
JF
2251
2252 memcpy(to, from, len);
9db39f4d 2253 poffset += len;
604326b4
DB
2254 sge->length = 0;
2255 put_page(sg_page(sge));
015632bb 2256
604326b4
DB
2257 sk_msg_iter_var_next(i);
2258 } while (i != last_sge);
015632bb 2259
604326b4 2260 sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
015632bb
JF
2261
2262 /* To repair sg ring we need to shift entries. If we only
2263 * had a single entry though we can just replace it and
2264 * be done. Otherwise walk the ring and shift the entries.
2265 */
604326b4
DB
2266 WARN_ON_ONCE(last_sge == first_sge);
2267 shift = last_sge > first_sge ?
2268 last_sge - first_sge - 1 :
2269 MAX_SKB_FRAGS - first_sge + last_sge - 1;
015632bb
JF
2270 if (!shift)
2271 goto out;
2272
604326b4
DB
2273 i = first_sge;
2274 sk_msg_iter_var_next(i);
015632bb 2275 do {
604326b4 2276 u32 move_from;
015632bb 2277
604326b4
DB
2278 if (i + shift >= MAX_MSG_FRAGS)
2279 move_from = i + shift - MAX_MSG_FRAGS;
015632bb
JF
2280 else
2281 move_from = i + shift;
604326b4 2282 if (move_from == msg->sg.end)
015632bb
JF
2283 break;
2284
604326b4
DB
2285 msg->sg.data[i] = msg->sg.data[move_from];
2286 msg->sg.data[move_from].length = 0;
2287 msg->sg.data[move_from].page_link = 0;
2288 msg->sg.data[move_from].offset = 0;
2289 sk_msg_iter_var_next(i);
015632bb 2290 } while (1);
604326b4
DB
2291
2292 msg->sg.end = msg->sg.end - shift > msg->sg.end ?
2293 msg->sg.end - shift + MAX_MSG_FRAGS :
2294 msg->sg.end - shift;
015632bb 2295out:
604326b4 2296 msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
015632bb 2297 msg->data_end = msg->data + bytes;
015632bb
JF
2298 return 0;
2299}
2300
2301static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2302 .func = bpf_msg_pull_data,
2303 .gpl_only = false,
2304 .ret_type = RET_INTEGER,
2305 .arg1_type = ARG_PTR_TO_CTX,
2306 .arg2_type = ARG_ANYTHING,
2307 .arg3_type = ARG_ANYTHING,
2308 .arg4_type = ARG_ANYTHING,
2309};
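
From the program side, a sk_msg verdict program would use this to make a byte range visible through msg->data/data_end before parsing it, as the long comment above describes. A sketch with an arbitrary 16-byte window:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int msg_verdict(struct sk_msg_md *msg)
{
        void *data, *data_end;

        /* May linearize whole scatterlist entries, see above. */
        if (bpf_msg_pull_data(msg, 0, 16, 0) < 0)
                return SK_PASS;

        data     = (void *)(long)msg->data;
        data_end = (void *)(long)msg->data_end;
        if (data + 16 > data_end)
                return SK_PASS;

        /* ... inspect the first 16 bytes of the message here ... */
        return SK_PASS;
}

char _license[] SEC("license") = "GPL";
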
2310
6fff607e
JF
2311BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2312 u32, len, u64, flags)
2313{
2314 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
2315 u32 new, i = 0, l, space, copy = 0, offset = 0;
2316 u8 *raw, *to, *from;
2317 struct page *page;
2318
2319 if (unlikely(flags))
2320 return -EINVAL;
2321
2322 /* First find the starting scatterlist element */
2323 i = msg->sg.start;
2324 do {
2325 l = sk_msg_elem(msg, i)->length;
2326
2327 if (start < offset + l)
2328 break;
2329 offset += l;
2330 sk_msg_iter_var_next(i);
2331 } while (i != msg->sg.end);
2332
2333 if (start >= offset + l)
2334 return -EINVAL;
2335
2336 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2337
2338 /* If no space is available we will fall back to copy; we need at
2339 * least one scatterlist elem available to push data into when
2340 * start aligns to the beginning of an element, or two when it
2341 * falls inside an element. We handle the start equals offset
2342 * case because it's the common case for inserting a
2343 * header.
2344 */
2345 if (!space || (space == 1 && start != offset))
2346 copy = msg->sg.data[i].length;
2347
2348 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2349 get_order(copy + len));
2350 if (unlikely(!page))
2351 return -ENOMEM;
2352
2353 if (copy) {
2354 int front, back;
2355
2356 raw = page_address(page);
2357
2358 psge = sk_msg_elem(msg, i);
2359 front = start - offset;
2360 back = psge->length - front;
2361 from = sg_virt(psge);
2362
2363 if (front)
2364 memcpy(raw, from, front);
2365
2366 if (back) {
2367 from += front;
2368 to = raw + front + len;
2369
2370 memcpy(to, from, back);
2371 }
2372
2373 put_page(sg_page(psge));
2374 } else if (start - offset) {
2375 psge = sk_msg_elem(msg, i);
2376 rsge = sk_msg_elem_cpy(msg, i);
2377
2378 psge->length = start - offset;
2379 rsge.length -= psge->length;
2380 rsge.offset += start;
2381
2382 sk_msg_iter_var_next(i);
2383 sg_unmark_end(psge);
2384 sk_msg_iter_next(msg, end);
2385 }
2386
2387 /* Slot(s) to place newly allocated data */
2388 new = i;
2389
2390 /* Shift one or two slots as needed */
2391 if (!copy) {
2392 sge = sk_msg_elem_cpy(msg, i);
2393
2394 sk_msg_iter_var_next(i);
2395 sg_unmark_end(&sge);
2396 sk_msg_iter_next(msg, end);
2397
2398 nsge = sk_msg_elem_cpy(msg, i);
2399 if (rsge.length) {
2400 sk_msg_iter_var_next(i);
2401 nnsge = sk_msg_elem_cpy(msg, i);
2402 }
2403
2404 while (i != msg->sg.end) {
2405 msg->sg.data[i] = sge;
2406 sge = nsge;
2407 sk_msg_iter_var_next(i);
2408 if (rsge.length) {
2409 nsge = nnsge;
2410 nnsge = sk_msg_elem_cpy(msg, i);
2411 } else {
2412 nsge = sk_msg_elem_cpy(msg, i);
2413 }
2414 }
2415 }
2416
2417 /* Place newly allocated data buffer */
2418 sk_mem_charge(msg->sk, len);
2419 msg->sg.size += len;
2420 msg->sg.copy[new] = false;
2421 sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2422 if (rsge.length) {
2423 get_page(sg_page(&rsge));
2424 sk_msg_iter_var_next(new);
2425 msg->sg.data[new] = rsge;
2426 }
2427
2428 sk_msg_compute_data_pointers(msg);
2429 return 0;
2430}
2431
2432static const struct bpf_func_proto bpf_msg_push_data_proto = {
2433 .func = bpf_msg_push_data,
2434 .gpl_only = false,
2435 .ret_type = RET_INTEGER,
2436 .arg1_type = ARG_PTR_TO_CTX,
2437 .arg2_type = ARG_ANYTHING,
2438 .arg3_type = ARG_ANYTHING,
2439 .arg4_type = ARG_ANYTHING,
2440};
2441
7246d8ed
JF
2442static void sk_msg_shift_left(struct sk_msg *msg, int i)
2443{
2444 int prev;
2445
2446 do {
2447 prev = i;
2448 sk_msg_iter_var_next(i);
2449 msg->sg.data[prev] = msg->sg.data[i];
2450 } while (i != msg->sg.end);
2451
2452 sk_msg_iter_prev(msg, end);
2453}
2454
2455static void sk_msg_shift_right(struct sk_msg *msg, int i)
2456{
2457 struct scatterlist tmp, sge;
2458
2459 sk_msg_iter_next(msg, end);
2460 sge = sk_msg_elem_cpy(msg, i);
2461 sk_msg_iter_var_next(i);
2462 tmp = sk_msg_elem_cpy(msg, i);
2463
2464 while (i != msg->sg.end) {
2465 msg->sg.data[i] = sge;
2466 sk_msg_iter_var_next(i);
2467 sge = tmp;
2468 tmp = sk_msg_elem_cpy(msg, i);
2469 }
2470}
2471
2472BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2473 u32, len, u64, flags)
2474{
2475 u32 i = 0, l, space, offset = 0;
2476 u64 last = start + len;
2477 int pop;
2478
2479 if (unlikely(flags))
2480 return -EINVAL;
2481
2482 /* First find the starting scatterlist element */
2483 i = msg->sg.start;
2484 do {
2485 l = sk_msg_elem(msg, i)->length;
2486
2487 if (start < offset + l)
2488 break;
2489 offset += l;
2490 sk_msg_iter_var_next(i);
2491 } while (i != msg->sg.end);
2492
2493 /* Bounds checks: start and pop must be inside message */
2494 if (start >= offset + l || last >= msg->sg.size)
2495 return -EINVAL;
2496
2497 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2498
2499 pop = len;
2500 /* --------------| offset
2501 * -| start |-------- len -------|
2502 *
2503 * |----- a ----|-------- pop -------|----- b ----|
2504 * |______________________________________________| length
2505 *
2506 *
2507 * a: region at front of scatter element to save
2508 * b: region at back of scatter element to save when length > a + pop
2509 * pop: region to pop from element, same as input 'pop' here will be
2510 * decremented below per iteration.
2511 *
2512 * Two top-level cases to handle when start != offset: first, b is
2513 * non-zero and second, b is zero, corresponding to when a pop includes
2514 * more than one element.
2515 *
2516 * Then if b is non-zero AND there is no space, allocate space and
2517 * compact the a, b regions into a page. If there is space, shift the
2518 * ring to the right, freeing the next element in the ring to place b,
2519 * leaving a untouched except to reduce its length.
2520 */
2521 if (start != offset) {
2522 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2523 int a = start;
2524 int b = sge->length - pop - a;
2525
2526 sk_msg_iter_var_next(i);
2527
2528 if (pop < sge->length - a) {
2529 if (space) {
2530 sge->length = a;
2531 sk_msg_shift_right(msg, i);
2532 nsge = sk_msg_elem(msg, i);
2533 get_page(sg_page(sge));
2534 sg_set_page(nsge,
2535 sg_page(sge),
2536 b, sge->offset + pop + a);
2537 } else {
2538 struct page *page, *orig;
2539 u8 *to, *from;
2540
2541 page = alloc_pages(__GFP_NOWARN |
2542 __GFP_COMP | GFP_ATOMIC,
2543 get_order(a + b));
2544 if (unlikely(!page))
2545 return -ENOMEM;
2546
2547 sge->length = a;
2548 orig = sg_page(sge);
2549 from = sg_virt(sge);
2550 to = page_address(page);
2551 memcpy(to, from, a);
2552 memcpy(to + a, from + a + pop, b);
2553 sg_set_page(sge, page, a + b, 0);
2554 put_page(orig);
2555 }
2556 pop = 0;
2557 } else if (pop >= sge->length - a) {
2558 sge->length = a;
2559 pop -= (sge->length - a);
2560 }
2561 }
2562
2563 /* From above the current layout _must_ be as follows,
2564 *
2565 * -| offset
2566 * -| start
2567 *
2568 * |---- pop ---|---------------- b ------------|
2569 * |____________________________________________| length
2570 *
2571 * Offset and start of the current msg elem are equal because in the
2572 * previous case we handled offset != start and either consumed the
2573 * entire element and advanced to the next element OR pop == 0.
2574 *
2575 * Two cases to handle here are first pop is less than the length
2576 * leaving some remainder b above. Simply adjust the element's layout
2577 * in this case. Or pop >= length of the element so that b = 0. In this
2578 * case advance to next element decrementing pop.
2579 */
2580 while (pop) {
2581 struct scatterlist *sge = sk_msg_elem(msg, i);
2582
2583 if (pop < sge->length) {
2584 sge->length -= pop;
2585 sge->offset += pop;
2586 pop = 0;
2587 } else {
2588 pop -= sge->length;
2589 sk_msg_shift_left(msg, i);
2590 }
2591 sk_msg_iter_var_next(i);
2592 }
2593
2594 sk_mem_uncharge(msg->sk, len - pop);
2595 msg->sg.size -= (len - pop);
2596 sk_msg_compute_data_pointers(msg);
2597 return 0;
2598}
2599
2600static const struct bpf_func_proto bpf_msg_pop_data_proto = {
2601 .func = bpf_msg_pop_data,
2602 .gpl_only = false,
2603 .ret_type = RET_INTEGER,
2604 .arg1_type = ARG_PTR_TO_CTX,
2605 .arg2_type = ARG_ANYTHING,
2606 .arg3_type = ARG_ANYTHING,
2607 .arg4_type = ARG_ANYTHING,
2608};
2609
f3694e00 2610BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
8d20aabe 2611{
f3694e00 2612 return task_get_classid(skb);
8d20aabe
DB
2613}
2614
2615static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
2616 .func = bpf_get_cgroup_classid,
2617 .gpl_only = false,
2618 .ret_type = RET_INTEGER,
2619 .arg1_type = ARG_PTR_TO_CTX,
2620};
2621
f3694e00 2622BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
c46646d0 2623{
f3694e00 2624 return dst_tclassid(skb);
c46646d0
DB
2625}
2626
2627static const struct bpf_func_proto bpf_get_route_realm_proto = {
2628 .func = bpf_get_route_realm,
2629 .gpl_only = false,
2630 .ret_type = RET_INTEGER,
2631 .arg1_type = ARG_PTR_TO_CTX,
2632};
2633
f3694e00 2634BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
13c5c240
DB
2635{
2636 /* If skb_clear_hash() was called due to mangling, we can
2637 * trigger SW recalculation here. Later access to hash
2638 * can then use the inline skb->hash via context directly
2639 * instead of calling this helper again.
2640 */
f3694e00 2641 return skb_get_hash(skb);
13c5c240
DB
2642}
2643
2644static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
2645 .func = bpf_get_hash_recalc,
2646 .gpl_only = false,
2647 .ret_type = RET_INTEGER,
2648 .arg1_type = ARG_PTR_TO_CTX,
2649};
2650
7a4b28c6
DB
2651BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
2652{
2653 /* After all direct packet write, this can be used once for
2654 * triggering a lazy recalc on next skb_get_hash() invocation.
2655 */
2656 skb_clear_hash(skb);
2657 return 0;
2658}
2659
2660static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
2661 .func = bpf_set_hash_invalid,
2662 .gpl_only = false,
2663 .ret_type = RET_INTEGER,
2664 .arg1_type = ARG_PTR_TO_CTX,
2665};
2666
ded092cd
DB
2667BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
2668{
2669 /* Set user specified hash as L4(+), so that it gets returned
2670 * on skb_get_hash() call unless BPF prog later on triggers a
2671 * skb_clear_hash().
2672 */
2673 __skb_set_sw_hash(skb, hash, true);
2674 return 0;
2675}
2676
2677static const struct bpf_func_proto bpf_set_hash_proto = {
2678 .func = bpf_set_hash,
2679 .gpl_only = false,
2680 .ret_type = RET_INTEGER,
2681 .arg1_type = ARG_PTR_TO_CTX,
2682 .arg2_type = ARG_ANYTHING,
2683};
2684
f3694e00
DB
2685BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
2686 u16, vlan_tci)
4e10df9a 2687{
db58ba45 2688 int ret;
4e10df9a
AS
2689
2690 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
2691 vlan_proto != htons(ETH_P_8021AD)))
2692 vlan_proto = htons(ETH_P_8021Q);
2693
8065694e 2694 bpf_push_mac_rcsum(skb);
db58ba45 2695 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
8065694e
DB
2696 bpf_pull_mac_rcsum(skb);
2697
6aaae2b6 2698 bpf_compute_data_pointers(skb);
db58ba45 2699 return ret;
4e10df9a
AS
2700}
2701
93731ef0 2702static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
4e10df9a
AS
2703 .func = bpf_skb_vlan_push,
2704 .gpl_only = false,
2705 .ret_type = RET_INTEGER,
2706 .arg1_type = ARG_PTR_TO_CTX,
2707 .arg2_type = ARG_ANYTHING,
2708 .arg3_type = ARG_ANYTHING,
2709};
2710
f3694e00 2711BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
4e10df9a 2712{
db58ba45 2713 int ret;
4e10df9a 2714
8065694e 2715 bpf_push_mac_rcsum(skb);
db58ba45 2716 ret = skb_vlan_pop(skb);
8065694e
DB
2717 bpf_pull_mac_rcsum(skb);
2718
6aaae2b6 2719 bpf_compute_data_pointers(skb);
db58ba45 2720 return ret;
4e10df9a
AS
2721}
2722
93731ef0 2723static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
4e10df9a
AS
2724 .func = bpf_skb_vlan_pop,
2725 .gpl_only = false,
2726 .ret_type = RET_INTEGER,
2727 .arg1_type = ARG_PTR_TO_CTX,
2728};
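
A small, hypothetical re-tagging example for the two VLAN helpers; the VLAN id is arbitrary, and the pop is a no-op on untagged frames:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int retag_vlan(struct __sk_buff *skb)
{
        bpf_skb_vlan_pop(skb);          /* returns 0 if nothing to pop */
        if (bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 100) < 0)
                return TC_ACT_SHOT;
        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
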
2729
6578171a
DB
2730static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
2731{
2732 /* Caller already did skb_cow() with len as headroom,
2733 * so no need to do it here.
2734 */
2735 skb_push(skb, len);
2736 memmove(skb->data, skb->data + len, off);
2737 memset(skb->data + off, 0, len);
2738
2739 /* No skb_postpush_rcsum(skb, skb->data + off, len)
2740 * needed here as it does not change the skb->csum
2741 * result for checksum complete when summing over
2742 * zeroed blocks.
2743 */
2744 return 0;
2745}
2746
2747static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
2748{
2749 /* skb_ensure_writable() is not needed here, as we're
2750 * already working on an uncloned skb.
2751 */
2752 if (unlikely(!pskb_may_pull(skb, off + len)))
2753 return -ENOMEM;
2754
2755 skb_postpull_rcsum(skb, skb->data + off, len);
2756 memmove(skb->data + len, skb->data, off);
2757 __skb_pull(skb, len);
2758
2759 return 0;
2760}
2761
2762static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
2763{
2764 bool trans_same = skb->transport_header == skb->network_header;
2765 int ret;
2766
2767 /* There's no need for __skb_push()/__skb_pull() pair to
2768 * get to the start of the mac header as we're guaranteed
2769 * to always start from here under eBPF.
2770 */
2771 ret = bpf_skb_generic_push(skb, off, len);
2772 if (likely(!ret)) {
2773 skb->mac_header -= len;
2774 skb->network_header -= len;
2775 if (trans_same)
2776 skb->transport_header = skb->network_header;
2777 }
2778
2779 return ret;
2780}
2781
2782static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
2783{
2784 bool trans_same = skb->transport_header == skb->network_header;
2785 int ret;
2786
2787 /* Same here, __skb_push()/__skb_pull() pair not needed. */
2788 ret = bpf_skb_generic_pop(skb, off, len);
2789 if (likely(!ret)) {
2790 skb->mac_header += len;
2791 skb->network_header += len;
2792 if (trans_same)
2793 skb->transport_header = skb->network_header;
2794 }
2795
2796 return ret;
2797}
2798
2799static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
2800{
2801 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 2802 u32 off = skb_mac_header_len(skb);
6578171a
DB
2803 int ret;
2804
4c3024de 2805 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
d02f51cb
DA
2806 return -ENOTSUPP;
2807
6578171a
DB
2808 ret = skb_cow(skb, len_diff);
2809 if (unlikely(ret < 0))
2810 return ret;
2811
2812 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2813 if (unlikely(ret < 0))
2814 return ret;
2815
2816 if (skb_is_gso(skb)) {
d02f51cb
DA
2817 struct skb_shared_info *shinfo = skb_shinfo(skb);
2818
880388aa
DM
2819 /* SKB_GSO_TCPV4 needs to be changed into
2820 * SKB_GSO_TCPV6.
6578171a 2821 */
d02f51cb
DA
2822 if (shinfo->gso_type & SKB_GSO_TCPV4) {
2823 shinfo->gso_type &= ~SKB_GSO_TCPV4;
2824 shinfo->gso_type |= SKB_GSO_TCPV6;
6578171a
DB
2825 }
2826
2827 /* Due to IPv6 header, MSS needs to be downgraded. */
d02f51cb 2828 skb_decrease_gso_size(shinfo, len_diff);
6578171a 2829 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
2830 shinfo->gso_type |= SKB_GSO_DODGY;
2831 shinfo->gso_segs = 0;
6578171a
DB
2832 }
2833
2834 skb->protocol = htons(ETH_P_IPV6);
2835 skb_clear_hash(skb);
2836
2837 return 0;
2838}
2839
2840static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
2841{
2842 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 2843 u32 off = skb_mac_header_len(skb);
6578171a
DB
2844 int ret;
2845
4c3024de 2846 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
d02f51cb
DA
2847 return -ENOTSUPP;
2848
6578171a
DB
2849 ret = skb_unclone(skb, GFP_ATOMIC);
2850 if (unlikely(ret < 0))
2851 return ret;
2852
2853 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2854 if (unlikely(ret < 0))
2855 return ret;
2856
2857 if (skb_is_gso(skb)) {
d02f51cb
DA
2858 struct skb_shared_info *shinfo = skb_shinfo(skb);
2859
880388aa
DM
2860 /* SKB_GSO_TCPV6 needs to be changed into
2861 * SKB_GSO_TCPV4.
6578171a 2862 */
d02f51cb
DA
2863 if (shinfo->gso_type & SKB_GSO_TCPV6) {
2864 shinfo->gso_type &= ~SKB_GSO_TCPV6;
2865 shinfo->gso_type |= SKB_GSO_TCPV4;
6578171a
DB
2866 }
2867
2868 /* Due to IPv4 header, MSS can be upgraded. */
d02f51cb 2869 skb_increase_gso_size(shinfo, len_diff);
6578171a 2870 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
2871 shinfo->gso_type |= SKB_GSO_DODGY;
2872 shinfo->gso_segs = 0;
6578171a
DB
2873 }
2874
2875 skb->protocol = htons(ETH_P_IP);
2876 skb_clear_hash(skb);
2877
2878 return 0;
2879}
2880
2881static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
2882{
2883 __be16 from_proto = skb->protocol;
2884
2885 if (from_proto == htons(ETH_P_IP) &&
2886 to_proto == htons(ETH_P_IPV6))
2887 return bpf_skb_proto_4_to_6(skb);
2888
2889 if (from_proto == htons(ETH_P_IPV6) &&
2890 to_proto == htons(ETH_P_IP))
2891 return bpf_skb_proto_6_to_4(skb);
2892
2893 return -ENOTSUPP;
2894}
2895
f3694e00
DB
2896BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2897 u64, flags)
6578171a 2898{
6578171a
DB
2899 int ret;
2900
2901 if (unlikely(flags))
2902 return -EINVAL;
2903
2904 /* General idea is that this helper does the basic groundwork
2905 * needed for changing the protocol, and eBPF program fills the
2906 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
2907 * and other helpers, rather than passing a raw buffer here.
2908 *
2909 * The rationale is to keep this minimal and without a need to
2910 * deal with raw packet data. F.e. even if we would pass buffers
2911 * here, the program still needs to call the bpf_lX_csum_replace()
2912 * helpers anyway. Plus, this way we also keep separation of
2913 * concerns, since f.e. bpf_skb_store_bytes() should only take
2914 * care of stores.
2915 *
2916 * Currently, additional options and extension header space are
2917 * not supported, but flags register is reserved so we can adapt
2918 * that. For offloads, we mark packet as dodgy, so that headers
2919 * need to be verified first.
2920 */
2921 ret = bpf_skb_proto_xlat(skb, proto);
6aaae2b6 2922 bpf_compute_data_pointers(skb);
6578171a
DB
2923 return ret;
2924}
2925
2926static const struct bpf_func_proto bpf_skb_change_proto_proto = {
2927 .func = bpf_skb_change_proto,
2928 .gpl_only = false,
2929 .ret_type = RET_INTEGER,
2930 .arg1_type = ARG_PTR_TO_CTX,
2931 .arg2_type = ARG_ANYTHING,
2932 .arg3_type = ARG_ANYTHING,
2933};
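
Per the comment above, the helper only does the groundwork; a hypothetical 4-to-6 translator would look roughly like this, with the actual IPv6 header writes left as placeholders:

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int v4_to_v6_groundwork(struct __sk_buff *skb)
{
        if (skb->protocol != bpf_htons(ETH_P_IP))
                return TC_ACT_OK;

        /* Makes room for the larger header and flips skb->protocol;
         * header contents and checksums are still the program's job
         * (bpf_skb_store_bytes(), bpf_l4_csum_replace(), ...).
         */
        if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0) < 0)
                return TC_ACT_SHOT;

        /* ... build and store the IPv6 header here ... */
        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
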
2934
f3694e00 2935BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
d2485c42 2936{
d2485c42 2937 /* We only allow a restricted subset to be changed for now. */
45c7fffa
DB
2938 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
2939 !skb_pkt_type_ok(pkt_type)))
d2485c42
DB
2940 return -EINVAL;
2941
2942 skb->pkt_type = pkt_type;
2943 return 0;
2944}
2945
2946static const struct bpf_func_proto bpf_skb_change_type_proto = {
2947 .func = bpf_skb_change_type,
2948 .gpl_only = false,
2949 .ret_type = RET_INTEGER,
2950 .arg1_type = ARG_PTR_TO_CTX,
2951 .arg2_type = ARG_ANYTHING,
2952};
2953
2be7e212
DB
2954static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
2955{
2956 switch (skb->protocol) {
2957 case htons(ETH_P_IP):
2958 return sizeof(struct iphdr);
2959 case htons(ETH_P_IPV6):
2960 return sizeof(struct ipv6hdr);
2961 default:
2962 return ~0U;
2963 }
2964}
2965
2278f6cc
WB
2966#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO)
2967
2968static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
2969 u64 flags)
2be7e212 2970{
2be7e212
DB
2971 int ret;
2972
2278f6cc
WB
2973 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
2974 /* udp gso_size delineates datagrams, only allow if fixed */
2975 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
2976 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
2977 return -ENOTSUPP;
2978 }
d02f51cb 2979
908adce6 2980 ret = skb_cow_head(skb, len_diff);
2be7e212
DB
2981 if (unlikely(ret < 0))
2982 return ret;
2983
2984 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2985 if (unlikely(ret < 0))
2986 return ret;
2987
2988 if (skb_is_gso(skb)) {
d02f51cb
DA
2989 struct skb_shared_info *shinfo = skb_shinfo(skb);
2990
2be7e212 2991 /* Due to header grow, MSS needs to be downgraded. */
2278f6cc
WB
2992 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
2993 skb_decrease_gso_size(shinfo, len_diff);
2994
2be7e212 2995 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
2996 shinfo->gso_type |= SKB_GSO_DODGY;
2997 shinfo->gso_segs = 0;
2be7e212
DB
2998 }
2999
3000 return 0;
3001}
3002
2278f6cc
WB
3003static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3004 u64 flags)
2be7e212 3005{
2be7e212
DB
3006 int ret;
3007
2278f6cc
WB
3008 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3009 /* udp gso_size delineates datagrams, only allow if fixed */
3010 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3011 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3012 return -ENOTSUPP;
3013 }
d02f51cb 3014
2be7e212
DB
3015 ret = skb_unclone(skb, GFP_ATOMIC);
3016 if (unlikely(ret < 0))
3017 return ret;
3018
3019 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3020 if (unlikely(ret < 0))
3021 return ret;
3022
3023 if (skb_is_gso(skb)) {
d02f51cb
DA
3024 struct skb_shared_info *shinfo = skb_shinfo(skb);
3025
2be7e212 3026 /* Due to header shrink, MSS can be upgraded. */
2278f6cc
WB
3027 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3028 skb_increase_gso_size(shinfo, len_diff);
3029
2be7e212 3030 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
3031 shinfo->gso_type |= SKB_GSO_DODGY;
3032 shinfo->gso_segs = 0;
2be7e212
DB
3033 }
3034
3035 return 0;
3036}
3037
3038static u32 __bpf_skb_max_len(const struct sk_buff *skb)
3039{
0c6bc6e5
JF
3040 return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
3041 SKB_MAX_ALLOC;
2be7e212
DB
3042}
3043
14aa3192
WB
3044BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3045 u32, mode, u64, flags)
2be7e212
DB
3046{
3047 bool trans_same = skb->transport_header == skb->network_header;
3048 u32 len_cur, len_diff_abs = abs(len_diff);
3049 u32 len_min = bpf_skb_net_base_len(skb);
3050 u32 len_max = __bpf_skb_max_len(skb);
3051 __be16 proto = skb->protocol;
3052 bool shrink = len_diff < 0;
14aa3192 3053 u32 off;
2be7e212
DB
3054 int ret;
3055
2278f6cc 3056 if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
14aa3192 3057 return -EINVAL;
2be7e212
DB
3058 if (unlikely(len_diff_abs > 0xfffU))
3059 return -EFAULT;
3060 if (unlikely(proto != htons(ETH_P_IP) &&
3061 proto != htons(ETH_P_IPV6)))
3062 return -ENOTSUPP;
3063
14aa3192
WB
3064 off = skb_mac_header_len(skb);
3065 switch (mode) {
3066 case BPF_ADJ_ROOM_NET:
3067 off += bpf_skb_net_base_len(skb);
3068 break;
3069 case BPF_ADJ_ROOM_MAC:
3070 break;
3071 default:
3072 return -ENOTSUPP;
3073 }
3074
2be7e212
DB
3075 len_cur = skb->len - skb_network_offset(skb);
3076 if (skb_transport_header_was_set(skb) && !trans_same)
3077 len_cur = skb_network_header_len(skb);
3078 if ((shrink && (len_diff_abs >= len_cur ||
3079 len_cur - len_diff_abs < len_min)) ||
3080 (!shrink && (skb->len + len_diff_abs > len_max &&
3081 !skb_is_gso(skb))))
3082 return -ENOTSUPP;
3083
2278f6cc
WB
3084 ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3085 bpf_skb_net_grow(skb, off, len_diff_abs, flags);
2be7e212 3086
6aaae2b6 3087 bpf_compute_data_pointers(skb);
e4a6a342 3088 return ret;
2be7e212
DB
3089}
3090
2be7e212
DB
3091static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3092 .func = bpf_skb_adjust_room,
3093 .gpl_only = false,
3094 .ret_type = RET_INTEGER,
3095 .arg1_type = ARG_PTR_TO_CTX,
3096 .arg2_type = ARG_ANYTHING,
3097 .arg3_type = ARG_ANYTHING,
3098 .arg4_type = ARG_ANYTHING,
3099};
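
A speculative encap-style sketch using BPF_F_ADJ_ROOM_FIXED_GSO to grow room at the network layer while keeping gso_size fixed; the header length and the follow-up writes are placeholders:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define TUN_HLEN        8       /* made-up tunnel header size */

SEC("tc")
int make_encap_room(struct __sk_buff *skb)
{
        /* BPF_ADJ_ROOM_NET opens the gap just behind the L3 header;
         * BPF_F_ADJ_ROOM_FIXED_GSO leaves gso_size untouched, which is
         * also what allows UDP GSO packets through the checks above.
         */
        if (bpf_skb_adjust_room(skb, TUN_HLEN, BPF_ADJ_ROOM_NET,
                                BPF_F_ADJ_ROOM_FIXED_GSO) < 0)
                return TC_ACT_SHOT;

        /* ... fill the new room via bpf_skb_store_bytes() ... */
        return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
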
3100
5293efe6
DB
3101static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3102{
3103 u32 min_len = skb_network_offset(skb);
3104
3105 if (skb_transport_header_was_set(skb))
3106 min_len = skb_transport_offset(skb);
3107 if (skb->ip_summed == CHECKSUM_PARTIAL)
3108 min_len = skb_checksum_start_offset(skb) +
3109 skb->csum_offset + sizeof(__sum16);
3110 return min_len;
3111}
3112
5293efe6
DB
3113static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3114{
3115 unsigned int old_len = skb->len;
3116 int ret;
3117
3118 ret = __skb_grow_rcsum(skb, new_len);
3119 if (!ret)
3120 memset(skb->data + old_len, 0, new_len - old_len);
3121 return ret;
3122}
3123
3124static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3125{
3126 return __skb_trim_rcsum(skb, new_len);
3127}
3128
0ea488ff
JF
3129static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3130 u64 flags)
5293efe6 3131{
5293efe6
DB
3132 u32 max_len = __bpf_skb_max_len(skb);
3133 u32 min_len = __bpf_skb_min_len(skb);
5293efe6
DB
3134 int ret;
3135
3136 if (unlikely(flags || new_len > max_len || new_len < min_len))
3137 return -EINVAL;
3138 if (skb->encapsulation)
3139 return -ENOTSUPP;
3140
3141 /* The basic idea of this helper is that it's performing the
3142 * needed work to either grow or trim an skb, and eBPF program
3143 * rewrites the rest via helpers like bpf_skb_store_bytes(),
3144 * bpf_lX_csum_replace() and others rather than passing a raw
3145 * buffer here. This one is a slow path helper and intended
3146 * for replies with control messages.
3147 *
3148 * Like in bpf_skb_change_proto(), we want to keep this rather
3149 * minimal and without protocol specifics so that we are able
3150 * to separate concerns, as bpf_skb_store_bytes() should be the
3151 * only one responsible for writing buffers.
3152 *
3153 * It's really expected to be a slow path operation here for
3154 * control message replies, so we're implicitly linearizing,
3155 * uncloning and dropping offloads from the skb by this.
3156 */
3157 ret = __bpf_try_make_writable(skb, skb->len);
3158 if (!ret) {
3159 if (new_len > skb->len)
3160 ret = bpf_skb_grow_rcsum(skb, new_len);
3161 else if (new_len < skb->len)
3162 ret = bpf_skb_trim_rcsum(skb, new_len);
3163 if (!ret && skb_is_gso(skb))
3164 skb_gso_reset(skb);
3165 }
0ea488ff
JF
3166 return ret;
3167}
3168
3169BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3170 u64, flags)
3171{
3172 int ret = __bpf_skb_change_tail(skb, new_len, flags);
5293efe6 3173
6aaae2b6 3174 bpf_compute_data_pointers(skb);
5293efe6
DB
3175 return ret;
3176}
3177
3178static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3179 .func = bpf_skb_change_tail,
3180 .gpl_only = false,
3181 .ret_type = RET_INTEGER,
3182 .arg1_type = ARG_PTR_TO_CTX,
3183 .arg2_type = ARG_ANYTHING,
3184 .arg3_type = ARG_ANYTHING,
3185};
3186
0ea488ff 3187BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3a0af8fd 3188 u64, flags)
0ea488ff
JF
3189{
3190 int ret = __bpf_skb_change_tail(skb, new_len, flags);
3191
3192 bpf_compute_data_end_sk_skb(skb);
3193 return ret;
3194}
3195
3196static const struct bpf_func_proto sk_skb_change_tail_proto = {
3197 .func = sk_skb_change_tail,
3198 .gpl_only = false,
3199 .ret_type = RET_INTEGER,
3200 .arg1_type = ARG_PTR_TO_CTX,
3201 .arg2_type = ARG_ANYTHING,
3202 .arg3_type = ARG_ANYTHING,
3203};
3204
3205static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3206 u64 flags)
3a0af8fd
TG
3207{
3208 u32 max_len = __bpf_skb_max_len(skb);
3209 u32 new_len = skb->len + head_room;
3210 int ret;
3211
3212 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3213 new_len < skb->len))
3214 return -EINVAL;
3215
3216 ret = skb_cow(skb, head_room);
3217 if (likely(!ret)) {
3218 /* Idea for this helper is that we currently only
3219 * allow expanding the mac header. This means that
3220 * skb->protocol, network header, etc, stay as is.
3221 * Compared to bpf_skb_change_tail(), we're more
3222 * flexible due to not needing to linearize or
3223 * reset GSO. Intention for this helper is to be
3224 * used by an L3 skb that needs to push mac header
3225 * for redirection into L2 device.
3226 */
3227 __skb_push(skb, head_room);
3228 memset(skb->data, 0, head_room);
3229 skb_reset_mac_header(skb);
3230 }
3231
0ea488ff
JF
3232 return ret;
3233}
3234
3235BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3236 u64, flags)
3237{
3238 int ret = __bpf_skb_change_head(skb, head_room, flags);
3239
6aaae2b6 3240 bpf_compute_data_pointers(skb);
0ea488ff 3241 return ret;
3a0af8fd
TG
3242}
3243
3244static const struct bpf_func_proto bpf_skb_change_head_proto = {
3245 .func = bpf_skb_change_head,
3246 .gpl_only = false,
3247 .ret_type = RET_INTEGER,
3248 .arg1_type = ARG_PTR_TO_CTX,
3249 .arg2_type = ARG_ANYTHING,
3250 .arg3_type = ARG_ANYTHING,
3251};
3252
0ea488ff
JF
3253BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3254 u64, flags)
3255{
3256 int ret = __bpf_skb_change_head(skb, head_room, flags);
3257
3258 bpf_compute_data_end_sk_skb(skb);
3259 return ret;
3260}
3261
3262static const struct bpf_func_proto sk_skb_change_head_proto = {
3263 .func = sk_skb_change_head,
3264 .gpl_only = false,
3265 .ret_type = RET_INTEGER,
3266 .arg1_type = ARG_PTR_TO_CTX,
3267 .arg2_type = ARG_ANYTHING,
3268 .arg3_type = ARG_ANYTHING,
3269};
de8f3a83
DB
3270static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3271{
3272 return xdp_data_meta_unsupported(xdp) ? 0 :
3273 xdp->data - xdp->data_meta;
3274}
3275
17bedab2
MKL
3276BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3277{
6dfb970d 3278 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
de8f3a83 3279 unsigned long metalen = xdp_get_metalen(xdp);
97e19cce 3280 void *data_start = xdp_frame_end + metalen;
17bedab2
MKL
3281 void *data = xdp->data + offset;
3282
de8f3a83 3283 if (unlikely(data < data_start ||
17bedab2
MKL
3284 data > xdp->data_end - ETH_HLEN))
3285 return -EINVAL;
3286
de8f3a83
DB
3287 if (metalen)
3288 memmove(xdp->data_meta + offset,
3289 xdp->data_meta, metalen);
3290 xdp->data_meta += offset;
17bedab2
MKL
3291 xdp->data = data;
3292
3293 return 0;
3294}
3295
3296static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3297 .func = bpf_xdp_adjust_head,
3298 .gpl_only = false,
3299 .ret_type = RET_INTEGER,
3300 .arg1_type = ARG_PTR_TO_CTX,
3301 .arg2_type = ARG_ANYTHING,
3302};
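
A hypothetical decap sketch for XDP: a positive offset moves xdp->data forward and drops that many leading bytes, with the helper itself enforcing the ETH_HLEN floor checked above. The outer header size is made up:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define OUTER_HLEN      8       /* made-up outer header size */

SEC("xdp")
int strip_outer(struct xdp_md *ctx)
{
        if (bpf_xdp_adjust_head(ctx, OUTER_HLEN) < 0)
                return XDP_DROP;
        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
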
3303
b32cc5b9
NS
3304BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3305{
3306 void *data_end = xdp->data_end + offset;
3307
3308 /* only shrinking is allowed for now. */
3309 if (unlikely(offset >= 0))
3310 return -EINVAL;
3311
3312 if (unlikely(data_end < xdp->data + ETH_HLEN))
3313 return -EINVAL;
3314
3315 xdp->data_end = data_end;
3316
3317 return 0;
3318}
3319
3320static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3321 .func = bpf_xdp_adjust_tail,
3322 .gpl_only = false,
3323 .ret_type = RET_INTEGER,
3324 .arg1_type = ARG_PTR_TO_CTX,
3325 .arg2_type = ARG_ANYTHING,
3326};
3327
de8f3a83
DB
3328BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3329{
97e19cce 3330 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
de8f3a83
DB
3331 void *meta = xdp->data_meta + offset;
3332 unsigned long metalen = xdp->data - meta;
3333
3334 if (xdp_data_meta_unsupported(xdp))
3335 return -ENOTSUPP;
97e19cce 3336 if (unlikely(meta < xdp_frame_end ||
de8f3a83
DB
3337 meta > xdp->data))
3338 return -EINVAL;
3339 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
3340 (metalen > 32)))
3341 return -EACCES;
3342
3343 xdp->data_meta = meta;
3344
3345 return 0;
3346}
3347
3348static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3349 .func = bpf_xdp_adjust_meta,
3350 .gpl_only = false,
3351 .ret_type = RET_INTEGER,
3352 .arg1_type = ARG_PTR_TO_CTX,
3353 .arg2_type = ARG_ANYTHING,
3354};
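
A sketch of the usual metadata pattern: reserve a few bytes in front of the packet for a value that e.g. a later tc program can pick up. The 32-bit id is made up, and the area must stay 4-byte aligned and at most 32 bytes per the checks above:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int tag_in_meta(struct xdp_md *ctx)
{
        __u32 *meta;

        if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)) < 0)
                return XDP_PASS;        /* e.g. driver without meta support */

        meta = (void *)(long)ctx->data_meta;
        if ((void *)(meta + 1) > (void *)(long)ctx->data)
                return XDP_PASS;

        *meta = 0xcafe;                 /* made-up classification id */
        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
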
3355
11393cc9
JF
3356static int __bpf_tx_xdp(struct net_device *dev,
3357 struct bpf_map *map,
3358 struct xdp_buff *xdp,
3359 u32 index)
814abfab 3360{
44fa2dbd 3361 struct xdp_frame *xdpf;
d8d7218a 3362 int err, sent;
11393cc9
JF
3363
3364 if (!dev->netdev_ops->ndo_xdp_xmit) {
11393cc9 3365 return -EOPNOTSUPP;
814abfab 3366 }
11393cc9 3367
d8d7218a
TM
3368 err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
3369 if (unlikely(err))
3370 return err;
3371
44fa2dbd
JDB
3372 xdpf = convert_to_xdp_frame(xdp);
3373 if (unlikely(!xdpf))
3374 return -EOVERFLOW;
3375
1e67575a 3376 sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
735fc405
JDB
3377 if (sent <= 0)
3378 return sent;
9c270af3
JDB
3379 return 0;
3380}
3381
47b123ed
JDB
3382static noinline int
3383xdp_do_redirect_slow(struct net_device *dev, struct xdp_buff *xdp,
3384 struct bpf_prog *xdp_prog, struct bpf_redirect_info *ri)
3385{
3386 struct net_device *fwd;
3387 u32 index = ri->ifindex;
3388 int err;
3389
3390 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3391 ri->ifindex = 0;
3392 if (unlikely(!fwd)) {
3393 err = -EINVAL;
3394 goto err;
3395 }
3396
3397 err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
3398 if (unlikely(err))
3399 goto err;
3400
3401 _trace_xdp_redirect(dev, xdp_prog, index);
3402 return 0;
3403err:
3404 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
3405 return err;
3406}
3407
9c270af3
JDB
3408static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
3409 struct bpf_map *map,
3410 struct xdp_buff *xdp,
3411 u32 index)
3412{
3413 int err;
3414
1b1a251c
BT
3415 switch (map->map_type) {
3416 case BPF_MAP_TYPE_DEVMAP: {
67f29e07 3417 struct bpf_dtab_netdev *dst = fwd;
9c270af3 3418
38edddb8 3419 err = dev_map_enqueue(dst, xdp, dev_rx);
e1302542 3420 if (unlikely(err))
9c270af3 3421 return err;
11393cc9 3422 __dev_map_insert_ctx(map, index);
1b1a251c
BT
3423 break;
3424 }
3425 case BPF_MAP_TYPE_CPUMAP: {
9c270af3
JDB
3426 struct bpf_cpu_map_entry *rcpu = fwd;
3427
3428 err = cpu_map_enqueue(rcpu, xdp, dev_rx);
e1302542 3429 if (unlikely(err))
9c270af3
JDB
3430 return err;
3431 __cpu_map_insert_ctx(map, index);
1b1a251c
BT
3432 break;
3433 }
3434 case BPF_MAP_TYPE_XSKMAP: {
3435 struct xdp_sock *xs = fwd;
3436
3437 err = __xsk_map_redirect(map, xdp, xs);
3438 return err;
3439 }
3440 default:
3441 break;
9c270af3 3442 }
e4a8e817 3443 return 0;
814abfab
JF
3444}
3445
11393cc9
JF
3446void xdp_do_flush_map(void)
3447{
0b19cc0a 3448 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
11393cc9
JF
3449 struct bpf_map *map = ri->map_to_flush;
3450
11393cc9 3451 ri->map_to_flush = NULL;
9c270af3
JDB
3452 if (map) {
3453 switch (map->map_type) {
3454 case BPF_MAP_TYPE_DEVMAP:
3455 __dev_map_flush(map);
3456 break;
3457 case BPF_MAP_TYPE_CPUMAP:
3458 __cpu_map_flush(map);
3459 break;
1b1a251c
BT
3460 case BPF_MAP_TYPE_XSKMAP:
3461 __xsk_map_flush(map);
3462 break;
9c270af3
JDB
3463 default:
3464 break;
3465 }
3466 }
11393cc9
JF
3467}
3468EXPORT_SYMBOL_GPL(xdp_do_flush_map);
3469
2a68d85f 3470static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
9c270af3
JDB
3471{
3472 switch (map->map_type) {
3473 case BPF_MAP_TYPE_DEVMAP:
3474 return __dev_map_lookup_elem(map, index);
3475 case BPF_MAP_TYPE_CPUMAP:
3476 return __cpu_map_lookup_elem(map, index);
1b1a251c
BT
3477 case BPF_MAP_TYPE_XSKMAP:
3478 return __xsk_map_lookup_elem(map, index);
9c270af3
JDB
3479 default:
3480 return NULL;
3481 }
3482}
3483
f6069b9a 3484void bpf_clear_redirect_map(struct bpf_map *map)
7c300131 3485{
f6069b9a
DB
3486 struct bpf_redirect_info *ri;
3487 int cpu;
3488
3489 for_each_possible_cpu(cpu) {
3490 ri = per_cpu_ptr(&bpf_redirect_info, cpu);
3491 /* Avoid polluting remote cacheline due to writes if
3492 * not needed. Once we pass this test, we need the
3493 * cmpxchg() to make sure it hasn't been changed in
3494 * the meantime by remote CPU.
3495 */
3496 if (unlikely(READ_ONCE(ri->map) == map))
3497 cmpxchg(&ri->map, map, NULL);
3498 }
7c300131
DB
3499}
3500
e4a8e817 3501static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
47b123ed
JDB
3502 struct bpf_prog *xdp_prog, struct bpf_map *map,
3503 struct bpf_redirect_info *ri)
97f91a7c 3504{
11393cc9 3505 u32 index = ri->ifindex;
9c270af3 3506 void *fwd = NULL;
4c03bdd7 3507 int err;
97f91a7c
JF
3508
3509 ri->ifindex = 0;
f6069b9a 3510 WRITE_ONCE(ri->map, NULL);
97f91a7c 3511
9c270af3 3512 fwd = __xdp_map_lookup_elem(map, index);
2a68d85f 3513 if (unlikely(!fwd)) {
4c03bdd7 3514 err = -EINVAL;
f5836ca5 3515 goto err;
4c03bdd7 3516 }
e1302542 3517 if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
11393cc9
JF
3518 xdp_do_flush_map();
3519
9c270af3 3520 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
f5836ca5
JDB
3521 if (unlikely(err))
3522 goto err;
3523
3524 ri->map_to_flush = map;
59a30896 3525 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
f5836ca5
JDB
3526 return 0;
3527err:
59a30896 3528 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
97f91a7c
JF
3529 return err;
3530}
3531
5acaee0a
JF
3532int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3533 struct bpf_prog *xdp_prog)
814abfab 3534{
0b19cc0a 3535 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
f6069b9a 3536 struct bpf_map *map = READ_ONCE(ri->map);
814abfab 3537
2a68d85f 3538 if (likely(map))
47b123ed 3539 return xdp_do_redirect_map(dev, xdp, xdp_prog, map, ri);
97f91a7c 3540
47b123ed 3541 return xdp_do_redirect_slow(dev, xdp, xdp_prog, ri);
814abfab
JF
3542}
3543EXPORT_SYMBOL_GPL(xdp_do_redirect);
3544
c060bc61
XS
3545static int xdp_do_generic_redirect_map(struct net_device *dev,
3546 struct sk_buff *skb,
02671e23 3547 struct xdp_buff *xdp,
f6069b9a
DB
3548 struct bpf_prog *xdp_prog,
3549 struct bpf_map *map)
6103aa96 3550{
0b19cc0a 3551 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
eb48d682 3552 u32 index = ri->ifindex;
02671e23 3553 void *fwd = NULL;
2facaad6 3554 int err = 0;
6103aa96 3555
6103aa96 3556 ri->ifindex = 0;
f6069b9a 3557 WRITE_ONCE(ri->map, NULL);
96c5508e 3558
9c270af3 3559 fwd = __xdp_map_lookup_elem(map, index);
2facaad6
JDB
3560 if (unlikely(!fwd)) {
3561 err = -EINVAL;
f5836ca5 3562 goto err;
6103aa96
JF
3563 }
3564
9c270af3 3565 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
6d5fc195
TM
3566 struct bpf_dtab_netdev *dst = fwd;
3567
3568 err = dev_map_generic_redirect(dst, skb, xdp_prog);
3569 if (unlikely(err))
9c270af3 3570 goto err;
02671e23
BT
3571 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
3572 struct xdp_sock *xs = fwd;
3573
3574 err = xsk_generic_rcv(xs, xdp);
3575 if (err)
3576 goto err;
3577 consume_skb(skb);
9c270af3
JDB
3578 } else {
3579 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
3580 err = -EBADRQC;
f5836ca5 3581 goto err;
2facaad6 3582 }
6103aa96 3583
9c270af3
JDB
3584 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3585 return 0;
3586err:
3587 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3588 return err;
3589}
3590
3591int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
02671e23 3592 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
9c270af3 3593{
0b19cc0a 3594 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
f6069b9a 3595 struct bpf_map *map = READ_ONCE(ri->map);
9c270af3
JDB
3596 u32 index = ri->ifindex;
3597 struct net_device *fwd;
3598 int err = 0;
3599
f6069b9a
DB
3600 if (map)
3601 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
3602 map);
9c270af3
JDB
3603 ri->ifindex = 0;
3604 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3605 if (unlikely(!fwd)) {
3606 err = -EINVAL;
f5836ca5 3607 goto err;
2facaad6
JDB
3608 }
3609
d8d7218a
TM
3610 err = xdp_ok_fwd_dev(fwd, skb->len);
3611 if (unlikely(err))
9c270af3
JDB
3612 goto err;
3613
2facaad6 3614 skb->dev = fwd;
9c270af3 3615 _trace_xdp_redirect(dev, xdp_prog, index);
02671e23 3616 generic_xdp_tx(skb, xdp_prog);
f5836ca5
JDB
3617 return 0;
3618err:
9c270af3 3619 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2facaad6 3620 return err;
6103aa96
JF
3621}
3622EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
3623
814abfab
JF
3624BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
3625{
0b19cc0a 3626 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
814abfab
JF
3627
3628 if (unlikely(flags))
3629 return XDP_ABORTED;
3630
3631 ri->ifindex = ifindex;
3632 ri->flags = flags;
f6069b9a 3633 WRITE_ONCE(ri->map, NULL);
e4a8e817 3634
814abfab
JF
3635 return XDP_REDIRECT;
3636}
3637
3638static const struct bpf_func_proto bpf_xdp_redirect_proto = {
3639 .func = bpf_xdp_redirect,
3640 .gpl_only = false,
3641 .ret_type = RET_INTEGER,
3642 .arg1_type = ARG_ANYTHING,
3643 .arg2_type = ARG_ANYTHING,
3644};
3645
f6069b9a
DB
3646BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
3647 u64, flags)
e4a8e817 3648{
0b19cc0a 3649 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
e4a8e817
DB
3650
3651 if (unlikely(flags))
3652 return XDP_ABORTED;
3653
3654 ri->ifindex = ifindex;
3655 ri->flags = flags;
f6069b9a 3656 WRITE_ONCE(ri->map, map);
e4a8e817
DB
3657
3658 return XDP_REDIRECT;
3659}
3660
3661static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
3662 .func = bpf_xdp_redirect_map,
3663 .gpl_only = false,
3664 .ret_type = RET_INTEGER,
3665 .arg1_type = ARG_CONST_MAP_PTR,
3666 .arg2_type = ARG_ANYTHING,
3667 .arg3_type = ARG_ANYTHING,
3668};
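
For reference, the code above is the in-kernel half of the bpf_redirect_map() helper; below is a minimal XDP-program-side sketch. It assumes libbpf/selftests-style bpf_helpers.h macros, and the map name, key value and section name are illustrative, not taken from this file.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Illustrative devmap: value is the target ifindex, filled from user space. */
struct bpf_map_def SEC("maps") tx_port = {
	.type		= BPF_MAP_TYPE_DEVMAP,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u32),
	.max_entries	= 64,
};

SEC("xdp")
int xdp_redirect_example(struct xdp_md *ctx)
{
	__u32 key = 0;

	/* Returns XDP_REDIRECT on success; xdp_do_redirect_map() above then
	 * resolves the devmap entry and transmits/flushes the frame.
	 */
	return bpf_redirect_map(&tx_port, key, 0);
}

char _license[] SEC("license") = "GPL";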
3669
555c8a86 3670static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
aa7145c1 3671 unsigned long off, unsigned long len)
555c8a86 3672{
aa7145c1 3673 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
555c8a86
DB
3674
3675 if (unlikely(!ptr))
3676 return len;
3677 if (ptr != dst_buff)
3678 memcpy(dst_buff, ptr, len);
3679
3680 return 0;
3681}
3682
f3694e00
DB
3683BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
3684 u64, flags, void *, meta, u64, meta_size)
555c8a86 3685{
555c8a86 3686 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
555c8a86
DB
3687
3688 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3689 return -EINVAL;
3690 if (unlikely(skb_size > skb->len))
3691 return -EFAULT;
3692
3693 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
3694 bpf_skb_copy);
3695}
3696
3697static const struct bpf_func_proto bpf_skb_event_output_proto = {
3698 .func = bpf_skb_event_output,
3699 .gpl_only = true,
3700 .ret_type = RET_INTEGER,
3701 .arg1_type = ARG_PTR_TO_CTX,
3702 .arg2_type = ARG_CONST_MAP_PTR,
3703 .arg3_type = ARG_ANYTHING,
39f19ebb 3704 .arg4_type = ARG_PTR_TO_MEM,
1728a4f2 3705 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
555c8a86
DB
3706};
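
bpf_skb_event_output() is what backs bpf_perf_event_output() for skb-based programs: the upper 32 bits of the flags argument select how many packet bytes bpf_skb_copy() appends after the user metadata. A hedged tc/BPF sketch follows; the map, the metadata struct and the section name are illustrative and assume libbpf's bpf_helpers.h.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

struct bpf_map_def SEC("maps") events = {
	.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(__u32),
	.max_entries	= 64,		/* >= number of possible CPUs */
};

struct event_meta {
	__u32 pkt_len;
	__u32 ifindex;
};

SEC("classifier")
int sample_packet(struct __sk_buff *skb)
{
	struct event_meta meta = {
		.pkt_len = skb->len,
		.ifindex = skb->ifindex,
	};
	/* Upper 32 bits: packet bytes to append after the metadata;
	 * lower bits: BPF_F_CURRENT_CPU selects the per-CPU ring.
	 */
	__u64 flags = BPF_F_CURRENT_CPU | ((__u64)skb->len << 32);

	bpf_perf_event_output(skb, &events, flags, &meta, sizeof(meta));
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";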
3707
c6c33454
DB
3708static unsigned short bpf_tunnel_key_af(u64 flags)
3709{
3710 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
3711}
3712
f3694e00
DB
3713BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
3714 u32, size, u64, flags)
d3aa45ce 3715{
c6c33454
DB
3716 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3717 u8 compat[sizeof(struct bpf_tunnel_key)];
074f528e
DB
3718 void *to_orig = to;
3719 int err;
d3aa45ce 3720
074f528e
DB
3721 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
3722 err = -EINVAL;
3723 goto err_clear;
3724 }
3725 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
3726 err = -EPROTO;
3727 goto err_clear;
3728 }
c6c33454 3729 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
074f528e 3730 err = -EINVAL;
c6c33454 3731 switch (size) {
4018ab18 3732 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 3733 case offsetof(struct bpf_tunnel_key, tunnel_ext):
4018ab18 3734 goto set_compat;
c6c33454
DB
3735 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3736 /* Fixup deprecated structure layouts here, so we have
3737 * a common path later on.
3738 */
3739 if (ip_tunnel_info_af(info) != AF_INET)
074f528e 3740 goto err_clear;
4018ab18 3741set_compat:
c6c33454
DB
3742 to = (struct bpf_tunnel_key *)compat;
3743 break;
3744 default:
074f528e 3745 goto err_clear;
c6c33454
DB
3746 }
3747 }
d3aa45ce
AS
3748
3749 to->tunnel_id = be64_to_cpu(info->key.tun_id);
c6c33454
DB
3750 to->tunnel_tos = info->key.tos;
3751 to->tunnel_ttl = info->key.ttl;
1fbc2e0c 3752 to->tunnel_ext = 0;
c6c33454 3753
4018ab18 3754 if (flags & BPF_F_TUNINFO_IPV6) {
c6c33454
DB
3755 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
3756 sizeof(to->remote_ipv6));
4018ab18
DB
3757 to->tunnel_label = be32_to_cpu(info->key.label);
3758 } else {
c6c33454 3759 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
1fbc2e0c
DB
3760 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
3761 to->tunnel_label = 0;
4018ab18 3762 }
c6c33454
DB
3763
3764 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
074f528e 3765 memcpy(to_orig, to, size);
d3aa45ce
AS
3766
3767 return 0;
074f528e
DB
3768err_clear:
3769 memset(to_orig, 0, size);
3770 return err;
d3aa45ce
AS
3771}
3772
577c50aa 3773static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
d3aa45ce
AS
3774 .func = bpf_skb_get_tunnel_key,
3775 .gpl_only = false,
3776 .ret_type = RET_INTEGER,
3777 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3778 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3779 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
3780 .arg4_type = ARG_ANYTHING,
3781};
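
Program-side counterpart of the helper above: reading the receive-side tunnel metadata on a collect_md tunnel device. A minimal sketch assuming a tc ingress attachment and libbpf's bpf_helpers.h; the tunnel id check is illustrative.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int tunnel_key_get(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	/* On error the kernel helper above zeroes the key before returning. */
	if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0) < 0)
		return TC_ACT_SHOT;

	/* key.tunnel_id / key.remote_ipv4 now describe the outer header. */
	return key.tunnel_id == 42 ? TC_ACT_OK : TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";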
3782
f3694e00 3783BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
14ca0751 3784{
14ca0751 3785 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
074f528e 3786 int err;
14ca0751
DB
3787
3788 if (unlikely(!info ||
074f528e
DB
3789 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
3790 err = -ENOENT;
3791 goto err_clear;
3792 }
3793 if (unlikely(size < info->options_len)) {
3794 err = -ENOMEM;
3795 goto err_clear;
3796 }
14ca0751
DB
3797
3798 ip_tunnel_info_opts_get(to, info);
074f528e
DB
3799 if (size > info->options_len)
3800 memset(to + info->options_len, 0, size - info->options_len);
14ca0751
DB
3801
3802 return info->options_len;
074f528e
DB
3803err_clear:
3804 memset(to, 0, size);
3805 return err;
14ca0751
DB
3806}
3807
3808static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
3809 .func = bpf_skb_get_tunnel_opt,
3810 .gpl_only = false,
3811 .ret_type = RET_INTEGER,
3812 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3813 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3814 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
3815};
3816
d3aa45ce
AS
3817static struct metadata_dst __percpu *md_dst;
3818
f3694e00
DB
3819BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3820 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
d3aa45ce 3821{
d3aa45ce 3822 struct metadata_dst *md = this_cpu_ptr(md_dst);
c6c33454 3823 u8 compat[sizeof(struct bpf_tunnel_key)];
d3aa45ce
AS
3824 struct ip_tunnel_info *info;
3825
22080870 3826 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
77a5196a 3827 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
d3aa45ce 3828 return -EINVAL;
c6c33454
DB
3829 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3830 switch (size) {
4018ab18 3831 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 3832 case offsetof(struct bpf_tunnel_key, tunnel_ext):
c6c33454
DB
3833 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3834 /* Fixup deprecated structure layouts here, so we have
3835 * a common path later on.
3836 */
3837 memcpy(compat, from, size);
3838 memset(compat + size, 0, sizeof(compat) - size);
f3694e00 3839 from = (const struct bpf_tunnel_key *) compat;
c6c33454
DB
3840 break;
3841 default:
3842 return -EINVAL;
3843 }
3844 }
c0e760c9
DB
3845 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
3846 from->tunnel_ext))
4018ab18 3847 return -EINVAL;
d3aa45ce
AS
3848
3849 skb_dst_drop(skb);
3850 dst_hold((struct dst_entry *) md);
3851 skb_dst_set(skb, (struct dst_entry *) md);
3852
3853 info = &md->u.tun_info;
5540fbf4 3854 memset(info, 0, sizeof(*info));
d3aa45ce 3855 info->mode = IP_TUNNEL_INFO_TX;
c6c33454 3856
db3c6139 3857 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
22080870
DB
3858 if (flags & BPF_F_DONT_FRAGMENT)
3859 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
792f3dd6
WT
3860 if (flags & BPF_F_ZERO_CSUM_TX)
3861 info->key.tun_flags &= ~TUNNEL_CSUM;
77a5196a
WT
3862 if (flags & BPF_F_SEQ_NUMBER)
3863 info->key.tun_flags |= TUNNEL_SEQ;
22080870 3864
d3aa45ce 3865 info->key.tun_id = cpu_to_be64(from->tunnel_id);
c6c33454
DB
3866 info->key.tos = from->tunnel_tos;
3867 info->key.ttl = from->tunnel_ttl;
3868
3869 if (flags & BPF_F_TUNINFO_IPV6) {
3870 info->mode |= IP_TUNNEL_INFO_IPV6;
3871 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
3872 sizeof(from->remote_ipv6));
4018ab18
DB
3873 info->key.label = cpu_to_be32(from->tunnel_label) &
3874 IPV6_FLOWLABEL_MASK;
c6c33454
DB
3875 } else {
3876 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3877 }
d3aa45ce
AS
3878
3879 return 0;
3880}
3881
577c50aa 3882static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
d3aa45ce
AS
3883 .func = bpf_skb_set_tunnel_key,
3884 .gpl_only = false,
3885 .ret_type = RET_INTEGER,
3886 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3887 .arg2_type = ARG_PTR_TO_MEM,
3888 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
3889 .arg4_type = ARG_ANYTHING,
3890};
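
And the egress counterpart: populating the per-CPU metadata dst before the packet reaches a collect_md tunnel device. A sketch with an illustrative tunnel id and address; note remote_ipv4 is supplied in host byte order and converted by the helper.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int tunnel_key_set(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {
		.tunnel_id	= 42,
		.remote_ipv4	= 0x0a000001,	/* 10.0.0.1, host byte order */
		.tunnel_ttl	= 64,
	};

	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
				   BPF_F_ZERO_CSUM_TX) < 0)
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";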
3891
f3694e00
DB
3892BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
3893 const u8 *, from, u32, size)
14ca0751 3894{
14ca0751
DB
3895 struct ip_tunnel_info *info = skb_tunnel_info(skb);
3896 const struct metadata_dst *md = this_cpu_ptr(md_dst);
3897
3898 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
3899 return -EINVAL;
fca5fdf6 3900 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
14ca0751
DB
3901 return -ENOMEM;
3902
256c87c1 3903 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
14ca0751
DB
3904
3905 return 0;
3906}
3907
3908static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
3909 .func = bpf_skb_set_tunnel_opt,
3910 .gpl_only = false,
3911 .ret_type = RET_INTEGER,
3912 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3913 .arg2_type = ARG_PTR_TO_MEM,
3914 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
3915};
3916
3917static const struct bpf_func_proto *
3918bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
d3aa45ce
AS
3919{
3920 if (!md_dst) {
d66f2b91
JK
3921 struct metadata_dst __percpu *tmp;
3922
3923 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
3924 METADATA_IP_TUNNEL,
3925 GFP_KERNEL);
3926 if (!tmp)
d3aa45ce 3927 return NULL;
d66f2b91
JK
3928 if (cmpxchg(&md_dst, NULL, tmp))
3929 metadata_dst_free_percpu(tmp);
d3aa45ce 3930 }
14ca0751
DB
3931
3932 switch (which) {
3933 case BPF_FUNC_skb_set_tunnel_key:
3934 return &bpf_skb_set_tunnel_key_proto;
3935 case BPF_FUNC_skb_set_tunnel_opt:
3936 return &bpf_skb_set_tunnel_opt_proto;
3937 default:
3938 return NULL;
3939 }
d3aa45ce
AS
3940}
3941
f3694e00
DB
3942BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
3943 u32, idx)
4a482f34 3944{
4a482f34
MKL
3945 struct bpf_array *array = container_of(map, struct bpf_array, map);
3946 struct cgroup *cgrp;
3947 struct sock *sk;
4a482f34 3948
2d48c5f9 3949 sk = skb_to_full_sk(skb);
4a482f34
MKL
3950 if (!sk || !sk_fullsock(sk))
3951 return -ENOENT;
f3694e00 3952 if (unlikely(idx >= array->map.max_entries))
4a482f34
MKL
3953 return -E2BIG;
3954
f3694e00 3955 cgrp = READ_ONCE(array->ptrs[idx]);
4a482f34
MKL
3956 if (unlikely(!cgrp))
3957 return -EAGAIN;
3958
54fd9c2d 3959 return sk_under_cgroup_hierarchy(sk, cgrp);
4a482f34
MKL
3960}
3961
747ea55e
DB
3962static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
3963 .func = bpf_skb_under_cgroup,
4a482f34
MKL
3964 .gpl_only = false,
3965 .ret_type = RET_INTEGER,
3966 .arg1_type = ARG_PTR_TO_CTX,
3967 .arg2_type = ARG_CONST_MAP_PTR,
3968 .arg3_type = ARG_ANYTHING,
3969};
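
A hedged tc/BPF sketch of the helper above: user space pins a cgroup fd into a BPF_MAP_TYPE_CGROUP_ARRAY and the program checks whether the skb's socket sits under that cgroup. The map name and index are illustrative.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

struct bpf_map_def SEC("maps") cgroup_map = {
	.type		= BPF_MAP_TYPE_CGROUP_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u32),
	.max_entries	= 1,
};

SEC("classifier")
int drop_outside_cgroup(struct __sk_buff *skb)
{
	/* 1: socket is under the cgroup hierarchy at index 0,
	 * 0: it is not, negative: no full socket or bad index.
	 */
	if (bpf_skb_under_cgroup(skb, &cgroup_map, 0) != 1)
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";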
4a482f34 3970
cb20b08e
DB
3971#ifdef CONFIG_SOCK_CGROUP_DATA
3972BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
3973{
3974 struct sock *sk = skb_to_full_sk(skb);
3975 struct cgroup *cgrp;
3976
3977 if (!sk || !sk_fullsock(sk))
3978 return 0;
3979
3980 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
3981 return cgrp->kn->id.id;
3982}
3983
3984static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
3985 .func = bpf_skb_cgroup_id,
3986 .gpl_only = false,
3987 .ret_type = RET_INTEGER,
3988 .arg1_type = ARG_PTR_TO_CTX,
3989};
77236281
AI
3990
3991BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
3992 ancestor_level)
3993{
3994 struct sock *sk = skb_to_full_sk(skb);
3995 struct cgroup *ancestor;
3996 struct cgroup *cgrp;
3997
3998 if (!sk || !sk_fullsock(sk))
3999 return 0;
4000
4001 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4002 ancestor = cgroup_ancestor(cgrp, ancestor_level);
4003 if (!ancestor)
4004 return 0;
4005
4006 return ancestor->kn->id.id;
4007}
4008
4009static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4010 .func = bpf_skb_ancestor_cgroup_id,
4011 .gpl_only = false,
4012 .ret_type = RET_INTEGER,
4013 .arg1_type = ARG_PTR_TO_CTX,
4014 .arg2_type = ARG_ANYTHING,
4015};
cb20b08e
DB
4016#endif
4017
4de16969
DB
4018static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
4019 unsigned long off, unsigned long len)
4020{
4021 memcpy(dst_buff, src_buff + off, len);
4022 return 0;
4023}
4024
f3694e00
DB
4025BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
4026 u64, flags, void *, meta, u64, meta_size)
4de16969 4027{
4de16969 4028 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4de16969
DB
4029
4030 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4031 return -EINVAL;
4032 if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
4033 return -EFAULT;
4034
9c471370
MKL
4035 return bpf_event_output(map, flags, meta, meta_size, xdp->data,
4036 xdp_size, bpf_xdp_copy);
4de16969
DB
4037}
4038
4039static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4040 .func = bpf_xdp_event_output,
4041 .gpl_only = true,
4042 .ret_type = RET_INTEGER,
4043 .arg1_type = ARG_PTR_TO_CTX,
4044 .arg2_type = ARG_CONST_MAP_PTR,
4045 .arg3_type = ARG_ANYTHING,
39f19ebb 4046 .arg4_type = ARG_PTR_TO_MEM,
1728a4f2 4047 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4de16969
DB
4048};
4049
91b8270f
CF
4050BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4051{
4052 return skb->sk ? sock_gen_cookie(skb->sk) : 0;
4053}
4054
4055static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4056 .func = bpf_get_socket_cookie,
4057 .gpl_only = false,
4058 .ret_type = RET_INTEGER,
4059 .arg1_type = ARG_PTR_TO_CTX,
4060};
4061
d692f113
AI
4062BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4063{
4064 return sock_gen_cookie(ctx->sk);
4065}
4066
4067static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4068 .func = bpf_get_socket_cookie_sock_addr,
4069 .gpl_only = false,
4070 .ret_type = RET_INTEGER,
4071 .arg1_type = ARG_PTR_TO_CTX,
4072};
4073
4074BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4075{
4076 return sock_gen_cookie(ctx->sk);
4077}
4078
4079static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
4080 .func = bpf_get_socket_cookie_sock_ops,
4081 .gpl_only = false,
4082 .ret_type = RET_INTEGER,
4083 .arg1_type = ARG_PTR_TO_CTX,
4084};
4085
6acc5c29
CF
4086BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
4087{
4088 struct sock *sk = sk_to_full_sk(skb->sk);
4089 kuid_t kuid;
4090
4091 if (!sk || !sk_fullsock(sk))
4092 return overflowuid;
4093 kuid = sock_net_uid(sock_net(sk), sk);
4094 return from_kuid_munged(sock_net(sk)->user_ns, kuid);
4095}
4096
4097static const struct bpf_func_proto bpf_get_socket_uid_proto = {
4098 .func = bpf_get_socket_uid,
4099 .gpl_only = false,
4100 .ret_type = RET_INTEGER,
4101 .arg1_type = ARG_PTR_TO_CTX,
4102};
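
The two socket-identity helpers above are commonly used together from a cgroup skb program; a minimal sketch follows (section name and uid value are illustrative).

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int sock_identity(struct __sk_buff *skb)
{
	__u64 cookie = bpf_get_socket_cookie(skb);	/* 0 if no socket */
	__u32 uid = bpf_get_socket_uid(skb);		/* overflowuid if none */

	/* Allow traffic from uid 1000 only; the cookie could instead be
	 * used as a key for per-socket state in a map.
	 */
	return uid == 1000 || cookie == 0;
}

char _license[] SEC("license") = "GPL";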
4103
a5a3a828
SV
4104BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
4105 struct bpf_map *, map, u64, flags, void *, data, u64, size)
4106{
4107 if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
4108 return -EINVAL;
4109
4110 return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
4111}
4112
4113static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
4114 .func = bpf_sockopt_event_output,
4115 .gpl_only = true,
4116 .ret_type = RET_INTEGER,
4117 .arg1_type = ARG_PTR_TO_CTX,
4118 .arg2_type = ARG_CONST_MAP_PTR,
4119 .arg3_type = ARG_ANYTHING,
4120 .arg4_type = ARG_PTR_TO_MEM,
4121 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4122};
4123
8c4b4c7e
LB
4124BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4125 int, level, int, optname, char *, optval, int, optlen)
4126{
4127 struct sock *sk = bpf_sock->sk;
4128 int ret = 0;
4129 int val;
4130
4131 if (!sk_fullsock(sk))
4132 return -EINVAL;
4133
4134 if (level == SOL_SOCKET) {
4135 if (optlen != sizeof(int))
4136 return -EINVAL;
4137 val = *((int *)optval);
4138
4139		/* Only some socket options are supported */
4140 switch (optname) {
4141 case SO_RCVBUF:
c9e45767 4142 val = min_t(u32, val, sysctl_rmem_max);
8c4b4c7e
LB
4143 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
4144 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
4145 break;
4146 case SO_SNDBUF:
c9e45767 4147 val = min_t(u32, val, sysctl_wmem_max);
8c4b4c7e
LB
4148 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
4149 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
4150 break;
76a9ebe8 4151 case SO_MAX_PACING_RATE: /* 32bit version */
e224c390
YC
4152 if (val != ~0U)
4153 cmpxchg(&sk->sk_pacing_status,
4154 SK_PACING_NONE,
4155 SK_PACING_NEEDED);
76a9ebe8 4156 sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
8c4b4c7e
LB
4157 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
4158 sk->sk_max_pacing_rate);
4159 break;
4160 case SO_PRIORITY:
4161 sk->sk_priority = val;
4162 break;
4163 case SO_RCVLOWAT:
4164 if (val < 0)
4165 val = INT_MAX;
4166 sk->sk_rcvlowat = val ? : 1;
4167 break;
4168 case SO_MARK:
f4924f24
PO
4169 if (sk->sk_mark != val) {
4170 sk->sk_mark = val;
4171 sk_dst_reset(sk);
4172 }
8c4b4c7e
LB
4173 break;
4174 default:
4175 ret = -EINVAL;
4176 }
a5192c52 4177#ifdef CONFIG_INET
6f5c39fa
NS
4178 } else if (level == SOL_IP) {
4179 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4180 return -EINVAL;
4181
4182 val = *((int *)optval);
4183 /* Only some options are supported */
4184 switch (optname) {
4185 case IP_TOS:
4186 if (val < -1 || val > 0xff) {
4187 ret = -EINVAL;
4188 } else {
4189 struct inet_sock *inet = inet_sk(sk);
4190
4191 if (val == -1)
4192 val = 0;
4193 inet->tos = val;
4194 }
4195 break;
4196 default:
4197 ret = -EINVAL;
4198 }
6f9bd3d7
LB
4199#if IS_ENABLED(CONFIG_IPV6)
4200 } else if (level == SOL_IPV6) {
4201 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4202 return -EINVAL;
4203
4204 val = *((int *)optval);
4205 /* Only some options are supported */
4206 switch (optname) {
4207 case IPV6_TCLASS:
4208 if (val < -1 || val > 0xff) {
4209 ret = -EINVAL;
4210 } else {
4211 struct ipv6_pinfo *np = inet6_sk(sk);
4212
4213 if (val == -1)
4214 val = 0;
4215 np->tclass = val;
4216 }
4217 break;
4218 default:
4219 ret = -EINVAL;
4220 }
4221#endif
8c4b4c7e
LB
4222 } else if (level == SOL_TCP &&
4223 sk->sk_prot->setsockopt == tcp_setsockopt) {
91b5b21c
LB
4224 if (optname == TCP_CONGESTION) {
4225 char name[TCP_CA_NAME_MAX];
ebfa00c5 4226 bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
91b5b21c
LB
4227
4228 strncpy(name, optval, min_t(long, optlen,
4229 TCP_CA_NAME_MAX-1));
4230 name[TCP_CA_NAME_MAX-1] = 0;
6f9bd3d7
LB
4231 ret = tcp_set_congestion_control(sk, name, false,
4232 reinit);
91b5b21c 4233 } else {
fc747810
LB
4234 struct tcp_sock *tp = tcp_sk(sk);
4235
4236 if (optlen != sizeof(int))
4237 return -EINVAL;
4238
4239 val = *((int *)optval);
4240 /* Only some options are supported */
4241 switch (optname) {
4242 case TCP_BPF_IW:
31aa6503 4243 if (val <= 0 || tp->data_segs_out > tp->syn_data)
fc747810
LB
4244 ret = -EINVAL;
4245 else
4246 tp->snd_cwnd = val;
4247 break;
13bf9641
LB
4248 case TCP_BPF_SNDCWND_CLAMP:
4249 if (val <= 0) {
4250 ret = -EINVAL;
4251 } else {
4252 tp->snd_cwnd_clamp = val;
4253 tp->snd_ssthresh = val;
4254 }
6d3f06a0 4255 break;
1e215300
NS
4256 case TCP_SAVE_SYN:
4257 if (val < 0 || val > 1)
4258 ret = -EINVAL;
4259 else
4260 tp->save_syn = val;
4261 break;
fc747810
LB
4262 default:
4263 ret = -EINVAL;
4264 }
91b5b21c 4265 }
91b5b21c 4266#endif
8c4b4c7e
LB
4267 } else {
4268 ret = -EINVAL;
4269 }
4270 return ret;
4271}
4272
4273static const struct bpf_func_proto bpf_setsockopt_proto = {
4274 .func = bpf_setsockopt,
cd86d1fd 4275 .gpl_only = false,
8c4b4c7e
LB
4276 .ret_type = RET_INTEGER,
4277 .arg1_type = ARG_PTR_TO_CTX,
4278 .arg2_type = ARG_ANYTHING,
4279 .arg3_type = ARG_ANYTHING,
4280 .arg4_type = ARG_PTR_TO_MEM,
4281 .arg5_type = ARG_CONST_SIZE,
4282};
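
A sockops-program sketch of bpf_setsockopt() exercising the SOL_SOCKET and SOL_TCP branches above. It assumes the usual user-space socket headers for the option constants; the buffer size and congestion-control name are illustrative.

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int set_tcp_opts(struct bpf_sock_ops *skops)
{
	int bufsize = 1 << 20;
	char cc[] = "cubic";

	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* SOL_SOCKET path: value is clamped to sysctl_wmem_max above. */
		bpf_setsockopt(skops, SOL_SOCKET, SO_SNDBUF,
			       &bufsize, sizeof(bufsize));
		/* SOL_TCP/TCP_CONGESTION path: tcp_set_congestion_control(). */
		bpf_setsockopt(skops, IPPROTO_TCP, TCP_CONGESTION,
			       cc, sizeof(cc));
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";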
4283
cd86d1fd
LB
4284BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4285 int, level, int, optname, char *, optval, int, optlen)
4286{
4287 struct sock *sk = bpf_sock->sk;
cd86d1fd
LB
4288
4289 if (!sk_fullsock(sk))
4290 goto err_clear;
cd86d1fd
LB
4291#ifdef CONFIG_INET
4292 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
1edb6e03
AR
4293 struct inet_connection_sock *icsk;
4294 struct tcp_sock *tp;
4295
1e215300
NS
4296 switch (optname) {
4297 case TCP_CONGESTION:
4298 icsk = inet_csk(sk);
cd86d1fd
LB
4299
4300 if (!icsk->icsk_ca_ops || optlen <= 1)
4301 goto err_clear;
4302 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
4303 optval[optlen - 1] = 0;
1e215300
NS
4304 break;
4305 case TCP_SAVED_SYN:
4306 tp = tcp_sk(sk);
4307
4308 if (optlen <= 0 || !tp->saved_syn ||
4309 optlen > tp->saved_syn[0])
4310 goto err_clear;
4311 memcpy(optval, tp->saved_syn + 1, optlen);
4312 break;
4313 default:
cd86d1fd
LB
4314 goto err_clear;
4315 }
6f5c39fa
NS
4316 } else if (level == SOL_IP) {
4317 struct inet_sock *inet = inet_sk(sk);
4318
4319 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4320 goto err_clear;
4321
4322 /* Only some options are supported */
4323 switch (optname) {
4324 case IP_TOS:
4325 *((int *)optval) = (int)inet->tos;
4326 break;
4327 default:
4328 goto err_clear;
4329 }
6f9bd3d7
LB
4330#if IS_ENABLED(CONFIG_IPV6)
4331 } else if (level == SOL_IPV6) {
4332 struct ipv6_pinfo *np = inet6_sk(sk);
4333
4334 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4335 goto err_clear;
4336
4337 /* Only some options are supported */
4338 switch (optname) {
4339 case IPV6_TCLASS:
4340 *((int *)optval) = (int)np->tclass;
4341 break;
4342 default:
4343 goto err_clear;
4344 }
4345#endif
cd86d1fd
LB
4346 } else {
4347 goto err_clear;
4348 }
aa2bc739 4349 return 0;
cd86d1fd
LB
4350#endif
4351err_clear:
4352 memset(optval, 0, optlen);
4353 return -EINVAL;
4354}
4355
4356static const struct bpf_func_proto bpf_getsockopt_proto = {
4357 .func = bpf_getsockopt,
4358 .gpl_only = false,
4359 .ret_type = RET_INTEGER,
4360 .arg1_type = ARG_PTR_TO_CTX,
4361 .arg2_type = ARG_ANYTHING,
4362 .arg3_type = ARG_ANYTHING,
4363 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
4364 .arg5_type = ARG_CONST_SIZE,
4365};
4366
b13d8807
LB
4367BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
4368 int, argval)
4369{
4370 struct sock *sk = bpf_sock->sk;
4371 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
4372
a7dcdf6e 4373 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
b13d8807
LB
4374 return -EINVAL;
4375
b13d8807
LB
4376 if (val)
4377 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
4378
4379 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
b13d8807
LB
4380}
4381
4382static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
4383 .func = bpf_sock_ops_cb_flags_set,
4384 .gpl_only = false,
4385 .ret_type = RET_INTEGER,
4386 .arg1_type = ARG_PTR_TO_CTX,
4387 .arg2_type = ARG_ANYTHING,
4388};
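
A small sketch of the callback-flags helper above, called from a sockops program to request RTO and retransmit callbacks once a connection is established. The flag choice is illustrative.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int enable_tcp_callbacks(struct bpf_sock_ops *skops)
{
	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		/* Unsupported bits are returned to the caller, not applied. */
		bpf_sock_ops_cb_flags_set(skops,
					  BPF_SOCK_OPS_RTO_CB_FLAG |
					  BPF_SOCK_OPS_RETRANS_CB_FLAG);
	return 1;
}

char _license[] SEC("license") = "GPL";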
4389
d74bad4e
AI
4390const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
4391EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
4392
4393BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
4394 int, addr_len)
4395{
4396#ifdef CONFIG_INET
4397 struct sock *sk = ctx->sk;
4398 int err;
4399
4400	/* Binding to a port can be expensive, so it's prohibited in the helper.
4401	 * Only binding to an IP address is supported.
4402 */
4403 err = -EINVAL;
4404 if (addr->sa_family == AF_INET) {
4405 if (addr_len < sizeof(struct sockaddr_in))
4406 return err;
4407 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
4408 return err;
4409 return __inet_bind(sk, addr, addr_len, true, false);
4410#if IS_ENABLED(CONFIG_IPV6)
4411 } else if (addr->sa_family == AF_INET6) {
4412 if (addr_len < SIN6_LEN_RFC2133)
4413 return err;
4414 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
4415 return err;
4416 /* ipv6_bpf_stub cannot be NULL, since it's called from
4417 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
4418 */
4419 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
4420#endif /* CONFIG_IPV6 */
4421 }
4422#endif /* CONFIG_INET */
4423
4424 return -EAFNOSUPPORT;
4425}
4426
4427static const struct bpf_func_proto bpf_bind_proto = {
4428 .func = bpf_bind,
4429 .gpl_only = false,
4430 .ret_type = RET_INTEGER,
4431 .arg1_type = ARG_PTR_TO_CTX,
4432 .arg2_type = ARG_PTR_TO_MEM,
4433 .arg3_type = ARG_CONST_SIZE,
4434};
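
Program-side sketch of bpf_bind() from a cgroup/connect4 hook, binding the socket to a fixed source address before the connect proceeds. The address, section name and bpf_endian.h include are assumptions; the port is left at 0 because the helper rejects port binds.

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/connect4")
int bind_to_src(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in sa = {
		.sin_family	 = AF_INET,
		.sin_addr.s_addr = bpf_htonl(0x0a000001),	/* 10.0.0.1 */
		/* .sin_port stays 0: __inet_bind() is only reached for port 0 */
	};

	bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa));
	return 1;
}

char _license[] SEC("license") = "GPL";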
4435
12bed760
EB
4436#ifdef CONFIG_XFRM
4437BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
4438 struct bpf_xfrm_state *, to, u32, size, u64, flags)
4439{
4440 const struct sec_path *sp = skb_sec_path(skb);
4441 const struct xfrm_state *x;
4442
4443 if (!sp || unlikely(index >= sp->len || flags))
4444 goto err_clear;
4445
4446 x = sp->xvec[index];
4447
4448 if (unlikely(size != sizeof(struct bpf_xfrm_state)))
4449 goto err_clear;
4450
4451 to->reqid = x->props.reqid;
4452 to->spi = x->id.spi;
4453 to->family = x->props.family;
1fbc2e0c
DB
4454 to->ext = 0;
4455
12bed760
EB
4456 if (to->family == AF_INET6) {
4457 memcpy(to->remote_ipv6, x->props.saddr.a6,
4458 sizeof(to->remote_ipv6));
4459 } else {
4460 to->remote_ipv4 = x->props.saddr.a4;
1fbc2e0c 4461 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
12bed760
EB
4462 }
4463
4464 return 0;
4465err_clear:
4466 memset(to, 0, size);
4467 return -EINVAL;
4468}
4469
4470static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
4471 .func = bpf_skb_get_xfrm_state,
4472 .gpl_only = false,
4473 .ret_type = RET_INTEGER,
4474 .arg1_type = ARG_PTR_TO_CTX,
4475 .arg2_type = ARG_ANYTHING,
4476 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
4477 .arg4_type = ARG_CONST_SIZE,
4478 .arg5_type = ARG_ANYTHING,
4479};
4480#endif
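
Program-side sketch of bpf_skb_get_xfrm_state(): a tc program peeking at one entry of the skb's sec_path after IPsec receive processing. The reqid check is illustrative.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("classifier")
int xfrm_state_peek(struct __sk_buff *skb)
{
	struct bpf_xfrm_state xs;

	/* index selects an entry of the skb's sec_path; flags must be 0 */
	if (bpf_skb_get_xfrm_state(skb, 0, &xs, sizeof(xs), 0) < 0)
		return TC_ACT_OK;

	/* xs.reqid / xs.spi / xs.remote_ipv4 describe the matched SA */
	return xs.reqid == 1 ? TC_ACT_OK : TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";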
4481
87f5fc7e
DA
4482#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
4483static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
4484 const struct neighbour *neigh,
4485 const struct net_device *dev)
4486{
4487 memcpy(params->dmac, neigh->ha, ETH_ALEN);
4488 memcpy(params->smac, dev->dev_addr, ETH_ALEN);
4489 params->h_vlan_TCI = 0;
4490 params->h_vlan_proto = 0;
4c79579b 4491 params->ifindex = dev->ifindex;
87f5fc7e 4492
4c79579b 4493 return 0;
87f5fc7e
DA
4494}
4495#endif
4496
4497#if IS_ENABLED(CONFIG_INET)
4498static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede 4499 u32 flags, bool check_mtu)
87f5fc7e
DA
4500{
4501 struct in_device *in_dev;
4502 struct neighbour *neigh;
4503 struct net_device *dev;
4504 struct fib_result res;
4505 struct fib_nh *nh;
4506 struct flowi4 fl4;
4507 int err;
4f74fede 4508 u32 mtu;
87f5fc7e
DA
4509
4510 dev = dev_get_by_index_rcu(net, params->ifindex);
4511 if (unlikely(!dev))
4512 return -ENODEV;
4513
4514 /* verify forwarding is enabled on this interface */
4515 in_dev = __in_dev_get_rcu(dev);
4516 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4c79579b 4517 return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e
DA
4518
4519 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4520 fl4.flowi4_iif = 1;
4521 fl4.flowi4_oif = params->ifindex;
4522 } else {
4523 fl4.flowi4_iif = params->ifindex;
4524 fl4.flowi4_oif = 0;
4525 }
4526 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
4527 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
4528 fl4.flowi4_flags = 0;
4529
4530 fl4.flowi4_proto = params->l4_protocol;
4531 fl4.daddr = params->ipv4_dst;
4532 fl4.saddr = params->ipv4_src;
4533 fl4.fl4_sport = params->sport;
4534 fl4.fl4_dport = params->dport;
4535
4536 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4537 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4538 struct fib_table *tb;
4539
4540 tb = fib_get_table(net, tbid);
4541 if (unlikely(!tb))
4c79579b 4542 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
4543
4544 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
4545 } else {
4546 fl4.flowi4_mark = 0;
4547 fl4.flowi4_secid = 0;
4548 fl4.flowi4_tun_key.tun_id = 0;
4549 fl4.flowi4_uid = sock_net_uid(net, NULL);
4550
4551 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
4552 }
4553
4c79579b
DA
4554 if (err) {
4555		/* map fib lookup errors to the matching BPF_FIB_LKUP_RET_* codes */
4556 if (err == -EINVAL)
4557 return BPF_FIB_LKUP_RET_BLACKHOLE;
4558 if (err == -EHOSTUNREACH)
4559 return BPF_FIB_LKUP_RET_UNREACHABLE;
4560 if (err == -EACCES)
4561 return BPF_FIB_LKUP_RET_PROHIBIT;
4562
4563 return BPF_FIB_LKUP_RET_NOT_FWDED;
4564 }
4565
4566 if (res.type != RTN_UNICAST)
4567 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
4568
4569 if (res.fi->fib_nhs > 1)
4570 fib_select_path(net, &res, &fl4, NULL);
4571
4f74fede
DA
4572 if (check_mtu) {
4573 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
4574 if (params->tot_len > mtu)
4c79579b 4575 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede
DA
4576 }
4577
87f5fc7e
DA
4578 nh = &res.fi->fib_nh[res.nh_sel];
4579
4580 /* do not handle lwt encaps right now */
4581 if (nh->nh_lwtstate)
4c79579b 4582 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e
DA
4583
4584 dev = nh->nh_dev;
87f5fc7e
DA
4585 if (nh->nh_gw)
4586 params->ipv4_dst = nh->nh_gw;
4587
4588 params->rt_metric = res.fi->fib_priority;
4589
4590 /* xdp and cls_bpf programs are run in RCU-bh so
4591 * rcu_read_lock_bh is not needed here
4592 */
4593 neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
4c79579b
DA
4594 if (!neigh)
4595 return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e 4596
4c79579b 4597 return bpf_fib_set_fwd_params(params, neigh, dev);
87f5fc7e
DA
4598}
4599#endif
4600
4601#if IS_ENABLED(CONFIG_IPV6)
4602static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede 4603 u32 flags, bool check_mtu)
87f5fc7e
DA
4604{
4605 struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
4606 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
4607 struct neighbour *neigh;
4608 struct net_device *dev;
4609 struct inet6_dev *idev;
4610 struct fib6_info *f6i;
4611 struct flowi6 fl6;
4612 int strict = 0;
4613 int oif;
4f74fede 4614 u32 mtu;
87f5fc7e
DA
4615
4616 /* link local addresses are never forwarded */
4617 if (rt6_need_strict(dst) || rt6_need_strict(src))
4c79579b 4618 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
4619
4620 dev = dev_get_by_index_rcu(net, params->ifindex);
4621 if (unlikely(!dev))
4622 return -ENODEV;
4623
4624 idev = __in6_dev_get_safely(dev);
4625 if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
4c79579b 4626 return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e
DA
4627
4628 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4629 fl6.flowi6_iif = 1;
4630 oif = fl6.flowi6_oif = params->ifindex;
4631 } else {
4632 oif = fl6.flowi6_iif = params->ifindex;
4633 fl6.flowi6_oif = 0;
4634 strict = RT6_LOOKUP_F_HAS_SADDR;
4635 }
bd3a08aa 4636 fl6.flowlabel = params->flowinfo;
87f5fc7e
DA
4637 fl6.flowi6_scope = 0;
4638 fl6.flowi6_flags = 0;
4639 fl6.mp_hash = 0;
4640
4641 fl6.flowi6_proto = params->l4_protocol;
4642 fl6.daddr = *dst;
4643 fl6.saddr = *src;
4644 fl6.fl6_sport = params->sport;
4645 fl6.fl6_dport = params->dport;
4646
4647 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4648 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4649 struct fib6_table *tb;
4650
4651 tb = ipv6_stub->fib6_get_table(net, tbid);
4652 if (unlikely(!tb))
4c79579b 4653 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
4654
4655 f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
4656 } else {
4657 fl6.flowi6_mark = 0;
4658 fl6.flowi6_secid = 0;
4659 fl6.flowi6_tun_key.tun_id = 0;
4660 fl6.flowi6_uid = sock_net_uid(net, NULL);
4661
4662 f6i = ipv6_stub->fib6_lookup(net, oif, &fl6, strict);
4663 }
4664
4665 if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
4c79579b
DA
4666 return BPF_FIB_LKUP_RET_NOT_FWDED;
4667
4668 if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
4669 switch (f6i->fib6_type) {
4670 case RTN_BLACKHOLE:
4671 return BPF_FIB_LKUP_RET_BLACKHOLE;
4672 case RTN_UNREACHABLE:
4673 return BPF_FIB_LKUP_RET_UNREACHABLE;
4674 case RTN_PROHIBIT:
4675 return BPF_FIB_LKUP_RET_PROHIBIT;
4676 default:
4677 return BPF_FIB_LKUP_RET_NOT_FWDED;
4678 }
4679 }
87f5fc7e 4680
4c79579b
DA
4681 if (f6i->fib6_type != RTN_UNICAST)
4682 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
4683
4684 if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
4685 f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
4686 fl6.flowi6_oif, NULL,
4687 strict);
4688
4f74fede
DA
4689 if (check_mtu) {
4690 mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
4691 if (params->tot_len > mtu)
4c79579b 4692 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede
DA
4693 }
4694
87f5fc7e 4695 if (f6i->fib6_nh.nh_lwtstate)
4c79579b 4696 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e
DA
4697
4698 if (f6i->fib6_flags & RTF_GATEWAY)
4699 *dst = f6i->fib6_nh.nh_gw;
4700
4701 dev = f6i->fib6_nh.nh_dev;
4702 params->rt_metric = f6i->fib6_metric;
4703
4704 /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
4705	 * not needed here. We cannot use __ipv6_neigh_lookup_noref here
4706	 * because we need to get nd_tbl via the stub.
4707 */
4708 neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
4709 ndisc_hashfn, dst, dev);
4c79579b
DA
4710 if (!neigh)
4711 return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e 4712
4c79579b 4713 return bpf_fib_set_fwd_params(params, neigh, dev);
87f5fc7e
DA
4714}
4715#endif
4716
4717BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
4718 struct bpf_fib_lookup *, params, int, plen, u32, flags)
4719{
4720 if (plen < sizeof(*params))
4721 return -EINVAL;
4722
9ce64f19
DA
4723 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
4724 return -EINVAL;
4725
87f5fc7e
DA
4726 switch (params->family) {
4727#if IS_ENABLED(CONFIG_INET)
4728 case AF_INET:
4729 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
4f74fede 4730 flags, true);
87f5fc7e
DA
4731#endif
4732#if IS_ENABLED(CONFIG_IPV6)
4733 case AF_INET6:
4734 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
4f74fede 4735 flags, true);
87f5fc7e
DA
4736#endif
4737 }
bcece5dc 4738 return -EAFNOSUPPORT;
87f5fc7e
DA
4739}
4740
4741static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
4742 .func = bpf_xdp_fib_lookup,
4743 .gpl_only = true,
4744 .ret_type = RET_INTEGER,
4745 .arg1_type = ARG_PTR_TO_CTX,
4746 .arg2_type = ARG_PTR_TO_MEM,
4747 .arg3_type = ARG_CONST_SIZE,
4748 .arg4_type = ARG_ANYTHING,
4749};
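
A hedged XDP forwarding sketch built on the helper above: fill struct bpf_fib_lookup from the IPv4 header, and on BPF_FIB_LKUP_RET_SUCCESS rewrite the Ethernet addresses and redirect to the returned ifindex. The includes and section name assume libbpf-style headers; IPv6 and VLAN handling are omitted.

#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <sys/socket.h>
#include <string.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("xdp")
int xdp_fwd(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph = data + sizeof(*eth);
	struct bpf_fib_lookup fib = {};

	if ((void *)(iph + 1) > data_end || eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	fib.family	= AF_INET;
	fib.tos		= iph->tos;
	fib.l4_protocol	= iph->protocol;
	fib.tot_len	= bpf_ntohs(iph->tot_len);	/* checked against MTU */
	fib.ipv4_src	= iph->saddr;
	fib.ipv4_dst	= iph->daddr;
	fib.ifindex	= ctx->ingress_ifindex;

	if (bpf_fib_lookup(ctx, &fib, sizeof(fib), 0) !=
	    BPF_FIB_LKUP_RET_SUCCESS)
		return XDP_PASS;

	/* bpf_fib_set_fwd_params() above filled in smac/dmac/ifindex */
	memcpy(eth->h_dest, fib.dmac, ETH_ALEN);
	memcpy(eth->h_source, fib.smac, ETH_ALEN);
	return bpf_redirect(fib.ifindex, 0);
}

char _license[] SEC("license") = "GPL";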
4750
4751BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
4752 struct bpf_fib_lookup *, params, int, plen, u32, flags)
4753{
4f74fede 4754 struct net *net = dev_net(skb->dev);
4c79579b 4755 int rc = -EAFNOSUPPORT;
4f74fede 4756
87f5fc7e
DA
4757 if (plen < sizeof(*params))
4758 return -EINVAL;
4759
9ce64f19
DA
4760 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
4761 return -EINVAL;
4762
87f5fc7e
DA
4763 switch (params->family) {
4764#if IS_ENABLED(CONFIG_INET)
4765 case AF_INET:
4c79579b 4766 rc = bpf_ipv4_fib_lookup(net, params, flags, false);
4f74fede 4767 break;
87f5fc7e
DA
4768#endif
4769#if IS_ENABLED(CONFIG_IPV6)
4770 case AF_INET6:
4c79579b 4771 rc = bpf_ipv6_fib_lookup(net, params, flags, false);
4f74fede 4772 break;
87f5fc7e
DA
4773#endif
4774 }
4f74fede 4775
4c79579b 4776 if (!rc) {
4f74fede
DA
4777 struct net_device *dev;
4778
4c79579b 4779 dev = dev_get_by_index_rcu(net, params->ifindex);
4f74fede 4780 if (!is_skb_forwardable(dev, skb))
4c79579b 4781 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede
DA
4782 }
4783
4c79579b 4784 return rc;
87f5fc7e
DA
4785}
4786
4787static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
4788 .func = bpf_skb_fib_lookup,
4789 .gpl_only = true,
4790 .ret_type = RET_INTEGER,
4791 .arg1_type = ARG_PTR_TO_CTX,
4792 .arg2_type = ARG_PTR_TO_MEM,
4793 .arg3_type = ARG_CONST_SIZE,
4794 .arg4_type = ARG_ANYTHING,
4795};
4796
fe94cc29
MX
4797#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
4798static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
4799{
4800 int err;
4801 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
4802
4803 if (!seg6_validate_srh(srh, len))
4804 return -EINVAL;
4805
4806 switch (type) {
4807 case BPF_LWT_ENCAP_SEG6_INLINE:
4808 if (skb->protocol != htons(ETH_P_IPV6))
4809 return -EBADMSG;
4810
4811 err = seg6_do_srh_inline(skb, srh);
4812 break;
4813 case BPF_LWT_ENCAP_SEG6:
4814 skb_reset_inner_headers(skb);
4815 skb->encapsulation = 1;
4816 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
4817 break;
4818 default:
4819 return -EINVAL;
4820 }
4821
4822 bpf_compute_data_pointers(skb);
4823 if (err)
4824 return err;
4825
4826 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
4827 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
4828
4829 return seg6_lookup_nexthop(skb, NULL, 0);
4830}
4831#endif /* CONFIG_IPV6_SEG6_BPF */
4832
3e0bd37c
PO
4833#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
4834static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
4835 bool ingress)
4836{
52f27877 4837 return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
3e0bd37c
PO
4838}
4839#endif
4840
4841BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
fe94cc29
MX
4842 u32, len)
4843{
4844 switch (type) {
4845#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
4846 case BPF_LWT_ENCAP_SEG6:
4847 case BPF_LWT_ENCAP_SEG6_INLINE:
4848 return bpf_push_seg6_encap(skb, type, hdr, len);
3e0bd37c
PO
4849#endif
4850#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
4851 case BPF_LWT_ENCAP_IP:
4852 return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
fe94cc29
MX
4853#endif
4854 default:
4855 return -EINVAL;
4856 }
4857}
4858
3e0bd37c
PO
4859BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
4860 void *, hdr, u32, len)
4861{
4862 switch (type) {
4863#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
4864 case BPF_LWT_ENCAP_IP:
4865 return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
fe94cc29
MX
4866#endif
4867 default:
4868 return -EINVAL;
4869 }
4870}
4871
3e0bd37c
PO
4872static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
4873 .func = bpf_lwt_in_push_encap,
4874 .gpl_only = false,
4875 .ret_type = RET_INTEGER,
4876 .arg1_type = ARG_PTR_TO_CTX,
4877 .arg2_type = ARG_ANYTHING,
4878 .arg3_type = ARG_PTR_TO_MEM,
4879 .arg4_type = ARG_CONST_SIZE
4880};
4881
4882static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
4883 .func = bpf_lwt_xmit_push_encap,
fe94cc29
MX
4884 .gpl_only = false,
4885 .ret_type = RET_INTEGER,
4886 .arg1_type = ARG_PTR_TO_CTX,
4887 .arg2_type = ARG_ANYTHING,
4888 .arg3_type = ARG_PTR_TO_MEM,
4889 .arg4_type = ARG_CONST_SIZE
4890};
4891
61d76980 4892#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
fe94cc29
MX
4893BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
4894 const void *, from, u32, len)
4895{
fe94cc29
MX
4896 struct seg6_bpf_srh_state *srh_state =
4897 this_cpu_ptr(&seg6_bpf_srh_states);
486cdf21 4898 struct ipv6_sr_hdr *srh = srh_state->srh;
fe94cc29 4899 void *srh_tlvs, *srh_end, *ptr;
fe94cc29
MX
4900 int srhoff = 0;
4901
486cdf21 4902 if (srh == NULL)
fe94cc29
MX
4903 return -EINVAL;
4904
fe94cc29
MX
4905 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
4906 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
4907
4908 ptr = skb->data + offset;
4909 if (ptr >= srh_tlvs && ptr + len <= srh_end)
486cdf21 4910 srh_state->valid = false;
fe94cc29
MX
4911 else if (ptr < (void *)&srh->flags ||
4912 ptr + len > (void *)&srh->segments)
4913 return -EFAULT;
4914
4915 if (unlikely(bpf_try_make_writable(skb, offset + len)))
4916 return -EFAULT;
486cdf21
MX
4917 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
4918 return -EINVAL;
4919 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
fe94cc29
MX
4920
4921 memcpy(skb->data + offset, from, len);
4922 return 0;
fe94cc29
MX
4923}
4924
4925static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
4926 .func = bpf_lwt_seg6_store_bytes,
4927 .gpl_only = false,
4928 .ret_type = RET_INTEGER,
4929 .arg1_type = ARG_PTR_TO_CTX,
4930 .arg2_type = ARG_ANYTHING,
4931 .arg3_type = ARG_PTR_TO_MEM,
4932 .arg4_type = ARG_CONST_SIZE
4933};
4934
486cdf21 4935static void bpf_update_srh_state(struct sk_buff *skb)
fe94cc29 4936{
fe94cc29
MX
4937 struct seg6_bpf_srh_state *srh_state =
4938 this_cpu_ptr(&seg6_bpf_srh_states);
fe94cc29 4939 int srhoff = 0;
fe94cc29 4940
486cdf21
MX
4941 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
4942 srh_state->srh = NULL;
4943 } else {
4944 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
4945 srh_state->hdrlen = srh_state->srh->hdrlen << 3;
4946 srh_state->valid = true;
fe94cc29 4947 }
486cdf21
MX
4948}
4949
4950BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
4951 u32, action, void *, param, u32, param_len)
4952{
4953 struct seg6_bpf_srh_state *srh_state =
4954 this_cpu_ptr(&seg6_bpf_srh_states);
4955 int hdroff = 0;
4956 int err;
fe94cc29
MX
4957
4958 switch (action) {
4959 case SEG6_LOCAL_ACTION_END_X:
486cdf21
MX
4960 if (!seg6_bpf_has_valid_srh(skb))
4961 return -EBADMSG;
fe94cc29
MX
4962 if (param_len != sizeof(struct in6_addr))
4963 return -EINVAL;
4964 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
4965 case SEG6_LOCAL_ACTION_END_T:
486cdf21
MX
4966 if (!seg6_bpf_has_valid_srh(skb))
4967 return -EBADMSG;
fe94cc29
MX
4968 if (param_len != sizeof(int))
4969 return -EINVAL;
4970 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
486cdf21
MX
4971 case SEG6_LOCAL_ACTION_END_DT6:
4972 if (!seg6_bpf_has_valid_srh(skb))
4973 return -EBADMSG;
fe94cc29
MX
4974 if (param_len != sizeof(int))
4975 return -EINVAL;
486cdf21
MX
4976
4977 if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
4978 return -EBADMSG;
4979 if (!pskb_pull(skb, hdroff))
4980 return -EBADMSG;
4981
4982 skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
4983 skb_reset_network_header(skb);
4984 skb_reset_transport_header(skb);
4985 skb->encapsulation = 0;
4986
4987 bpf_compute_data_pointers(skb);
4988 bpf_update_srh_state(skb);
fe94cc29
MX
4989 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
4990 case SEG6_LOCAL_ACTION_END_B6:
486cdf21
MX
4991 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
4992 return -EBADMSG;
fe94cc29
MX
4993 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
4994 param, param_len);
4995 if (!err)
486cdf21
MX
4996 bpf_update_srh_state(skb);
4997
fe94cc29
MX
4998 return err;
4999 case SEG6_LOCAL_ACTION_END_B6_ENCAP:
486cdf21
MX
5000 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
5001 return -EBADMSG;
fe94cc29
MX
5002 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
5003 param, param_len);
5004 if (!err)
486cdf21
MX
5005 bpf_update_srh_state(skb);
5006
fe94cc29
MX
5007 return err;
5008 default:
5009 return -EINVAL;
5010 }
fe94cc29
MX
5011}
5012
5013static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
5014 .func = bpf_lwt_seg6_action,
5015 .gpl_only = false,
5016 .ret_type = RET_INTEGER,
5017 .arg1_type = ARG_PTR_TO_CTX,
5018 .arg2_type = ARG_ANYTHING,
5019 .arg3_type = ARG_PTR_TO_MEM,
5020 .arg4_type = ARG_CONST_SIZE
5021};
5022
5023BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
5024 s32, len)
5025{
fe94cc29
MX
5026 struct seg6_bpf_srh_state *srh_state =
5027 this_cpu_ptr(&seg6_bpf_srh_states);
486cdf21 5028 struct ipv6_sr_hdr *srh = srh_state->srh;
fe94cc29 5029 void *srh_end, *srh_tlvs, *ptr;
fe94cc29
MX
5030 struct ipv6hdr *hdr;
5031 int srhoff = 0;
5032 int ret;
5033
486cdf21 5034 if (unlikely(srh == NULL))
fe94cc29 5035 return -EINVAL;
fe94cc29
MX
5036
5037 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
5038 ((srh->first_segment + 1) << 4));
5039 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
5040 srh_state->hdrlen);
5041 ptr = skb->data + offset;
5042
5043 if (unlikely(ptr < srh_tlvs || ptr > srh_end))
5044 return -EFAULT;
5045 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
5046 return -EFAULT;
5047
5048 if (len > 0) {
5049 ret = skb_cow_head(skb, len);
5050 if (unlikely(ret < 0))
5051 return ret;
5052
5053 ret = bpf_skb_net_hdr_push(skb, offset, len);
5054 } else {
5055 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
5056 }
5057
5058 bpf_compute_data_pointers(skb);
5059 if (unlikely(ret < 0))
5060 return ret;
5061
5062 hdr = (struct ipv6hdr *)skb->data;
5063 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5064
486cdf21
MX
5065 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
5066 return -EINVAL;
5067 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
fe94cc29 5068 srh_state->hdrlen += len;
486cdf21 5069 srh_state->valid = false;
fe94cc29 5070 return 0;
fe94cc29
MX
5071}
5072
5073static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
5074 .func = bpf_lwt_seg6_adjust_srh,
5075 .gpl_only = false,
5076 .ret_type = RET_INTEGER,
5077 .arg1_type = ARG_PTR_TO_CTX,
5078 .arg2_type = ARG_ANYTHING,
5079 .arg3_type = ARG_ANYTHING,
5080};
61d76980 5081#endif /* CONFIG_IPV6_SEG6_BPF */
fe94cc29 5082
9b1f3d6e
MKL
5083#define CONVERT_COMMON_TCP_SOCK_FIELDS(md_type, CONVERT) \
5084do { \
5085 switch (si->off) { \
5086 case offsetof(md_type, snd_cwnd): \
5087 CONVERT(snd_cwnd); break; \
5088 case offsetof(md_type, srtt_us): \
5089 CONVERT(srtt_us); break; \
5090 case offsetof(md_type, snd_ssthresh): \
5091 CONVERT(snd_ssthresh); break; \
5092 case offsetof(md_type, rcv_nxt): \
5093 CONVERT(rcv_nxt); break; \
5094 case offsetof(md_type, snd_nxt): \
5095 CONVERT(snd_nxt); break; \
5096 case offsetof(md_type, snd_una): \
5097 CONVERT(snd_una); break; \
5098 case offsetof(md_type, mss_cache): \
5099 CONVERT(mss_cache); break; \
5100 case offsetof(md_type, ecn_flags): \
5101 CONVERT(ecn_flags); break; \
5102 case offsetof(md_type, rate_delivered): \
5103 CONVERT(rate_delivered); break; \
5104 case offsetof(md_type, rate_interval_us): \
5105 CONVERT(rate_interval_us); break; \
5106 case offsetof(md_type, packets_out): \
5107 CONVERT(packets_out); break; \
5108 case offsetof(md_type, retrans_out): \
5109 CONVERT(retrans_out); break; \
5110 case offsetof(md_type, total_retrans): \
5111 CONVERT(total_retrans); break; \
5112 case offsetof(md_type, segs_in): \
5113 CONVERT(segs_in); break; \
5114 case offsetof(md_type, data_segs_in): \
5115 CONVERT(data_segs_in); break; \
5116 case offsetof(md_type, segs_out): \
5117 CONVERT(segs_out); break; \
5118 case offsetof(md_type, data_segs_out): \
5119 CONVERT(data_segs_out); break; \
5120 case offsetof(md_type, lost_out): \
5121 CONVERT(lost_out); break; \
5122 case offsetof(md_type, sacked_out): \
5123 CONVERT(sacked_out); break; \
5124 case offsetof(md_type, bytes_received): \
5125 CONVERT(bytes_received); break; \
5126 case offsetof(md_type, bytes_acked): \
5127 CONVERT(bytes_acked); break; \
5128 } \
5129} while (0)
5130
df3f94a0
AB
5131#ifdef CONFIG_INET
5132static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
c8123ead 5133 int dif, int sdif, u8 family, u8 proto)
6acc9b43 5134{
6acc9b43
JS
5135 bool refcounted = false;
5136 struct sock *sk = NULL;
5137
5138 if (family == AF_INET) {
5139 __be32 src4 = tuple->ipv4.saddr;
5140 __be32 dst4 = tuple->ipv4.daddr;
6acc9b43
JS
5141
5142 if (proto == IPPROTO_TCP)
c8123ead 5143 sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
6acc9b43
JS
5144 src4, tuple->ipv4.sport,
5145 dst4, tuple->ipv4.dport,
5146 dif, sdif, &refcounted);
5147 else
5148 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
5149 dst4, tuple->ipv4.dport,
c8123ead 5150 dif, sdif, &udp_table, NULL);
8a615c6b 5151#if IS_ENABLED(CONFIG_IPV6)
6acc9b43
JS
5152 } else {
5153 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
5154 struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
6acc9b43
JS
5155
5156 if (proto == IPPROTO_TCP)
c8123ead 5157 sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
6acc9b43 5158 src6, tuple->ipv6.sport,
cac6cc2f 5159 dst6, ntohs(tuple->ipv6.dport),
6acc9b43 5160 dif, sdif, &refcounted);
8a615c6b
JS
5161 else if (likely(ipv6_bpf_stub))
5162 sk = ipv6_bpf_stub->udp6_lib_lookup(net,
5163 src6, tuple->ipv6.sport,
cac6cc2f 5164 dst6, tuple->ipv6.dport,
8a615c6b 5165 dif, sdif,
c8123ead 5166 &udp_table, NULL);
6acc9b43
JS
5167#endif
5168 }
5169
5170 if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
5171 WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
5172 sk = NULL;
5173 }
5174 return sk;
5175}
5176
edbf8c01 5177/* bpf_skc_lookup performs the core lookup for different types of sockets,
6acc9b43
JS
5178 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
5179 * Returns the socket as an 'unsigned long' to simplify the casting in the
5180 * callers to satisfy BPF_CALL declarations.
5181 */
edbf8c01
LB
5182static struct sock *
5183__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5184 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
5185 u64 flags)
6acc9b43 5186{
6acc9b43
JS
5187 struct sock *sk = NULL;
5188 u8 family = AF_UNSPEC;
5189 struct net *net;
c8123ead 5190 int sdif;
6acc9b43
JS
5191
5192 family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
f71c6143
JS
5193 if (unlikely(family == AF_UNSPEC || flags ||
5194 !((s32)netns_id < 0 || netns_id <= S32_MAX)))
6acc9b43
JS
5195 goto out;
5196
c8123ead
NH
5197 if (family == AF_INET)
5198 sdif = inet_sdif(skb);
6acc9b43 5199 else
c8123ead
NH
5200 sdif = inet6_sdif(skb);
5201
f71c6143
JS
5202 if ((s32)netns_id < 0) {
5203 net = caller_net;
4cc1feeb 5204 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
f71c6143 5205 } else {
6acc9b43
JS
5206 net = get_net_ns_by_id(caller_net, netns_id);
5207 if (unlikely(!net))
5208 goto out;
c8123ead 5209 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6acc9b43 5210 put_net(net);
6acc9b43
JS
5211 }
5212
edbf8c01
LB
5213out:
5214 return sk;
5215}
5216
5217static struct sock *
5218__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5219 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
5220 u64 flags)
5221{
5222 struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
5223 ifindex, proto, netns_id, flags);
5224
6acc9b43
JS
5225 if (sk)
5226 sk = sk_to_full_sk(sk);
edbf8c01
LB
5227
5228 return sk;
6acc9b43
JS
5229}
5230
edbf8c01
LB
5231static struct sock *
5232bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5233 u8 proto, u64 netns_id, u64 flags)
c8123ead
NH
5234{
5235 struct net *caller_net;
5236 int ifindex;
5237
5238 if (skb->dev) {
5239 caller_net = dev_net(skb->dev);
5240 ifindex = skb->dev->ifindex;
5241 } else {
5242 caller_net = sock_net(skb->sk);
5243 ifindex = 0;
5244 }
5245
edbf8c01
LB
5246 return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
5247 netns_id, flags);
c8123ead
NH
5248}
5249
edbf8c01
LB
5250static struct sock *
5251bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5252 u8 proto, u64 netns_id, u64 flags)
5253{
5254 struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
5255 flags);
5256
5257 if (sk)
5258 sk = sk_to_full_sk(sk);
5259
5260 return sk;
5261}
5262
5263BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
5264 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5265{
5266 return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
5267 netns_id, flags);
5268}
5269
5270static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
5271 .func = bpf_skc_lookup_tcp,
5272 .gpl_only = false,
5273 .pkt_access = true,
5274 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5275 .arg1_type = ARG_PTR_TO_CTX,
5276 .arg2_type = ARG_PTR_TO_MEM,
5277 .arg3_type = ARG_CONST_SIZE,
5278 .arg4_type = ARG_ANYTHING,
5279 .arg5_type = ARG_ANYTHING,
5280};
5281
6acc9b43
JS
5282BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
5283 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5284{
edbf8c01
LB
5285 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
5286 netns_id, flags);
6acc9b43
JS
5287}
5288
5289static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
5290 .func = bpf_sk_lookup_tcp,
5291 .gpl_only = false,
5292 .pkt_access = true,
5293 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5294 .arg1_type = ARG_PTR_TO_CTX,
5295 .arg2_type = ARG_PTR_TO_MEM,
5296 .arg3_type = ARG_CONST_SIZE,
5297 .arg4_type = ARG_ANYTHING,
5298 .arg5_type = ARG_ANYTHING,
5299};
5300
5301BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
5302 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5303{
edbf8c01
LB
5304 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
5305 netns_id, flags);
6acc9b43
JS
5306}
5307
5308static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
5309 .func = bpf_sk_lookup_udp,
5310 .gpl_only = false,
5311 .pkt_access = true,
5312 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5313 .arg1_type = ARG_PTR_TO_CTX,
5314 .arg2_type = ARG_PTR_TO_MEM,
5315 .arg3_type = ARG_CONST_SIZE,
5316 .arg4_type = ARG_ANYTHING,
5317 .arg5_type = ARG_ANYTHING,
5318};
5319
5320BPF_CALL_1(bpf_sk_release, struct sock *, sk)
5321{
5322 if (!sock_flag(sk, SOCK_RCU_FREE))
5323 sock_gen_put(sk);
5324 return 0;
5325}
5326
5327static const struct bpf_func_proto bpf_sk_release_proto = {
5328 .func = bpf_sk_release,
5329 .gpl_only = false,
5330 .ret_type = RET_INTEGER,
1b986589 5331 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6acc9b43 5332};
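
Program-side sketch of the lookup/release pairing enforced by the verifier's reference tracking: bpf_sk_lookup_tcp() from a tc program, followed by the mandatory bpf_sk_release(). The tuple would normally be parsed from the packet; the constants here and the BPF_F_CURRENT_NETNS netns selector are illustrative assumptions.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("classifier")
int tcp_sock_present(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {
		.ipv4.saddr = bpf_htonl(0x0a000001),	/* 10.0.0.1 */
		.ipv4.daddr = bpf_htonl(0x0a000002),	/* 10.0.0.2 */
		.ipv4.sport = bpf_htons(12345),
		.ipv4.dport = bpf_htons(80),
	};
	struct bpf_sock *sk;

	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;

	/* Every acquired socket must be released before the program exits. */
	bpf_sk_release(sk);
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";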
c8123ead
NH
5333
5334BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
5335 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5336{
5337 struct net *caller_net = dev_net(ctx->rxq->dev);
5338 int ifindex = ctx->rxq->dev->ifindex;
5339
edbf8c01
LB
5340 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
5341 ifindex, IPPROTO_UDP, netns_id,
5342 flags);
c8123ead
NH
5343}
5344
5345static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
5346 .func = bpf_xdp_sk_lookup_udp,
5347 .gpl_only = false,
5348 .pkt_access = true,
5349 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5350 .arg1_type = ARG_PTR_TO_CTX,
5351 .arg2_type = ARG_PTR_TO_MEM,
5352 .arg3_type = ARG_CONST_SIZE,
5353 .arg4_type = ARG_ANYTHING,
5354 .arg5_type = ARG_ANYTHING,
5355};
5356
edbf8c01
LB
5357BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
5358 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5359{
5360 struct net *caller_net = dev_net(ctx->rxq->dev);
5361 int ifindex = ctx->rxq->dev->ifindex;
5362
5363 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
5364 ifindex, IPPROTO_TCP, netns_id,
5365 flags);
5366}
5367
5368static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
5369 .func = bpf_xdp_skc_lookup_tcp,
5370 .gpl_only = false,
5371 .pkt_access = true,
5372 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5373 .arg1_type = ARG_PTR_TO_CTX,
5374 .arg2_type = ARG_PTR_TO_MEM,
5375 .arg3_type = ARG_CONST_SIZE,
5376 .arg4_type = ARG_ANYTHING,
5377 .arg5_type = ARG_ANYTHING,
5378};
5379
c8123ead
NH
5380BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
5381 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5382{
5383 struct net *caller_net = dev_net(ctx->rxq->dev);
5384 int ifindex = ctx->rxq->dev->ifindex;
5385
edbf8c01
LB
5386 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
5387 ifindex, IPPROTO_TCP, netns_id,
5388 flags);
c8123ead
NH
5389}
5390
5391static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
5392 .func = bpf_xdp_sk_lookup_tcp,
5393 .gpl_only = false,
5394 .pkt_access = true,
5395 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5396 .arg1_type = ARG_PTR_TO_CTX,
5397 .arg2_type = ARG_PTR_TO_MEM,
5398 .arg3_type = ARG_CONST_SIZE,
5399 .arg4_type = ARG_ANYTHING,
5400 .arg5_type = ARG_ANYTHING,
5401};
6c49e65e 5402
edbf8c01
LB
5403BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
5404 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5405{
5406 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
5407 sock_net(ctx->sk), 0,
5408 IPPROTO_TCP, netns_id, flags);
5409}
5410
5411static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
5412 .func = bpf_sock_addr_skc_lookup_tcp,
5413 .gpl_only = false,
5414 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5415 .arg1_type = ARG_PTR_TO_CTX,
5416 .arg2_type = ARG_PTR_TO_MEM,
5417 .arg3_type = ARG_CONST_SIZE,
5418 .arg4_type = ARG_ANYTHING,
5419 .arg5_type = ARG_ANYTHING,
5420};
5421
6c49e65e
AI
5422BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
5423 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5424{
edbf8c01
LB
5425 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
5426 sock_net(ctx->sk), 0, IPPROTO_TCP,
5427 netns_id, flags);
6c49e65e
AI
5428}
5429
5430static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
5431 .func = bpf_sock_addr_sk_lookup_tcp,
5432 .gpl_only = false,
5433 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5434 .arg1_type = ARG_PTR_TO_CTX,
5435 .arg2_type = ARG_PTR_TO_MEM,
5436 .arg3_type = ARG_CONST_SIZE,
5437 .arg4_type = ARG_ANYTHING,
5438 .arg5_type = ARG_ANYTHING,
5439};
5440
5441BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
5442 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5443{
edbf8c01
LB
5444 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
5445 sock_net(ctx->sk), 0, IPPROTO_UDP,
5446 netns_id, flags);
6c49e65e
AI
5447}
5448
5449static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
5450 .func = bpf_sock_addr_sk_lookup_udp,
5451 .gpl_only = false,
5452 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5453 .arg1_type = ARG_PTR_TO_CTX,
5454 .arg2_type = ARG_PTR_TO_MEM,
5455 .arg3_type = ARG_CONST_SIZE,
5456 .arg4_type = ARG_ANYTHING,
5457 .arg5_type = ARG_ANYTHING,
5458};
5459
655a51e5
MKL
5460bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
5461 struct bpf_insn_access_aux *info)
5462{
5463 if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
5464 return false;
5465
5466 if (off % size != 0)
5467 return false;
5468
5469 switch (off) {
5470 case offsetof(struct bpf_tcp_sock, bytes_received):
5471 case offsetof(struct bpf_tcp_sock, bytes_acked):
5472 return size == sizeof(__u64);
5473 default:
5474 return size == sizeof(__u32);
5475 }
5476}
5477
5478u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
5479 const struct bpf_insn *si,
5480 struct bpf_insn *insn_buf,
5481 struct bpf_prog *prog, u32 *target_size)
5482{
5483 struct bpf_insn *insn = insn_buf;
5484
5485#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
5486 do { \
5487 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
5488 FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
5489 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
5490 si->dst_reg, si->src_reg, \
5491 offsetof(struct tcp_sock, FIELD)); \
5492 } while (0)
5493
5494 CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
5495 BPF_TCP_SOCK_GET_COMMON);
5496
5497 if (insn > insn_buf)
5498 return insn - insn_buf;
5499
5500 switch (si->off) {
5501 case offsetof(struct bpf_tcp_sock, rtt_min):
5502 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
5503 sizeof(struct minmax));
5504 BUILD_BUG_ON(sizeof(struct minmax) <
5505 sizeof(struct minmax_sample));
5506
5507 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5508 offsetof(struct tcp_sock, rtt_min) +
5509 offsetof(struct minmax_sample, v));
5510 break;
5511 }
5512
5513 return insn - insn_buf;
5514}
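/* Editor's note: fields covered by BPF_TCP_SOCK_GET_COMMON above need no
 * helper call at run time; a program-side load such as (illustrative)
 *
 *	__u32 cwnd = tp->snd_cwnd;	// tp is a struct bpf_tcp_sock *
 *
 * is rewritten by this conversion into a direct BPF_LDX from the
 * corresponding struct tcp_sock offset.
 */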
5515
5516BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
5517{
655a51e5
MKL
5518 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
5519 return (unsigned long)sk;
5520
5521 return (unsigned long)NULL;
5522}
5523
5524static const struct bpf_func_proto bpf_tcp_sock_proto = {
5525 .func = bpf_tcp_sock,
5526 .gpl_only = false,
5527 .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
5528 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5529};
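/* Editor's sketch (not part of this file): bpf_tcp_sock() narrows a
 * full TCP socket to the read-only struct bpf_tcp_sock view from the
 * UAPI header. A hypothetical cgroup/skb fragment:
 *
 *	struct bpf_sock *sk = skb->sk;
 *	struct bpf_tcp_sock *tp;
 *	__u32 cwnd = 0;
 *
 *	if (sk) {
 *		tp = bpf_tcp_sock(sk);
 *		if (tp)
 *			cwnd = tp->snd_cwnd;	// e.g. export via a map
 *	}
 *	return 1;				// keep the packet
 */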
5530
dbafd7dd
MKL
5531BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
5532{
5533 sk = sk_to_full_sk(sk);
5534
5535 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
5536 return (unsigned long)sk;
5537
5538 return (unsigned long)NULL;
5539}
5540
5541static const struct bpf_func_proto bpf_get_listener_sock_proto = {
5542 .func = bpf_get_listener_sock,
5543 .gpl_only = false,
5544 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5545 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5546};
5547
f7c917ba 5548BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
5549{
5550 unsigned int iphdr_len;
5551
5552 if (skb->protocol == cpu_to_be16(ETH_P_IP))
5553 iphdr_len = sizeof(struct iphdr);
5554 else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
5555 iphdr_len = sizeof(struct ipv6hdr);
5556 else
5557 return 0;
5558
5559 if (skb_headlen(skb) < iphdr_len)
5560 return 0;
5561
5562 if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
5563 return 0;
5564
5565 return INET_ECN_set_ce(skb);
5566}
5567
5568static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
5569 .func = bpf_skb_ecn_set_ce,
5570 .gpl_only = false,
5571 .ret_type = RET_INTEGER,
5572 .arg1_type = ARG_PTR_TO_CTX,
5573};
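/* Editor's sketch (not part of this file): a cgroup egress program could
 * mark packets CE when it detects local congestion; the helper is a no-op
 * for non-IP packets and does not mark Not-ECT traffic:
 *
 *	if (queue_is_congested())	// hypothetical policy check
 *		bpf_skb_ecn_set_ce(skb);
 *	return 1;			// keep the packet
 */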
39904084
LB
5574
5575BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
5576 struct tcphdr *, th, u32, th_len)
5577{
5578#ifdef CONFIG_SYN_COOKIES
5579 u32 cookie;
5580 int ret;
5581
5582 if (unlikely(th_len < sizeof(*th)))
5583 return -EINVAL;
5584
5585 /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
5586 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
5587 return -EINVAL;
5588
5589 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
5590 return -EINVAL;
5591
5592 if (!th->ack || th->rst || th->syn)
5593 return -ENOENT;
5594
5595 if (tcp_synq_no_recent_overflow(sk))
5596 return -ENOENT;
5597
5598 cookie = ntohl(th->ack_seq) - 1;
5599
5600 switch (sk->sk_family) {
5601 case AF_INET:
5602 if (unlikely(iph_len < sizeof(struct iphdr)))
5603 return -EINVAL;
5604
5605 ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
5606 break;
5607
5608#if IS_BUILTIN(CONFIG_IPV6)
5609 case AF_INET6:
5610 if (unlikely(iph_len < sizeof(struct ipv6hdr)))
5611 return -EINVAL;
5612
5613 ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
5614 break;
5615#endif /* CONFIG_IPV6 */
5616
5617 default:
5618 return -EPROTONOSUPPORT;
5619 }
5620
5621 if (ret > 0)
5622 return 0;
5623
5624 return -ENOENT;
5625#else
5626 return -ENOTSUPP;
5627#endif
5628}
5629
5630static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
5631 .func = bpf_tcp_check_syncookie,
5632 .gpl_only = true,
5633 .pkt_access = true,
5634 .ret_type = RET_INTEGER,
5635 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5636 .arg2_type = ARG_PTR_TO_MEM,
5637 .arg3_type = ARG_CONST_SIZE,
5638 .arg4_type = ARG_PTR_TO_MEM,
5639 .arg5_type = ARG_CONST_SIZE,
5640};
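/* Editor's sketch (not part of this file): a SYN-proxy style XDP program
 * that has already parsed and bounds-checked the headers (iph, tcph and
 * tuple below are hypothetical) could validate the cookie carried in an
 * ACK roughly like this; a return of 0 means the cookie is valid:
 *
 *	sk = bpf_skc_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
 *				BPF_F_CURRENT_NETNS, 0);
 *	if (!sk)
 *		return XDP_PASS;
 *	ret = bpf_tcp_check_syncookie(sk, iph, sizeof(*iph),
 *				      tcph, sizeof(*tcph));
 *	bpf_sk_release(sk);
 *	return ret == 0 ? XDP_PASS : XDP_DROP;
 */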
5641
df3f94a0 5642#endif /* CONFIG_INET */
6acc9b43 5643
fe94cc29
MX
5644bool bpf_helper_changes_pkt_data(void *func)
5645{
5646 if (func == bpf_skb_vlan_push ||
5647 func == bpf_skb_vlan_pop ||
5648 func == bpf_skb_store_bytes ||
5649 func == bpf_skb_change_proto ||
5650 func == bpf_skb_change_head ||
0ea488ff 5651 func == sk_skb_change_head ||
fe94cc29 5652 func == bpf_skb_change_tail ||
0ea488ff 5653 func == sk_skb_change_tail ||
fe94cc29
MX
5654 func == bpf_skb_adjust_room ||
5655 func == bpf_skb_pull_data ||
0ea488ff 5656 func == sk_skb_pull_data ||
fe94cc29
MX
5657 func == bpf_clone_redirect ||
5658 func == bpf_l3_csum_replace ||
5659 func == bpf_l4_csum_replace ||
5660 func == bpf_xdp_adjust_head ||
5661 func == bpf_xdp_adjust_meta ||
5662 func == bpf_msg_pull_data ||
6fff607e 5663 func == bpf_msg_push_data ||
7246d8ed 5664 func == bpf_msg_pop_data ||
fe94cc29 5665 func == bpf_xdp_adjust_tail ||
61d76980 5666#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
fe94cc29
MX
5667 func == bpf_lwt_seg6_store_bytes ||
5668 func == bpf_lwt_seg6_adjust_srh ||
61d76980
MX
5669 func == bpf_lwt_seg6_action ||
5670#endif
3e0bd37c
PO
5671 func == bpf_lwt_in_push_encap ||
5672 func == bpf_lwt_xmit_push_encap)
fe94cc29
MX
5673 return true;
5674
5675 return false;
5676}
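/* Editor's note: the helpers listed above may move or resize packet data,
 * so the verifier invalidates all packet pointers derived before such a
 * call. Illustrative program-side consequence (not part of this file):
 *
 *	bpf_skb_pull_data(skb, 0);
 *	data     = (void *)(long)skb->data;	// must be re-read
 *	data_end = (void *)(long)skb->data_end;	// and re-validated
 *	if (data + sizeof(struct ethhdr) > data_end)
 *		return TC_ACT_SHOT;
 */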
5677
d4052c4a 5678static const struct bpf_func_proto *
2492d3b8 5679bpf_base_func_proto(enum bpf_func_id func_id)
89aa0758
AS
5680{
5681 switch (func_id) {
5682 case BPF_FUNC_map_lookup_elem:
5683 return &bpf_map_lookup_elem_proto;
5684 case BPF_FUNC_map_update_elem:
5685 return &bpf_map_update_elem_proto;
5686 case BPF_FUNC_map_delete_elem:
5687 return &bpf_map_delete_elem_proto;
f1a2e44a
MV
5688 case BPF_FUNC_map_push_elem:
5689 return &bpf_map_push_elem_proto;
5690 case BPF_FUNC_map_pop_elem:
5691 return &bpf_map_pop_elem_proto;
5692 case BPF_FUNC_map_peek_elem:
5693 return &bpf_map_peek_elem_proto;
03e69b50
DB
5694 case BPF_FUNC_get_prandom_u32:
5695 return &bpf_get_prandom_u32_proto;
c04167ce 5696 case BPF_FUNC_get_smp_processor_id:
80b48c44 5697 return &bpf_get_raw_smp_processor_id_proto;
2d0e30c3
DB
5698 case BPF_FUNC_get_numa_node_id:
5699 return &bpf_get_numa_node_id_proto;
04fd61ab
AS
5700 case BPF_FUNC_tail_call:
5701 return &bpf_tail_call_proto;
17ca8cbf
DB
5702 case BPF_FUNC_ktime_get_ns:
5703 return &bpf_ktime_get_ns_proto;
d83525ca
AS
5704 default:
5705 break;
5706 }
5707
5708 if (!capable(CAP_SYS_ADMIN))
5709 return NULL;
5710
5711 switch (func_id) {
5712 case BPF_FUNC_spin_lock:
5713 return &bpf_spin_lock_proto;
5714 case BPF_FUNC_spin_unlock:
5715 return &bpf_spin_unlock_proto;
0756ea3e 5716 case BPF_FUNC_trace_printk:
d83525ca 5717 return bpf_get_trace_printk_proto();
89aa0758
AS
5718 default:
5719 return NULL;
5720 }
5721}
5722
ae2cf1c4 5723static const struct bpf_func_proto *
5e43f899 5724sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
ae2cf1c4
DA
5725{
5726 switch (func_id) {
5727 /* inet and inet6 sockets are created in a process
5728 * context so there is always a valid uid/gid
5729 */
5730 case BPF_FUNC_get_current_uid_gid:
5731 return &bpf_get_current_uid_gid_proto;
cd339431
RG
5732 case BPF_FUNC_get_local_storage:
5733 return &bpf_get_local_storage_proto;
ae2cf1c4
DA
5734 default:
5735 return bpf_base_func_proto(func_id);
5736 }
5737}
5738
4fbac77d
AI
5739static const struct bpf_func_proto *
5740sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5741{
5742 switch (func_id) {
5743 /* inet and inet6 sockets are created in a process
5744 * context so there is always a valid uid/gid
5745 */
5746 case BPF_FUNC_get_current_uid_gid:
5747 return &bpf_get_current_uid_gid_proto;
d74bad4e
AI
5748 case BPF_FUNC_bind:
5749 switch (prog->expected_attach_type) {
5750 case BPF_CGROUP_INET4_CONNECT:
5751 case BPF_CGROUP_INET6_CONNECT:
5752 return &bpf_bind_proto;
5753 default:
5754 return NULL;
5755 }
d692f113
AI
5756 case BPF_FUNC_get_socket_cookie:
5757 return &bpf_get_socket_cookie_sock_addr_proto;
cd339431
RG
5758 case BPF_FUNC_get_local_storage:
5759 return &bpf_get_local_storage_proto;
6c49e65e
AI
5760#ifdef CONFIG_INET
5761 case BPF_FUNC_sk_lookup_tcp:
5762 return &bpf_sock_addr_sk_lookup_tcp_proto;
5763 case BPF_FUNC_sk_lookup_udp:
5764 return &bpf_sock_addr_sk_lookup_udp_proto;
5765 case BPF_FUNC_sk_release:
5766 return &bpf_sk_release_proto;
edbf8c01
LB
5767 case BPF_FUNC_skc_lookup_tcp:
5768 return &bpf_sock_addr_skc_lookup_tcp_proto;
6c49e65e 5769#endif /* CONFIG_INET */
4fbac77d
AI
5770 default:
5771 return bpf_base_func_proto(func_id);
5772 }
5773}
5774
2492d3b8 5775static const struct bpf_func_proto *
5e43f899 5776sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2492d3b8
DB
5777{
5778 switch (func_id) {
5779 case BPF_FUNC_skb_load_bytes:
5780 return &bpf_skb_load_bytes_proto;
4e1ec56c
DB
5781 case BPF_FUNC_skb_load_bytes_relative:
5782 return &bpf_skb_load_bytes_relative_proto;
91b8270f
CF
5783 case BPF_FUNC_get_socket_cookie:
5784 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
5785 case BPF_FUNC_get_socket_uid:
5786 return &bpf_get_socket_uid_proto;
2492d3b8
DB
5787 default:
5788 return bpf_base_func_proto(func_id);
5789 }
5790}
5791
cd339431
RG
5792static const struct bpf_func_proto *
5793cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
5794{
5795 switch (func_id) {
5796 case BPF_FUNC_get_local_storage:
5797 return &bpf_get_local_storage_proto;
46f8bc92
MKL
5798 case BPF_FUNC_sk_fullsock:
5799 return &bpf_sk_fullsock_proto;
655a51e5
MKL
5800#ifdef CONFIG_INET
5801 case BPF_FUNC_tcp_sock:
5802 return &bpf_tcp_sock_proto;
dbafd7dd
MKL
5803 case BPF_FUNC_get_listener_sock:
5804 return &bpf_get_listener_sock_proto;
f7c917ba 5805 case BPF_FUNC_skb_ecn_set_ce:
5806 return &bpf_skb_ecn_set_ce_proto;
655a51e5 5807#endif
cd339431
RG
5808 default:
5809 return sk_filter_func_proto(func_id, prog);
5810 }
5811}
5812
608cd71a 5813static const struct bpf_func_proto *
5e43f899 5814tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
608cd71a
AS
5815{
5816 switch (func_id) {
5817 case BPF_FUNC_skb_store_bytes:
5818 return &bpf_skb_store_bytes_proto;
05c74e5e
DB
5819 case BPF_FUNC_skb_load_bytes:
5820 return &bpf_skb_load_bytes_proto;
4e1ec56c
DB
5821 case BPF_FUNC_skb_load_bytes_relative:
5822 return &bpf_skb_load_bytes_relative_proto;
36bbef52
DB
5823 case BPF_FUNC_skb_pull_data:
5824 return &bpf_skb_pull_data_proto;
7d672345
DB
5825 case BPF_FUNC_csum_diff:
5826 return &bpf_csum_diff_proto;
36bbef52
DB
5827 case BPF_FUNC_csum_update:
5828 return &bpf_csum_update_proto;
91bc4822
AS
5829 case BPF_FUNC_l3_csum_replace:
5830 return &bpf_l3_csum_replace_proto;
5831 case BPF_FUNC_l4_csum_replace:
5832 return &bpf_l4_csum_replace_proto;
3896d655
AS
5833 case BPF_FUNC_clone_redirect:
5834 return &bpf_clone_redirect_proto;
8d20aabe
DB
5835 case BPF_FUNC_get_cgroup_classid:
5836 return &bpf_get_cgroup_classid_proto;
4e10df9a
AS
5837 case BPF_FUNC_skb_vlan_push:
5838 return &bpf_skb_vlan_push_proto;
5839 case BPF_FUNC_skb_vlan_pop:
5840 return &bpf_skb_vlan_pop_proto;
6578171a
DB
5841 case BPF_FUNC_skb_change_proto:
5842 return &bpf_skb_change_proto_proto;
d2485c42
DB
5843 case BPF_FUNC_skb_change_type:
5844 return &bpf_skb_change_type_proto;
2be7e212
DB
5845 case BPF_FUNC_skb_adjust_room:
5846 return &bpf_skb_adjust_room_proto;
5293efe6
DB
5847 case BPF_FUNC_skb_change_tail:
5848 return &bpf_skb_change_tail_proto;
d3aa45ce
AS
5849 case BPF_FUNC_skb_get_tunnel_key:
5850 return &bpf_skb_get_tunnel_key_proto;
5851 case BPF_FUNC_skb_set_tunnel_key:
14ca0751
DB
5852 return bpf_get_skb_set_tunnel_proto(func_id);
5853 case BPF_FUNC_skb_get_tunnel_opt:
5854 return &bpf_skb_get_tunnel_opt_proto;
5855 case BPF_FUNC_skb_set_tunnel_opt:
5856 return bpf_get_skb_set_tunnel_proto(func_id);
27b29f63
AS
5857 case BPF_FUNC_redirect:
5858 return &bpf_redirect_proto;
c46646d0
DB
5859 case BPF_FUNC_get_route_realm:
5860 return &bpf_get_route_realm_proto;
13c5c240
DB
5861 case BPF_FUNC_get_hash_recalc:
5862 return &bpf_get_hash_recalc_proto;
7a4b28c6
DB
5863 case BPF_FUNC_set_hash_invalid:
5864 return &bpf_set_hash_invalid_proto;
ded092cd
DB
5865 case BPF_FUNC_set_hash:
5866 return &bpf_set_hash_proto;
bd570ff9 5867 case BPF_FUNC_perf_event_output:
555c8a86 5868 return &bpf_skb_event_output_proto;
80b48c44
DB
5869 case BPF_FUNC_get_smp_processor_id:
5870 return &bpf_get_smp_processor_id_proto;
747ea55e
DB
5871 case BPF_FUNC_skb_under_cgroup:
5872 return &bpf_skb_under_cgroup_proto;
91b8270f
CF
5873 case BPF_FUNC_get_socket_cookie:
5874 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
5875 case BPF_FUNC_get_socket_uid:
5876 return &bpf_get_socket_uid_proto;
cb20b08e
DB
5877 case BPF_FUNC_fib_lookup:
5878 return &bpf_skb_fib_lookup_proto;
46f8bc92
MKL
5879 case BPF_FUNC_sk_fullsock:
5880 return &bpf_sk_fullsock_proto;
12bed760
EB
5881#ifdef CONFIG_XFRM
5882 case BPF_FUNC_skb_get_xfrm_state:
5883 return &bpf_skb_get_xfrm_state_proto;
5884#endif
cb20b08e
DB
5885#ifdef CONFIG_SOCK_CGROUP_DATA
5886 case BPF_FUNC_skb_cgroup_id:
5887 return &bpf_skb_cgroup_id_proto;
77236281
AI
5888 case BPF_FUNC_skb_ancestor_cgroup_id:
5889 return &bpf_skb_ancestor_cgroup_id_proto;
cb20b08e 5890#endif
df3f94a0 5891#ifdef CONFIG_INET
6acc9b43
JS
5892 case BPF_FUNC_sk_lookup_tcp:
5893 return &bpf_sk_lookup_tcp_proto;
5894 case BPF_FUNC_sk_lookup_udp:
5895 return &bpf_sk_lookup_udp_proto;
5896 case BPF_FUNC_sk_release:
5897 return &bpf_sk_release_proto;
655a51e5
MKL
5898 case BPF_FUNC_tcp_sock:
5899 return &bpf_tcp_sock_proto;
dbafd7dd
MKL
5900 case BPF_FUNC_get_listener_sock:
5901 return &bpf_get_listener_sock_proto;
edbf8c01
LB
5902 case BPF_FUNC_skc_lookup_tcp:
5903 return &bpf_skc_lookup_tcp_proto;
39904084
LB
5904 case BPF_FUNC_tcp_check_syncookie:
5905 return &bpf_tcp_check_syncookie_proto;
df3f94a0 5906#endif
608cd71a 5907 default:
2492d3b8 5908 return bpf_base_func_proto(func_id);
608cd71a
AS
5909 }
5910}
5911
6a773a15 5912static const struct bpf_func_proto *
5e43f899 5913xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6a773a15 5914{
4de16969
DB
5915 switch (func_id) {
5916 case BPF_FUNC_perf_event_output:
5917 return &bpf_xdp_event_output_proto;
669dc4d7
DB
5918 case BPF_FUNC_get_smp_processor_id:
5919 return &bpf_get_smp_processor_id_proto;
205c3807
DB
5920 case BPF_FUNC_csum_diff:
5921 return &bpf_csum_diff_proto;
17bedab2
MKL
5922 case BPF_FUNC_xdp_adjust_head:
5923 return &bpf_xdp_adjust_head_proto;
de8f3a83
DB
5924 case BPF_FUNC_xdp_adjust_meta:
5925 return &bpf_xdp_adjust_meta_proto;
814abfab
JF
5926 case BPF_FUNC_redirect:
5927 return &bpf_xdp_redirect_proto;
97f91a7c 5928 case BPF_FUNC_redirect_map:
e4a8e817 5929 return &bpf_xdp_redirect_map_proto;
b32cc5b9
NS
5930 case BPF_FUNC_xdp_adjust_tail:
5931 return &bpf_xdp_adjust_tail_proto;
87f5fc7e
DA
5932 case BPF_FUNC_fib_lookup:
5933 return &bpf_xdp_fib_lookup_proto;
c8123ead
NH
5934#ifdef CONFIG_INET
5935 case BPF_FUNC_sk_lookup_udp:
5936 return &bpf_xdp_sk_lookup_udp_proto;
5937 case BPF_FUNC_sk_lookup_tcp:
5938 return &bpf_xdp_sk_lookup_tcp_proto;
5939 case BPF_FUNC_sk_release:
5940 return &bpf_sk_release_proto;
edbf8c01
LB
5941 case BPF_FUNC_skc_lookup_tcp:
5942 return &bpf_xdp_skc_lookup_tcp_proto;
39904084
LB
5943 case BPF_FUNC_tcp_check_syncookie:
5944 return &bpf_tcp_check_syncookie_proto;
c8123ead 5945#endif
4de16969 5946 default:
2492d3b8 5947 return bpf_base_func_proto(func_id);
4de16969 5948 }
6a773a15
BB
5949}
5950
604326b4
DB
5951const struct bpf_func_proto bpf_sock_map_update_proto __weak;
5952const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
5953
8c4b4c7e 5954static const struct bpf_func_proto *
5e43f899 5955sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8c4b4c7e
LB
5956{
5957 switch (func_id) {
5958 case BPF_FUNC_setsockopt:
5959 return &bpf_setsockopt_proto;
cd86d1fd
LB
5960 case BPF_FUNC_getsockopt:
5961 return &bpf_getsockopt_proto;
b13d8807
LB
5962 case BPF_FUNC_sock_ops_cb_flags_set:
5963 return &bpf_sock_ops_cb_flags_set_proto;
174a79ff
JF
5964 case BPF_FUNC_sock_map_update:
5965 return &bpf_sock_map_update_proto;
81110384
JF
5966 case BPF_FUNC_sock_hash_update:
5967 return &bpf_sock_hash_update_proto;
d692f113
AI
5968 case BPF_FUNC_get_socket_cookie:
5969 return &bpf_get_socket_cookie_sock_ops_proto;
cd339431
RG
5970 case BPF_FUNC_get_local_storage:
5971 return &bpf_get_local_storage_proto;
a5a3a828
SV
5972 case BPF_FUNC_perf_event_output:
5973 return &bpf_sockopt_event_output_proto;
8c4b4c7e
LB
5974 default:
5975 return bpf_base_func_proto(func_id);
5976 }
5977}
5978
604326b4
DB
5979const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
5980const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
5981
5e43f899
AI
5982static const struct bpf_func_proto *
5983sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4f738adb
JF
5984{
5985 switch (func_id) {
5986 case BPF_FUNC_msg_redirect_map:
5987 return &bpf_msg_redirect_map_proto;
81110384
JF
5988 case BPF_FUNC_msg_redirect_hash:
5989 return &bpf_msg_redirect_hash_proto;
2a100317
JF
5990 case BPF_FUNC_msg_apply_bytes:
5991 return &bpf_msg_apply_bytes_proto;
91843d54
JF
5992 case BPF_FUNC_msg_cork_bytes:
5993 return &bpf_msg_cork_bytes_proto;
015632bb
JF
5994 case BPF_FUNC_msg_pull_data:
5995 return &bpf_msg_pull_data_proto;
6fff607e
JF
5996 case BPF_FUNC_msg_push_data:
5997 return &bpf_msg_push_data_proto;
7246d8ed
JF
5998 case BPF_FUNC_msg_pop_data:
5999 return &bpf_msg_pop_data_proto;
4f738adb
JF
6000 default:
6001 return bpf_base_func_proto(func_id);
6002 }
6003}
6004
604326b4
DB
6005const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
6006const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
6007
5e43f899
AI
6008static const struct bpf_func_proto *
6009sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
b005fd18
JF
6010{
6011 switch (func_id) {
8a31db56
JF
6012 case BPF_FUNC_skb_store_bytes:
6013 return &bpf_skb_store_bytes_proto;
b005fd18
JF
6014 case BPF_FUNC_skb_load_bytes:
6015 return &bpf_skb_load_bytes_proto;
8a31db56 6016 case BPF_FUNC_skb_pull_data:
0ea488ff 6017 return &sk_skb_pull_data_proto;
8a31db56 6018 case BPF_FUNC_skb_change_tail:
0ea488ff 6019 return &sk_skb_change_tail_proto;
8a31db56 6020 case BPF_FUNC_skb_change_head:
0ea488ff 6021 return &sk_skb_change_head_proto;
b005fd18
JF
6022 case BPF_FUNC_get_socket_cookie:
6023 return &bpf_get_socket_cookie_proto;
6024 case BPF_FUNC_get_socket_uid:
6025 return &bpf_get_socket_uid_proto;
174a79ff
JF
6026 case BPF_FUNC_sk_redirect_map:
6027 return &bpf_sk_redirect_map_proto;
81110384
JF
6028 case BPF_FUNC_sk_redirect_hash:
6029 return &bpf_sk_redirect_hash_proto;
df3f94a0 6030#ifdef CONFIG_INET
6acc9b43
JS
6031 case BPF_FUNC_sk_lookup_tcp:
6032 return &bpf_sk_lookup_tcp_proto;
6033 case BPF_FUNC_sk_lookup_udp:
6034 return &bpf_sk_lookup_udp_proto;
6035 case BPF_FUNC_sk_release:
6036 return &bpf_sk_release_proto;
edbf8c01
LB
6037 case BPF_FUNC_skc_lookup_tcp:
6038 return &bpf_skc_lookup_tcp_proto;
df3f94a0 6039#endif
b005fd18
JF
6040 default:
6041 return bpf_base_func_proto(func_id);
6042 }
6043}
6044
d58e468b
PP
6045static const struct bpf_func_proto *
6046flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6047{
6048 switch (func_id) {
6049 case BPF_FUNC_skb_load_bytes:
6050 return &bpf_skb_load_bytes_proto;
6051 default:
6052 return bpf_base_func_proto(func_id);
6053 }
6054}
6055
cd3092c7
MX
6056static const struct bpf_func_proto *
6057lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6058{
6059 switch (func_id) {
6060 case BPF_FUNC_skb_load_bytes:
6061 return &bpf_skb_load_bytes_proto;
6062 case BPF_FUNC_skb_pull_data:
6063 return &bpf_skb_pull_data_proto;
6064 case BPF_FUNC_csum_diff:
6065 return &bpf_csum_diff_proto;
6066 case BPF_FUNC_get_cgroup_classid:
6067 return &bpf_get_cgroup_classid_proto;
6068 case BPF_FUNC_get_route_realm:
6069 return &bpf_get_route_realm_proto;
6070 case BPF_FUNC_get_hash_recalc:
6071 return &bpf_get_hash_recalc_proto;
6072 case BPF_FUNC_perf_event_output:
6073 return &bpf_skb_event_output_proto;
6074 case BPF_FUNC_get_smp_processor_id:
6075 return &bpf_get_smp_processor_id_proto;
6076 case BPF_FUNC_skb_under_cgroup:
6077 return &bpf_skb_under_cgroup_proto;
6078 default:
6079 return bpf_base_func_proto(func_id);
6080 }
6081}
6082
6083static const struct bpf_func_proto *
6084lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6085{
6086 switch (func_id) {
6087 case BPF_FUNC_lwt_push_encap:
3e0bd37c 6088 return &bpf_lwt_in_push_encap_proto;
cd3092c7
MX
6089 default:
6090 return lwt_out_func_proto(func_id, prog);
6091 }
6092}
6093
3a0af8fd 6094static const struct bpf_func_proto *
5e43f899 6095lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3a0af8fd
TG
6096{
6097 switch (func_id) {
6098 case BPF_FUNC_skb_get_tunnel_key:
6099 return &bpf_skb_get_tunnel_key_proto;
6100 case BPF_FUNC_skb_set_tunnel_key:
6101 return bpf_get_skb_set_tunnel_proto(func_id);
6102 case BPF_FUNC_skb_get_tunnel_opt:
6103 return &bpf_skb_get_tunnel_opt_proto;
6104 case BPF_FUNC_skb_set_tunnel_opt:
6105 return bpf_get_skb_set_tunnel_proto(func_id);
6106 case BPF_FUNC_redirect:
6107 return &bpf_redirect_proto;
6108 case BPF_FUNC_clone_redirect:
6109 return &bpf_clone_redirect_proto;
6110 case BPF_FUNC_skb_change_tail:
6111 return &bpf_skb_change_tail_proto;
6112 case BPF_FUNC_skb_change_head:
6113 return &bpf_skb_change_head_proto;
6114 case BPF_FUNC_skb_store_bytes:
6115 return &bpf_skb_store_bytes_proto;
6116 case BPF_FUNC_csum_update:
6117 return &bpf_csum_update_proto;
6118 case BPF_FUNC_l3_csum_replace:
6119 return &bpf_l3_csum_replace_proto;
6120 case BPF_FUNC_l4_csum_replace:
6121 return &bpf_l4_csum_replace_proto;
6122 case BPF_FUNC_set_hash_invalid:
6123 return &bpf_set_hash_invalid_proto;
3e0bd37c
PO
6124 case BPF_FUNC_lwt_push_encap:
6125 return &bpf_lwt_xmit_push_encap_proto;
3a0af8fd 6126 default:
cd3092c7 6127 return lwt_out_func_proto(func_id, prog);
3a0af8fd
TG
6128 }
6129}
6130
004d4b27
MX
6131static const struct bpf_func_proto *
6132lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6133{
6134 switch (func_id) {
61d76980 6135#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
004d4b27
MX
6136 case BPF_FUNC_lwt_seg6_store_bytes:
6137 return &bpf_lwt_seg6_store_bytes_proto;
6138 case BPF_FUNC_lwt_seg6_action:
6139 return &bpf_lwt_seg6_action_proto;
6140 case BPF_FUNC_lwt_seg6_adjust_srh:
6141 return &bpf_lwt_seg6_adjust_srh_proto;
61d76980 6142#endif
004d4b27
MX
6143 default:
6144 return lwt_out_func_proto(func_id, prog);
3a0af8fd
TG
6145 }
6146}
6147
f96da094 6148static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
5e43f899 6149 const struct bpf_prog *prog,
f96da094 6150 struct bpf_insn_access_aux *info)
23994631 6151{
f96da094 6152 const int size_default = sizeof(__u32);
23994631 6153
9bac3d6d
AS
6154 if (off < 0 || off >= sizeof(struct __sk_buff))
6155 return false;
62c7989b 6156
4936e352 6157 /* The verifier guarantees that size > 0. */
9bac3d6d
AS
6158 if (off % size != 0)
6159 return false;
62c7989b
DB
6160
6161 switch (off) {
f96da094
DB
6162 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
6163 if (off + size > offsetofend(struct __sk_buff, cb[4]))
62c7989b
DB
6164 return false;
6165 break;
8a31db56
JF
6166 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
6167 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
6168 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
6169 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
f96da094 6170 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 6171 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094
DB
6172 case bpf_ctx_range(struct __sk_buff, data_end):
6173 if (size != size_default)
23994631 6174 return false;
31fd8581 6175 break;
b7df9ada
DB
6176 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
6177 if (size != sizeof(__u64))
d58e468b
PP
6178 return false;
6179 break;
f11216b2
VD
6180 case bpf_ctx_range(struct __sk_buff, tstamp):
6181 if (size != sizeof(__u64))
6182 return false;
6183 break;
46f8bc92
MKL
6184 case offsetof(struct __sk_buff, sk):
6185 if (type == BPF_WRITE || size != sizeof(__u64))
6186 return false;
6187 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
6188 break;
31fd8581 6189 default:
f96da094 6190 /* Only narrow read access allowed for now. */
31fd8581 6191 if (type == BPF_WRITE) {
f96da094 6192 if (size != size_default)
31fd8581
YS
6193 return false;
6194 } else {
f96da094
DB
6195 bpf_ctx_record_field_size(info, size_default);
6196 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
23994631 6197 return false;
31fd8581 6198 }
62c7989b 6199 }
9bac3d6d
AS
6200
6201 return true;
6202}
6203
d691f9e8 6204static bool sk_filter_is_valid_access(int off, int size,
19de99f7 6205 enum bpf_access_type type,
5e43f899 6206 const struct bpf_prog *prog,
23994631 6207 struct bpf_insn_access_aux *info)
d691f9e8 6208{
db58ba45 6209 switch (off) {
f96da094
DB
6210 case bpf_ctx_range(struct __sk_buff, tc_classid):
6211 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 6212 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094 6213 case bpf_ctx_range(struct __sk_buff, data_end):
b7df9ada 6214 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
8a31db56 6215 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
f11216b2 6216 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 6217 case bpf_ctx_range(struct __sk_buff, wire_len):
045efa82 6218 return false;
db58ba45 6219 }
045efa82 6220
d691f9e8
AS
6221 if (type == BPF_WRITE) {
6222 switch (off) {
f96da094 6223 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
d691f9e8
AS
6224 break;
6225 default:
6226 return false;
6227 }
6228 }
6229
5e43f899 6230 return bpf_skb_is_valid_access(off, size, type, prog, info);
d691f9e8
AS
6231}
6232
b39b5f41
SL
6233static bool cg_skb_is_valid_access(int off, int size,
6234 enum bpf_access_type type,
6235 const struct bpf_prog *prog,
6236 struct bpf_insn_access_aux *info)
6237{
6238 switch (off) {
6239 case bpf_ctx_range(struct __sk_buff, tc_classid):
6240 case bpf_ctx_range(struct __sk_buff, data_meta):
b7df9ada 6241 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
e3da08d0 6242 case bpf_ctx_range(struct __sk_buff, wire_len):
b39b5f41 6243 return false;
ab21c1b5
DB
6244 case bpf_ctx_range(struct __sk_buff, data):
6245 case bpf_ctx_range(struct __sk_buff, data_end):
6246 if (!capable(CAP_SYS_ADMIN))
6247 return false;
6248 break;
b39b5f41 6249 }
ab21c1b5 6250
b39b5f41
SL
6251 if (type == BPF_WRITE) {
6252 switch (off) {
6253 case bpf_ctx_range(struct __sk_buff, mark):
6254 case bpf_ctx_range(struct __sk_buff, priority):
6255 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
6256 break;
f11216b2
VD
6257 case bpf_ctx_range(struct __sk_buff, tstamp):
6258 if (!capable(CAP_SYS_ADMIN))
6259 return false;
6260 break;
b39b5f41
SL
6261 default:
6262 return false;
6263 }
6264 }
6265
6266 switch (off) {
6267 case bpf_ctx_range(struct __sk_buff, data):
6268 info->reg_type = PTR_TO_PACKET;
6269 break;
6270 case bpf_ctx_range(struct __sk_buff, data_end):
6271 info->reg_type = PTR_TO_PACKET_END;
6272 break;
6273 }
6274
6275 return bpf_skb_is_valid_access(off, size, type, prog, info);
6276}
6277
3a0af8fd
TG
6278static bool lwt_is_valid_access(int off, int size,
6279 enum bpf_access_type type,
5e43f899 6280 const struct bpf_prog *prog,
23994631 6281 struct bpf_insn_access_aux *info)
3a0af8fd
TG
6282{
6283 switch (off) {
f96da094 6284 case bpf_ctx_range(struct __sk_buff, tc_classid):
8a31db56 6285 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
de8f3a83 6286 case bpf_ctx_range(struct __sk_buff, data_meta):
b7df9ada 6287 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
f11216b2 6288 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 6289 case bpf_ctx_range(struct __sk_buff, wire_len):
3a0af8fd
TG
6290 return false;
6291 }
6292
6293 if (type == BPF_WRITE) {
6294 switch (off) {
f96da094
DB
6295 case bpf_ctx_range(struct __sk_buff, mark):
6296 case bpf_ctx_range(struct __sk_buff, priority):
6297 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
3a0af8fd
TG
6298 break;
6299 default:
6300 return false;
6301 }
6302 }
6303
f96da094
DB
6304 switch (off) {
6305 case bpf_ctx_range(struct __sk_buff, data):
6306 info->reg_type = PTR_TO_PACKET;
6307 break;
6308 case bpf_ctx_range(struct __sk_buff, data_end):
6309 info->reg_type = PTR_TO_PACKET_END;
6310 break;
6311 }
6312
5e43f899 6313 return bpf_skb_is_valid_access(off, size, type, prog, info);
3a0af8fd
TG
6314}
6315
aac3fc32
AI
6316/* Attach type specific accesses */
6317static bool __sock_filter_check_attach_type(int off,
6318 enum bpf_access_type access_type,
6319 enum bpf_attach_type attach_type)
61023658 6320{
aac3fc32
AI
6321 switch (off) {
6322 case offsetof(struct bpf_sock, bound_dev_if):
6323 case offsetof(struct bpf_sock, mark):
6324 case offsetof(struct bpf_sock, priority):
6325 switch (attach_type) {
6326 case BPF_CGROUP_INET_SOCK_CREATE:
6327 goto full_access;
6328 default:
6329 return false;
6330 }
6331 case bpf_ctx_range(struct bpf_sock, src_ip4):
6332 switch (attach_type) {
6333 case BPF_CGROUP_INET4_POST_BIND:
6334 goto read_only;
6335 default:
6336 return false;
6337 }
6338 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
6339 switch (attach_type) {
6340 case BPF_CGROUP_INET6_POST_BIND:
6341 goto read_only;
6342 default:
6343 return false;
6344 }
6345 case bpf_ctx_range(struct bpf_sock, src_port):
6346 switch (attach_type) {
6347 case BPF_CGROUP_INET4_POST_BIND:
6348 case BPF_CGROUP_INET6_POST_BIND:
6349 goto read_only;
61023658
DA
6350 default:
6351 return false;
6352 }
6353 }
aac3fc32
AI
6354read_only:
6355 return access_type == BPF_READ;
6356full_access:
6357 return true;
6358}
6359
46f8bc92
MKL
6360bool bpf_sock_common_is_valid_access(int off, int size,
6361 enum bpf_access_type type,
aac3fc32
AI
6362 struct bpf_insn_access_aux *info)
6363{
aac3fc32 6364 switch (off) {
46f8bc92
MKL
6365 case bpf_ctx_range_till(struct bpf_sock, type, priority):
6366 return false;
6367 default:
6368 return bpf_sock_is_valid_access(off, size, type, info);
aac3fc32 6369 }
aac3fc32
AI
6370}
6371
c64b7983
JS
6372bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6373 struct bpf_insn_access_aux *info)
aac3fc32 6374{
aa65d696
MKL
6375 const int size_default = sizeof(__u32);
6376
aac3fc32 6377 if (off < 0 || off >= sizeof(struct bpf_sock))
61023658 6378 return false;
61023658
DA
6379 if (off % size != 0)
6380 return false;
aa65d696
MKL
6381
6382 switch (off) {
6383 case offsetof(struct bpf_sock, state):
6384 case offsetof(struct bpf_sock, family):
6385 case offsetof(struct bpf_sock, type):
6386 case offsetof(struct bpf_sock, protocol):
6387 case offsetof(struct bpf_sock, dst_port):
6388 case offsetof(struct bpf_sock, src_port):
6389 case bpf_ctx_range(struct bpf_sock, src_ip4):
6390 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
6391 case bpf_ctx_range(struct bpf_sock, dst_ip4):
6392 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
6393 bpf_ctx_record_field_size(info, size_default);
6394 return bpf_ctx_narrow_access_ok(off, size, size_default);
6395 }
6396
6397 return size == size_default;
61023658
DA
6398}
6399
c64b7983
JS
6400static bool sock_filter_is_valid_access(int off, int size,
6401 enum bpf_access_type type,
6402 const struct bpf_prog *prog,
6403 struct bpf_insn_access_aux *info)
6404{
6405 if (!bpf_sock_is_valid_access(off, size, type, info))
6406 return false;
6407 return __sock_filter_check_attach_type(off, type,
6408 prog->expected_attach_type);
6409}
6410
b09928b9
DB
6411static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
6412 const struct bpf_prog *prog)
6413{
6414 /* Neither direct read nor direct write requires any preliminary
6415 * action.
6416 */
6417 return 0;
6418}
6419
047b0ecd
DB
6420static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
6421 const struct bpf_prog *prog, int drop_verdict)
36bbef52
DB
6422{
6423 struct bpf_insn *insn = insn_buf;
6424
6425 if (!direct_write)
6426 return 0;
6427
6428 /* if (!skb->cloned)
6429 * goto start;
6430 *
6431 * (Fast-path, otherwise approximation that we might be
6432 * a clone, do the rest in helper.)
6433 */
6434 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
6435 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
6436 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
6437
6438 /* ret = bpf_skb_pull_data(skb, 0); */
6439 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
6440 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
6441 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
6442 BPF_FUNC_skb_pull_data);
6443 /* if (!ret)
6444 * goto restore;
6445 * return TC_ACT_SHOT;
6446 */
6447 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
047b0ecd 6448 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
36bbef52
DB
6449 *insn++ = BPF_EXIT_INSN();
6450
6451 /* restore: */
6452 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
6453 /* start: */
6454 *insn++ = prog->insnsi[0];
6455
6456 return insn - insn_buf;
6457}
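/* Editor's note: in C terms the prologue emitted above is roughly
 * equivalent to prepending
 *
 *	if (skb->cloned && bpf_skb_pull_data(skb, 0))
 *		return drop_verdict;
 *
 * to the program, where drop_verdict is TC_ACT_SHOT or SK_DROP depending
 * on the caller.
 */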
6458
e0cea7ce
DB
6459static int bpf_gen_ld_abs(const struct bpf_insn *orig,
6460 struct bpf_insn *insn_buf)
6461{
6462 bool indirect = BPF_MODE(orig->code) == BPF_IND;
6463 struct bpf_insn *insn = insn_buf;
6464
6465 /* We're guaranteed here that CTX is in R6. */
6466 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
6467 if (!indirect) {
6468 *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
6469 } else {
6470 *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
6471 if (orig->imm)
6472 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
6473 }
6474
6475 switch (BPF_SIZE(orig->code)) {
6476 case BPF_B:
6477 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
6478 break;
6479 case BPF_H:
6480 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
6481 break;
6482 case BPF_W:
6483 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
6484 break;
6485 }
6486
6487 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
6488 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
6489 *insn++ = BPF_EXIT_INSN();
6490
6491 return insn - insn_buf;
6492}
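/* Editor's note: this expands a legacy LD_ABS/LD_IND instruction into a
 * call to the size-matching *_no_cache load helper; if the load fails,
 * the generated code makes the program return 0 (packet not accepted),
 * matching classic BPF semantics.
 */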
6493
047b0ecd
DB
6494static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
6495 const struct bpf_prog *prog)
6496{
6497 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
6498}
6499
d691f9e8 6500static bool tc_cls_act_is_valid_access(int off, int size,
19de99f7 6501 enum bpf_access_type type,
5e43f899 6502 const struct bpf_prog *prog,
23994631 6503 struct bpf_insn_access_aux *info)
d691f9e8
AS
6504{
6505 if (type == BPF_WRITE) {
6506 switch (off) {
f96da094
DB
6507 case bpf_ctx_range(struct __sk_buff, mark):
6508 case bpf_ctx_range(struct __sk_buff, tc_index):
6509 case bpf_ctx_range(struct __sk_buff, priority):
6510 case bpf_ctx_range(struct __sk_buff, tc_classid):
6511 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
f11216b2 6512 case bpf_ctx_range(struct __sk_buff, tstamp):
74e31ca8 6513 case bpf_ctx_range(struct __sk_buff, queue_mapping):
d691f9e8
AS
6514 break;
6515 default:
6516 return false;
6517 }
6518 }
19de99f7 6519
f96da094
DB
6520 switch (off) {
6521 case bpf_ctx_range(struct __sk_buff, data):
6522 info->reg_type = PTR_TO_PACKET;
6523 break;
de8f3a83
DB
6524 case bpf_ctx_range(struct __sk_buff, data_meta):
6525 info->reg_type = PTR_TO_PACKET_META;
6526 break;
f96da094
DB
6527 case bpf_ctx_range(struct __sk_buff, data_end):
6528 info->reg_type = PTR_TO_PACKET_END;
6529 break;
b7df9ada 6530 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
8a31db56
JF
6531 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
6532 return false;
f96da094
DB
6533 }
6534
5e43f899 6535 return bpf_skb_is_valid_access(off, size, type, prog, info);
d691f9e8
AS
6536}
6537
1afaf661 6538static bool __is_valid_xdp_access(int off, int size)
6a773a15
BB
6539{
6540 if (off < 0 || off >= sizeof(struct xdp_md))
6541 return false;
6542 if (off % size != 0)
6543 return false;
6088b582 6544 if (size != sizeof(__u32))
6a773a15
BB
6545 return false;
6546
6547 return true;
6548}
6549
6550static bool xdp_is_valid_access(int off, int size,
6551 enum bpf_access_type type,
5e43f899 6552 const struct bpf_prog *prog,
23994631 6553 struct bpf_insn_access_aux *info)
6a773a15 6554{
0d830032
JK
6555 if (type == BPF_WRITE) {
6556 if (bpf_prog_is_dev_bound(prog->aux)) {
6557 switch (off) {
6558 case offsetof(struct xdp_md, rx_queue_index):
6559 return __is_valid_xdp_access(off, size);
6560 }
6561 }
6a773a15 6562 return false;
0d830032 6563 }
6a773a15
BB
6564
6565 switch (off) {
6566 case offsetof(struct xdp_md, data):
23994631 6567 info->reg_type = PTR_TO_PACKET;
6a773a15 6568 break;
de8f3a83
DB
6569 case offsetof(struct xdp_md, data_meta):
6570 info->reg_type = PTR_TO_PACKET_META;
6571 break;
6a773a15 6572 case offsetof(struct xdp_md, data_end):
23994631 6573 info->reg_type = PTR_TO_PACKET_END;
6a773a15
BB
6574 break;
6575 }
6576
1afaf661 6577 return __is_valid_xdp_access(off, size);
6a773a15
BB
6578}
6579
6580void bpf_warn_invalid_xdp_action(u32 act)
6581{
9beb8bed
DB
6582 const u32 act_max = XDP_REDIRECT;
6583
6584 WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
6585 act > act_max ? "Illegal" : "Driver unsupported",
6586 act);
6a773a15
BB
6587}
6588EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
6589
4fbac77d
AI
6590static bool sock_addr_is_valid_access(int off, int size,
6591 enum bpf_access_type type,
6592 const struct bpf_prog *prog,
6593 struct bpf_insn_access_aux *info)
6594{
6595 const int size_default = sizeof(__u32);
6596
6597 if (off < 0 || off >= sizeof(struct bpf_sock_addr))
6598 return false;
6599 if (off % size != 0)
6600 return false;
6601
 6602 /* Disallow access to IPv6 fields from IPv4 context and vice
 6603 * versa.
 6604 */
6605 switch (off) {
6606 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
6607 switch (prog->expected_attach_type) {
6608 case BPF_CGROUP_INET4_BIND:
d74bad4e 6609 case BPF_CGROUP_INET4_CONNECT:
1cedee13 6610 case BPF_CGROUP_UDP4_SENDMSG:
4fbac77d
AI
6611 break;
6612 default:
6613 return false;
6614 }
6615 break;
6616 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
6617 switch (prog->expected_attach_type) {
6618 case BPF_CGROUP_INET6_BIND:
d74bad4e 6619 case BPF_CGROUP_INET6_CONNECT:
1cedee13
AI
6620 case BPF_CGROUP_UDP6_SENDMSG:
6621 break;
6622 default:
6623 return false;
6624 }
6625 break;
6626 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
6627 switch (prog->expected_attach_type) {
6628 case BPF_CGROUP_UDP4_SENDMSG:
6629 break;
6630 default:
6631 return false;
6632 }
6633 break;
6634 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
6635 msg_src_ip6[3]):
6636 switch (prog->expected_attach_type) {
6637 case BPF_CGROUP_UDP6_SENDMSG:
4fbac77d
AI
6638 break;
6639 default:
6640 return false;
6641 }
6642 break;
6643 }
6644
6645 switch (off) {
6646 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
6647 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
1cedee13
AI
6648 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
6649 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
6650 msg_src_ip6[3]):
4fbac77d
AI
6651 /* Only narrow read access allowed for now. */
6652 if (type == BPF_READ) {
6653 bpf_ctx_record_field_size(info, size_default);
6654 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
6655 return false;
6656 } else {
6657 if (size != size_default)
6658 return false;
6659 }
6660 break;
6661 case bpf_ctx_range(struct bpf_sock_addr, user_port):
6662 if (size != size_default)
6663 return false;
6664 break;
6665 default:
6666 if (type == BPF_READ) {
6667 if (size != size_default)
6668 return false;
6669 } else {
6670 return false;
6671 }
6672 }
6673
6674 return true;
6675}
6676
44f0e430
LB
6677static bool sock_ops_is_valid_access(int off, int size,
6678 enum bpf_access_type type,
5e43f899 6679 const struct bpf_prog *prog,
44f0e430 6680 struct bpf_insn_access_aux *info)
40304b2a 6681{
44f0e430
LB
6682 const int size_default = sizeof(__u32);
6683
40304b2a
LB
6684 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
6685 return false;
44f0e430 6686
40304b2a
LB
6687 /* The verifier guarantees that size > 0. */
6688 if (off % size != 0)
6689 return false;
40304b2a 6690
40304b2a
LB
6691 if (type == BPF_WRITE) {
6692 switch (off) {
2585cd62 6693 case offsetof(struct bpf_sock_ops, reply):
6f9bd3d7 6694 case offsetof(struct bpf_sock_ops, sk_txhash):
44f0e430
LB
6695 if (size != size_default)
6696 return false;
40304b2a
LB
6697 break;
6698 default:
6699 return false;
6700 }
44f0e430
LB
6701 } else {
6702 switch (off) {
6703 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
6704 bytes_acked):
6705 if (size != sizeof(__u64))
6706 return false;
6707 break;
6708 default:
6709 if (size != size_default)
6710 return false;
6711 break;
6712 }
40304b2a
LB
6713 }
6714
44f0e430 6715 return true;
40304b2a
LB
6716}
6717
8a31db56
JF
6718static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
6719 const struct bpf_prog *prog)
6720{
047b0ecd 6721 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
8a31db56
JF
6722}
6723
b005fd18
JF
6724static bool sk_skb_is_valid_access(int off, int size,
6725 enum bpf_access_type type,
5e43f899 6726 const struct bpf_prog *prog,
b005fd18
JF
6727 struct bpf_insn_access_aux *info)
6728{
de8f3a83
DB
6729 switch (off) {
6730 case bpf_ctx_range(struct __sk_buff, tc_classid):
6731 case bpf_ctx_range(struct __sk_buff, data_meta):
b7df9ada 6732 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
f11216b2 6733 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 6734 case bpf_ctx_range(struct __sk_buff, wire_len):
de8f3a83
DB
6735 return false;
6736 }
6737
8a31db56
JF
6738 if (type == BPF_WRITE) {
6739 switch (off) {
8a31db56
JF
6740 case bpf_ctx_range(struct __sk_buff, tc_index):
6741 case bpf_ctx_range(struct __sk_buff, priority):
6742 break;
6743 default:
6744 return false;
6745 }
6746 }
6747
b005fd18 6748 switch (off) {
f7e9cb1e 6749 case bpf_ctx_range(struct __sk_buff, mark):
8a31db56 6750 return false;
b005fd18
JF
6751 case bpf_ctx_range(struct __sk_buff, data):
6752 info->reg_type = PTR_TO_PACKET;
6753 break;
6754 case bpf_ctx_range(struct __sk_buff, data_end):
6755 info->reg_type = PTR_TO_PACKET_END;
6756 break;
6757 }
6758
5e43f899 6759 return bpf_skb_is_valid_access(off, size, type, prog, info);
b005fd18
JF
6760}
6761
4f738adb
JF
6762static bool sk_msg_is_valid_access(int off, int size,
6763 enum bpf_access_type type,
5e43f899 6764 const struct bpf_prog *prog,
4f738adb
JF
6765 struct bpf_insn_access_aux *info)
6766{
6767 if (type == BPF_WRITE)
6768 return false;
6769
bc1b4f01
JF
6770 if (off % size != 0)
6771 return false;
6772
4f738adb
JF
6773 switch (off) {
6774 case offsetof(struct sk_msg_md, data):
6775 info->reg_type = PTR_TO_PACKET;
303def35
JF
6776 if (size != sizeof(__u64))
6777 return false;
4f738adb
JF
6778 break;
6779 case offsetof(struct sk_msg_md, data_end):
6780 info->reg_type = PTR_TO_PACKET_END;
303def35
JF
6781 if (size != sizeof(__u64))
6782 return false;
4f738adb 6783 break;
bc1b4f01
JF
6784 case bpf_ctx_range(struct sk_msg_md, family):
6785 case bpf_ctx_range(struct sk_msg_md, remote_ip4):
6786 case bpf_ctx_range(struct sk_msg_md, local_ip4):
6787 case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
6788 case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
6789 case bpf_ctx_range(struct sk_msg_md, remote_port):
6790 case bpf_ctx_range(struct sk_msg_md, local_port):
6791 case bpf_ctx_range(struct sk_msg_md, size):
303def35
JF
6792 if (size != sizeof(__u32))
6793 return false;
bc1b4f01
JF
6794 break;
6795 default:
4f738adb 6796 return false;
bc1b4f01 6797 }
4f738adb
JF
6798 return true;
6799}
6800
d58e468b
PP
6801static bool flow_dissector_is_valid_access(int off, int size,
6802 enum bpf_access_type type,
6803 const struct bpf_prog *prog,
6804 struct bpf_insn_access_aux *info)
6805{
6806 if (type == BPF_WRITE) {
6807 switch (off) {
6808 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
6809 break;
6810 default:
6811 return false;
6812 }
6813 }
6814
6815 switch (off) {
6816 case bpf_ctx_range(struct __sk_buff, data):
6817 info->reg_type = PTR_TO_PACKET;
6818 break;
6819 case bpf_ctx_range(struct __sk_buff, data_end):
6820 info->reg_type = PTR_TO_PACKET_END;
6821 break;
b7df9ada 6822 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
d58e468b
PP
6823 info->reg_type = PTR_TO_FLOW_KEYS;
6824 break;
6825 case bpf_ctx_range(struct __sk_buff, tc_classid):
6826 case bpf_ctx_range(struct __sk_buff, data_meta):
6827 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
f11216b2 6828 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 6829 case bpf_ctx_range(struct __sk_buff, wire_len):
d58e468b
PP
6830 return false;
6831 }
6832
6833 return bpf_skb_is_valid_access(off, size, type, prog, info);
6834}
6835
2492d3b8
DB
6836static u32 bpf_convert_ctx_access(enum bpf_access_type type,
6837 const struct bpf_insn *si,
6838 struct bpf_insn *insn_buf,
f96da094 6839 struct bpf_prog *prog, u32 *target_size)
9bac3d6d
AS
6840{
6841 struct bpf_insn *insn = insn_buf;
6b8cc1d1 6842 int off;
9bac3d6d 6843
6b8cc1d1 6844 switch (si->off) {
9bac3d6d 6845 case offsetof(struct __sk_buff, len):
6b8cc1d1 6846 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6847 bpf_target_off(struct sk_buff, len, 4,
6848 target_size));
9bac3d6d
AS
6849 break;
6850
0b8c707d 6851 case offsetof(struct __sk_buff, protocol):
6b8cc1d1 6852 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
6853 bpf_target_off(struct sk_buff, protocol, 2,
6854 target_size));
0b8c707d
DB
6855 break;
6856
27cd5452 6857 case offsetof(struct __sk_buff, vlan_proto):
6b8cc1d1 6858 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
6859 bpf_target_off(struct sk_buff, vlan_proto, 2,
6860 target_size));
27cd5452
MS
6861 break;
6862
bcad5718 6863 case offsetof(struct __sk_buff, priority):
754f1e6a 6864 if (type == BPF_WRITE)
6b8cc1d1 6865 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6866 bpf_target_off(struct sk_buff, priority, 4,
6867 target_size));
754f1e6a 6868 else
6b8cc1d1 6869 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6870 bpf_target_off(struct sk_buff, priority, 4,
6871 target_size));
bcad5718
DB
6872 break;
6873
37e82c2f 6874 case offsetof(struct __sk_buff, ingress_ifindex):
6b8cc1d1 6875 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6876 bpf_target_off(struct sk_buff, skb_iif, 4,
6877 target_size));
37e82c2f
AS
6878 break;
6879
6880 case offsetof(struct __sk_buff, ifindex):
f035a515 6881 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 6882 si->dst_reg, si->src_reg,
37e82c2f 6883 offsetof(struct sk_buff, dev));
6b8cc1d1
DB
6884 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
6885 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
6886 bpf_target_off(struct net_device, ifindex, 4,
6887 target_size));
37e82c2f
AS
6888 break;
6889
ba7591d8 6890 case offsetof(struct __sk_buff, hash):
6b8cc1d1 6891 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6892 bpf_target_off(struct sk_buff, hash, 4,
6893 target_size));
ba7591d8
DB
6894 break;
6895
9bac3d6d 6896 case offsetof(struct __sk_buff, mark):
d691f9e8 6897 if (type == BPF_WRITE)
6b8cc1d1 6898 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6899 bpf_target_off(struct sk_buff, mark, 4,
6900 target_size));
d691f9e8 6901 else
6b8cc1d1 6902 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
6903 bpf_target_off(struct sk_buff, mark, 4,
6904 target_size));
d691f9e8 6905 break;
9bac3d6d
AS
6906
6907 case offsetof(struct __sk_buff, pkt_type):
f96da094
DB
6908 *target_size = 1;
6909 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
6910 PKT_TYPE_OFFSET());
6911 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
6912#ifdef __BIG_ENDIAN_BITFIELD
6913 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
6914#endif
6915 break;
9bac3d6d
AS
6916
6917 case offsetof(struct __sk_buff, queue_mapping):
74e31ca8
JDB
6918 if (type == BPF_WRITE) {
6919 *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
6920 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
6921 bpf_target_off(struct sk_buff,
6922 queue_mapping,
6923 2, target_size));
6924 } else {
6925 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
6926 bpf_target_off(struct sk_buff,
6927 queue_mapping,
6928 2, target_size));
6929 }
f96da094 6930 break;
c2497395 6931
c2497395 6932 case offsetof(struct __sk_buff, vlan_present):
9c212255
MM
6933 *target_size = 1;
6934 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
6935 PKT_VLAN_PRESENT_OFFSET());
6936 if (PKT_VLAN_PRESENT_BIT)
6937 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
6938 if (PKT_VLAN_PRESENT_BIT < 7)
6939 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
6940 break;
f96da094 6941
9c212255 6942 case offsetof(struct __sk_buff, vlan_tci):
f96da094
DB
6943 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
6944 bpf_target_off(struct sk_buff, vlan_tci, 2,
6945 target_size));
f96da094 6946 break;
d691f9e8
AS
6947
6948 case offsetof(struct __sk_buff, cb[0]) ...
f96da094 6949 offsetofend(struct __sk_buff, cb[4]) - 1:
d691f9e8 6950 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
62c7989b
DB
6951 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
6952 offsetof(struct qdisc_skb_cb, data)) %
6953 sizeof(__u64));
d691f9e8 6954
ff936a04 6955 prog->cb_access = 1;
6b8cc1d1
DB
6956 off = si->off;
6957 off -= offsetof(struct __sk_buff, cb[0]);
6958 off += offsetof(struct sk_buff, cb);
6959 off += offsetof(struct qdisc_skb_cb, data);
d691f9e8 6960 if (type == BPF_WRITE)
62c7989b 6961 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 6962 si->src_reg, off);
d691f9e8 6963 else
62c7989b 6964 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 6965 si->src_reg, off);
d691f9e8
AS
6966 break;
6967
045efa82 6968 case offsetof(struct __sk_buff, tc_classid):
6b8cc1d1
DB
6969 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
6970
6971 off = si->off;
6972 off -= offsetof(struct __sk_buff, tc_classid);
6973 off += offsetof(struct sk_buff, cb);
6974 off += offsetof(struct qdisc_skb_cb, tc_classid);
f96da094 6975 *target_size = 2;
09c37a2c 6976 if (type == BPF_WRITE)
6b8cc1d1
DB
6977 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
6978 si->src_reg, off);
09c37a2c 6979 else
6b8cc1d1
DB
6980 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
6981 si->src_reg, off);
045efa82
DB
6982 break;
6983
db58ba45 6984 case offsetof(struct __sk_buff, data):
f035a515 6985 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
6b8cc1d1 6986 si->dst_reg, si->src_reg,
db58ba45
AS
6987 offsetof(struct sk_buff, data));
6988 break;
6989
de8f3a83
DB
6990 case offsetof(struct __sk_buff, data_meta):
6991 off = si->off;
6992 off -= offsetof(struct __sk_buff, data_meta);
6993 off += offsetof(struct sk_buff, cb);
6994 off += offsetof(struct bpf_skb_data_end, data_meta);
6995 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
6996 si->src_reg, off);
6997 break;
6998
db58ba45 6999 case offsetof(struct __sk_buff, data_end):
6b8cc1d1
DB
7000 off = si->off;
7001 off -= offsetof(struct __sk_buff, data_end);
7002 off += offsetof(struct sk_buff, cb);
7003 off += offsetof(struct bpf_skb_data_end, data_end);
7004 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
7005 si->src_reg, off);
db58ba45
AS
7006 break;
7007
d691f9e8
AS
7008 case offsetof(struct __sk_buff, tc_index):
7009#ifdef CONFIG_NET_SCHED
d691f9e8 7010 if (type == BPF_WRITE)
6b8cc1d1 7011 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
7012 bpf_target_off(struct sk_buff, tc_index, 2,
7013 target_size));
d691f9e8 7014 else
6b8cc1d1 7015 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
7016 bpf_target_off(struct sk_buff, tc_index, 2,
7017 target_size));
d691f9e8 7018#else
2ed46ce4 7019 *target_size = 2;
d691f9e8 7020 if (type == BPF_WRITE)
6b8cc1d1 7021 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
d691f9e8 7022 else
6b8cc1d1 7023 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
b1d9fc41
DB
7024#endif
7025 break;
7026
7027 case offsetof(struct __sk_buff, napi_id):
7028#if defined(CONFIG_NET_RX_BUSY_POLL)
b1d9fc41 7029 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7030 bpf_target_off(struct sk_buff, napi_id, 4,
7031 target_size));
b1d9fc41
DB
7032 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
7033 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
7034#else
2ed46ce4 7035 *target_size = 4;
b1d9fc41 7036 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
d691f9e8 7037#endif
6b8cc1d1 7038 break;
8a31db56
JF
7039 case offsetof(struct __sk_buff, family):
7040 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
7041
7042 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7043 si->dst_reg, si->src_reg,
7044 offsetof(struct sk_buff, sk));
7045 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7046 bpf_target_off(struct sock_common,
7047 skc_family,
7048 2, target_size));
7049 break;
7050 case offsetof(struct __sk_buff, remote_ip4):
7051 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
7052
7053 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7054 si->dst_reg, si->src_reg,
7055 offsetof(struct sk_buff, sk));
7056 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7057 bpf_target_off(struct sock_common,
7058 skc_daddr,
7059 4, target_size));
7060 break;
7061 case offsetof(struct __sk_buff, local_ip4):
7062 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7063 skc_rcv_saddr) != 4);
7064
7065 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7066 si->dst_reg, si->src_reg,
7067 offsetof(struct sk_buff, sk));
7068 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7069 bpf_target_off(struct sock_common,
7070 skc_rcv_saddr,
7071 4, target_size));
7072 break;
7073 case offsetof(struct __sk_buff, remote_ip6[0]) ...
7074 offsetof(struct __sk_buff, remote_ip6[3]):
7075#if IS_ENABLED(CONFIG_IPV6)
7076 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7077 skc_v6_daddr.s6_addr32[0]) != 4);
7078
7079 off = si->off;
7080 off -= offsetof(struct __sk_buff, remote_ip6[0]);
7081
7082 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7083 si->dst_reg, si->src_reg,
7084 offsetof(struct sk_buff, sk));
7085 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7086 offsetof(struct sock_common,
7087 skc_v6_daddr.s6_addr32[0]) +
7088 off);
7089#else
7090 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7091#endif
7092 break;
7093 case offsetof(struct __sk_buff, local_ip6[0]) ...
7094 offsetof(struct __sk_buff, local_ip6[3]):
7095#if IS_ENABLED(CONFIG_IPV6)
7096 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7097 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
7098
7099 off = si->off;
7100 off -= offsetof(struct __sk_buff, local_ip6[0]);
7101
7102 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7103 si->dst_reg, si->src_reg,
7104 offsetof(struct sk_buff, sk));
7105 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7106 offsetof(struct sock_common,
7107 skc_v6_rcv_saddr.s6_addr32[0]) +
7108 off);
7109#else
7110 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7111#endif
7112 break;
7113
7114 case offsetof(struct __sk_buff, remote_port):
7115 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
7116
7117 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7118 si->dst_reg, si->src_reg,
7119 offsetof(struct sk_buff, sk));
7120 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7121 bpf_target_off(struct sock_common,
7122 skc_dport,
7123 2, target_size));
7124#ifndef __BIG_ENDIAN_BITFIELD
7125 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
7126#endif
7127 break;
7128
7129 case offsetof(struct __sk_buff, local_port):
7130 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
7131
7132 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7133 si->dst_reg, si->src_reg,
7134 offsetof(struct sk_buff, sk));
7135 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7136 bpf_target_off(struct sock_common,
7137 skc_num, 2, target_size));
7138 break;
d58e468b
PP
7139
7140 case offsetof(struct __sk_buff, flow_keys):
7141 off = si->off;
7142 off -= offsetof(struct __sk_buff, flow_keys);
7143 off += offsetof(struct sk_buff, cb);
7144 off += offsetof(struct qdisc_skb_cb, flow_keys);
7145 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
7146 si->src_reg, off);
7147 break;
f11216b2
VD
7148
7149 case offsetof(struct __sk_buff, tstamp):
7150 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
7151
7152 if (type == BPF_WRITE)
7153 *insn++ = BPF_STX_MEM(BPF_DW,
7154 si->dst_reg, si->src_reg,
7155 bpf_target_off(struct sk_buff,
7156 tstamp, 8,
7157 target_size));
7158 else
7159 *insn++ = BPF_LDX_MEM(BPF_DW,
7160 si->dst_reg, si->src_reg,
7161 bpf_target_off(struct sk_buff,
7162 tstamp, 8,
7163 target_size));
e3da08d0
PP
7164 break;
7165
d9ff286a
ED
7166 case offsetof(struct __sk_buff, gso_segs):
7167 /* si->dst_reg = skb_shinfo(SKB); */
7168#ifdef NET_SKBUFF_DATA_USES_OFFSET
7169 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
7170 si->dst_reg, si->src_reg,
7171 offsetof(struct sk_buff, head));
7172 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
7173 BPF_REG_AX, si->src_reg,
7174 offsetof(struct sk_buff, end));
7175 *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
7176#else
7177 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
7178 si->dst_reg, si->src_reg,
7179 offsetof(struct sk_buff, end));
7180#endif
7181 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
7182 si->dst_reg, si->dst_reg,
7183 bpf_target_off(struct skb_shared_info,
7184 gso_segs, 2,
7185 target_size));
7186 break;
e3da08d0
PP
7187 case offsetof(struct __sk_buff, wire_len):
7188 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
7189
7190 off = si->off;
7191 off -= offsetof(struct __sk_buff, wire_len);
7192 off += offsetof(struct sk_buff, cb);
7193 off += offsetof(struct qdisc_skb_cb, pkt_len);
7194 *target_size = 4;
7195 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
46f8bc92
MKL
7196 break;
7197
7198 case offsetof(struct __sk_buff, sk):
7199 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
7200 si->dst_reg, si->src_reg,
7201 offsetof(struct sk_buff, sk));
7202 break;
9bac3d6d
AS
7203 }
7204
7205 return insn - insn_buf;
89aa0758
AS
7206}
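/* Illustrative sketch (not from the original source): as a concrete example
 * of the rewriting above, a program read such as
 *
 *	__u32 cid = skb->tc_classid;	(skb being a struct __sk_buff pointer)
 *
 * hits the tc_classid case and is replaced with a single BPF_H load from
 * skb->cb at the qdisc_skb_cb.tc_classid offset, with *target_size set to 2
 * so the verifier can patch in any narrowing the access needs.
 */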
7207
c64b7983
JS
7208u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
7209 const struct bpf_insn *si,
7210 struct bpf_insn *insn_buf,
7211 struct bpf_prog *prog, u32 *target_size)
61023658
DA
7212{
7213 struct bpf_insn *insn = insn_buf;
aac3fc32 7214 int off;
61023658 7215
6b8cc1d1 7216 switch (si->off) {
61023658
DA
7217 case offsetof(struct bpf_sock, bound_dev_if):
7218 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
7219
7220 if (type == BPF_WRITE)
6b8cc1d1 7221 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
7222 offsetof(struct sock, sk_bound_dev_if));
7223 else
6b8cc1d1 7224 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
7225 offsetof(struct sock, sk_bound_dev_if));
7226 break;
aa4c1037 7227
482dca93
DA
7228 case offsetof(struct bpf_sock, mark):
7229 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
7230
7231 if (type == BPF_WRITE)
7232 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
7233 offsetof(struct sock, sk_mark));
7234 else
7235 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
7236 offsetof(struct sock, sk_mark));
7237 break;
7238
7239 case offsetof(struct bpf_sock, priority):
7240 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
7241
7242 if (type == BPF_WRITE)
7243 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
7244 offsetof(struct sock, sk_priority));
7245 else
7246 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
7247 offsetof(struct sock, sk_priority));
7248 break;
7249
aa4c1037 7250 case offsetof(struct bpf_sock, family):
aa65d696
MKL
7251 *insn++ = BPF_LDX_MEM(
7252 BPF_FIELD_SIZEOF(struct sock_common, skc_family),
7253 si->dst_reg, si->src_reg,
7254 bpf_target_off(struct sock_common,
7255 skc_family,
7256 FIELD_SIZEOF(struct sock_common,
7257 skc_family),
7258 target_size));
aa4c1037
DA
7259 break;
7260
7261 case offsetof(struct bpf_sock, type):
aa65d696 7262 BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
6b8cc1d1 7263 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
aa4c1037 7264 offsetof(struct sock, __sk_flags_offset));
6b8cc1d1
DB
7265 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
7266 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
aa65d696 7267 *target_size = 2;
aa4c1037
DA
7268 break;
7269
7270 case offsetof(struct bpf_sock, protocol):
aa65d696 7271 BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
6b8cc1d1 7272 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
aa4c1037 7273 offsetof(struct sock, __sk_flags_offset));
6b8cc1d1
DB
7274 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
7275 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
aa65d696 7276 *target_size = 1;
aa4c1037 7277 break;
aac3fc32
AI
7278
7279 case offsetof(struct bpf_sock, src_ip4):
7280 *insn++ = BPF_LDX_MEM(
7281 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
7282 bpf_target_off(struct sock_common, skc_rcv_saddr,
7283 FIELD_SIZEOF(struct sock_common,
7284 skc_rcv_saddr),
7285 target_size));
7286 break;
7287
aa65d696
MKL
7288 case offsetof(struct bpf_sock, dst_ip4):
7289 *insn++ = BPF_LDX_MEM(
7290 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
7291 bpf_target_off(struct sock_common, skc_daddr,
7292 FIELD_SIZEOF(struct sock_common,
7293 skc_daddr),
7294 target_size));
7295 break;
7296
aac3fc32
AI
7297 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
7298#if IS_ENABLED(CONFIG_IPV6)
7299 off = si->off;
7300 off -= offsetof(struct bpf_sock, src_ip6[0]);
7301 *insn++ = BPF_LDX_MEM(
7302 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
7303 bpf_target_off(
7304 struct sock_common,
7305 skc_v6_rcv_saddr.s6_addr32[0],
7306 FIELD_SIZEOF(struct sock_common,
7307 skc_v6_rcv_saddr.s6_addr32[0]),
7308 target_size) + off);
7309#else
7310 (void)off;
7311 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7312#endif
7313 break;
7314
aa65d696
MKL
7315 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
7316#if IS_ENABLED(CONFIG_IPV6)
7317 off = si->off;
7318 off -= offsetof(struct bpf_sock, dst_ip6[0]);
7319 *insn++ = BPF_LDX_MEM(
7320 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
7321 bpf_target_off(struct sock_common,
7322 skc_v6_daddr.s6_addr32[0],
7323 FIELD_SIZEOF(struct sock_common,
7324 skc_v6_daddr.s6_addr32[0]),
7325 target_size) + off);
7326#else
7327 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7328 *target_size = 4;
7329#endif
7330 break;
7331
aac3fc32
AI
7332 case offsetof(struct bpf_sock, src_port):
7333 *insn++ = BPF_LDX_MEM(
7334 BPF_FIELD_SIZEOF(struct sock_common, skc_num),
7335 si->dst_reg, si->src_reg,
7336 bpf_target_off(struct sock_common, skc_num,
7337 FIELD_SIZEOF(struct sock_common,
7338 skc_num),
7339 target_size));
7340 break;
aa65d696
MKL
7341
7342 case offsetof(struct bpf_sock, dst_port):
7343 *insn++ = BPF_LDX_MEM(
7344 BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
7345 si->dst_reg, si->src_reg,
7346 bpf_target_off(struct sock_common, skc_dport,
7347 FIELD_SIZEOF(struct sock_common,
7348 skc_dport),
7349 target_size));
7350 break;
7351
7352 case offsetof(struct bpf_sock, state):
7353 *insn++ = BPF_LDX_MEM(
7354 BPF_FIELD_SIZEOF(struct sock_common, skc_state),
7355 si->dst_reg, si->src_reg,
7356 bpf_target_off(struct sock_common, skc_state,
7357 FIELD_SIZEOF(struct sock_common,
7358 skc_state),
7359 target_size));
7360 break;
61023658
DA
7361 }
7362
7363 return insn - insn_buf;
7364}
7365
6b8cc1d1
DB
7366static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
7367 const struct bpf_insn *si,
374fb54e 7368 struct bpf_insn *insn_buf,
f96da094 7369 struct bpf_prog *prog, u32 *target_size)
374fb54e
DB
7370{
7371 struct bpf_insn *insn = insn_buf;
7372
6b8cc1d1 7373 switch (si->off) {
374fb54e 7374 case offsetof(struct __sk_buff, ifindex):
374fb54e 7375 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 7376 si->dst_reg, si->src_reg,
374fb54e 7377 offsetof(struct sk_buff, dev));
6b8cc1d1 7378 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
7379 bpf_target_off(struct net_device, ifindex, 4,
7380 target_size));
374fb54e
DB
7381 break;
7382 default:
f96da094
DB
7383 return bpf_convert_ctx_access(type, si, insn_buf, prog,
7384 target_size);
374fb54e
DB
7385 }
7386
7387 return insn - insn_buf;
7388}
7389
6b8cc1d1
DB
7390static u32 xdp_convert_ctx_access(enum bpf_access_type type,
7391 const struct bpf_insn *si,
6a773a15 7392 struct bpf_insn *insn_buf,
f96da094 7393 struct bpf_prog *prog, u32 *target_size)
6a773a15
BB
7394{
7395 struct bpf_insn *insn = insn_buf;
7396
6b8cc1d1 7397 switch (si->off) {
6a773a15 7398 case offsetof(struct xdp_md, data):
f035a515 7399 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
6b8cc1d1 7400 si->dst_reg, si->src_reg,
6a773a15
BB
7401 offsetof(struct xdp_buff, data));
7402 break;
de8f3a83
DB
7403 case offsetof(struct xdp_md, data_meta):
7404 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
7405 si->dst_reg, si->src_reg,
7406 offsetof(struct xdp_buff, data_meta));
7407 break;
6a773a15 7408 case offsetof(struct xdp_md, data_end):
f035a515 7409 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
6b8cc1d1 7410 si->dst_reg, si->src_reg,
6a773a15
BB
7411 offsetof(struct xdp_buff, data_end));
7412 break;
02dd3291
JDB
7413 case offsetof(struct xdp_md, ingress_ifindex):
7414 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
7415 si->dst_reg, si->src_reg,
7416 offsetof(struct xdp_buff, rxq));
7417 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
7418 si->dst_reg, si->dst_reg,
7419 offsetof(struct xdp_rxq_info, dev));
7420 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6 7421 offsetof(struct net_device, ifindex));
02dd3291
JDB
7422 break;
7423 case offsetof(struct xdp_md, rx_queue_index):
7424 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
7425 si->dst_reg, si->src_reg,
7426 offsetof(struct xdp_buff, rxq));
7427 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6
JDB
7428 offsetof(struct xdp_rxq_info,
7429 queue_index));
02dd3291 7430 break;
6a773a15
BB
7431 }
7432
7433 return insn - insn_buf;
7434}
7435
4fbac77d
AI
 7436/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF, where S is the type
 7437 * of the context Structure, F is the Field in the context structure that holds
 7438 * a pointer to the Nested Structure of type NS that has the field NF.
7439 *
 7440 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to
 7441 * make sure that SIZE is not greater than the actual size of S.F.NF.
7442 *
7443 * If offset OFF is provided, the load happens from that offset relative to
7444 * offset of NF.
7445 */
7446#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
7447 do { \
7448 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
7449 si->src_reg, offsetof(S, F)); \
7450 *insn++ = BPF_LDX_MEM( \
7451 SIZE, si->dst_reg, si->dst_reg, \
7452 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
7453 target_size) \
7454 + OFF); \
7455 } while (0)
7456
7457#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
7458 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
7459 BPF_FIELD_SIZEOF(NS, NF), 0)
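/* Illustrative sketch (not from the original source): for the user_family
 * case further below, SOCK_ADDR_LOAD_NESTED_FIELD() expands to roughly
 *
 *	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, uaddr),
 *			      si->dst_reg, si->src_reg,
 *			      offsetof(struct bpf_sock_addr_kern, uaddr));
 *	*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 *			      bpf_target_off(struct sockaddr, sa_family,
 *					     FIELD_SIZEOF(struct sockaddr,
 *							  sa_family),
 *					     target_size));
 *
 * i.e. first dereference the S.F pointer, then load NF through it.
 */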
7460
 7461/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
 7462 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for the store operation.
 7463 *
 7464 * It doesn't support a SIZE argument though, since narrow stores are not
 7465 * supported for now.
 7466 *
 7467 * In addition it uses Temporary Field TF (a member of struct S) as the 3rd
 7468 * "register", since the two registers available in convert_ctx_access are not
 7469 * enough: we can't overwrite SRC, since it contains the value to store, nor
 7470 * DST, since it contains the pointer to context that may be used by later
 7471 * instructions. But we need a temporary place to save the pointer to the nested
 7472 * structure whose field we want to store to.
7473 */
7474#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
7475 do { \
7476 int tmp_reg = BPF_REG_9; \
7477 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
7478 --tmp_reg; \
7479 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
7480 --tmp_reg; \
7481 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
7482 offsetof(S, TF)); \
7483 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
7484 si->dst_reg, offsetof(S, F)); \
7485 *insn++ = BPF_STX_MEM( \
7486 BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
7487 bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
7488 target_size) \
7489 + OFF); \
7490 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
7491 offsetof(S, TF)); \
7492 } while (0)
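/* Illustrative sketch (not from the original source): in rough C terms the
 * store sequence above is, with tmp_reg chosen to collide with neither
 * si->dst_reg nor si->src_reg,
 *
 *	ctx->TF      = tmp_reg;		spill the borrowed register
 *	tmp_reg      = ctx->F;		pointer to the nested struct NS
 *	tmp_reg->NF  = value;		the store itself (value == src_reg)
 *	tmp_reg      = ctx->TF;		restore the borrowed register
 */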
7493
7494#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
7495 TF) \
7496 do { \
7497 if (type == BPF_WRITE) { \
7498 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
7499 TF); \
7500 } else { \
7501 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
7502 S, NS, F, NF, SIZE, OFF); \
7503 } \
7504 } while (0)
7505
7506#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
7507 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
7508 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
7509
7510static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
7511 const struct bpf_insn *si,
7512 struct bpf_insn *insn_buf,
7513 struct bpf_prog *prog, u32 *target_size)
7514{
7515 struct bpf_insn *insn = insn_buf;
7516 int off;
7517
7518 switch (si->off) {
7519 case offsetof(struct bpf_sock_addr, user_family):
7520 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
7521 struct sockaddr, uaddr, sa_family);
7522 break;
7523
7524 case offsetof(struct bpf_sock_addr, user_ip4):
7525 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
7526 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
7527 sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
7528 break;
7529
7530 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
7531 off = si->off;
7532 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
7533 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
7534 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
7535 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
7536 tmp_reg);
7537 break;
7538
7539 case offsetof(struct bpf_sock_addr, user_port):
 7540 /* To get the port we need to know sa_family first and then treat
 7541 * sockaddr as either sockaddr_in or sockaddr_in6.
 7542 * Though we can simplify since the port field has the same offset
 7543 * and size in both structures.
7544 * Here we check this invariant and use just one of the
7545 * structures if it's true.
7546 */
7547 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
7548 offsetof(struct sockaddr_in6, sin6_port));
7549 BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
7550 FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
7551 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
7552 struct sockaddr_in6, uaddr,
7553 sin6_port, tmp_reg);
7554 break;
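/* Side note (not from the original source): on the common ABIs both
 * sin_port and sin6_port sit directly after the 2-byte family field, which
 * is exactly the invariant the two BUILD_BUG_ON()s above pin down so a
 * single sockaddr_in6-based access can serve both address families.
 */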
7555
7556 case offsetof(struct bpf_sock_addr, family):
7557 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
7558 struct sock, sk, sk_family);
7559 break;
7560
7561 case offsetof(struct bpf_sock_addr, type):
7562 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
7563 struct bpf_sock_addr_kern, struct sock, sk,
7564 __sk_flags_offset, BPF_W, 0);
7565 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
7566 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
7567 break;
7568
7569 case offsetof(struct bpf_sock_addr, protocol):
7570 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
7571 struct bpf_sock_addr_kern, struct sock, sk,
7572 __sk_flags_offset, BPF_W, 0);
7573 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
7574 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
7575 SK_FL_PROTO_SHIFT);
7576 break;
1cedee13
AI
7577
7578 case offsetof(struct bpf_sock_addr, msg_src_ip4):
7579 /* Treat t_ctx as struct in_addr for msg_src_ip4. */
7580 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
7581 struct bpf_sock_addr_kern, struct in_addr, t_ctx,
7582 s_addr, BPF_SIZE(si->code), 0, tmp_reg);
7583 break;
7584
7585 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
7586 msg_src_ip6[3]):
7587 off = si->off;
7588 off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
7589 /* Treat t_ctx as struct in6_addr for msg_src_ip6. */
7590 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
7591 struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
7592 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
7593 break;
4fbac77d
AI
7594 }
7595
7596 return insn - insn_buf;
7597}
7598
40304b2a
LB
7599static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
7600 const struct bpf_insn *si,
7601 struct bpf_insn *insn_buf,
f96da094
DB
7602 struct bpf_prog *prog,
7603 u32 *target_size)
40304b2a
LB
7604{
7605 struct bpf_insn *insn = insn_buf;
7606 int off;
7607
9b1f3d6e
MKL
7608/* Helper macro for adding read access to tcp_sock or sock fields. */
7609#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
7610 do { \
7611 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
7612 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
7613 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
7614 struct bpf_sock_ops_kern, \
7615 is_fullsock), \
7616 si->dst_reg, si->src_reg, \
7617 offsetof(struct bpf_sock_ops_kern, \
7618 is_fullsock)); \
7619 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
7620 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
7621 struct bpf_sock_ops_kern, sk),\
7622 si->dst_reg, si->src_reg, \
7623 offsetof(struct bpf_sock_ops_kern, sk));\
7624 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
7625 OBJ_FIELD), \
7626 si->dst_reg, si->dst_reg, \
7627 offsetof(OBJ, OBJ_FIELD)); \
7628 } while (0)
7629
7630#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
7631 SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
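/* Side note (not from the original source): because the read above is
 * guarded by is_fullsock, the BPF_JEQ skips both loads when the socket is
 * not a full socket, leaving si->dst_reg with the just-loaded 0.  Programs
 * therefore read 0 for tcp_sock/sock fields on request or timewait sockets
 * instead of dereferencing a socket that lacks those fields.
 */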
7632
7633/* Helper macro for adding write access to tcp_sock or sock fields.
7634 * The macro is called with two registers, dst_reg which contains a pointer
7635 * to ctx (context) and src_reg which contains the value that should be
7636 * stored. However, we need an additional register since we cannot overwrite
7637 * dst_reg because it may be used later in the program.
 7638 * Instead we "borrow" one of the other registers. We first save its value
7639 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
7640 * it at the end of the macro.
7641 */
7642#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
7643 do { \
7644 int reg = BPF_REG_9; \
7645 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
7646 FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
7647 if (si->dst_reg == reg || si->src_reg == reg) \
7648 reg--; \
7649 if (si->dst_reg == reg || si->src_reg == reg) \
7650 reg--; \
7651 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
7652 offsetof(struct bpf_sock_ops_kern, \
7653 temp)); \
7654 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
7655 struct bpf_sock_ops_kern, \
7656 is_fullsock), \
7657 reg, si->dst_reg, \
7658 offsetof(struct bpf_sock_ops_kern, \
7659 is_fullsock)); \
7660 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
7661 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
7662 struct bpf_sock_ops_kern, sk),\
7663 reg, si->dst_reg, \
7664 offsetof(struct bpf_sock_ops_kern, sk));\
7665 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
7666 reg, si->src_reg, \
7667 offsetof(OBJ, OBJ_FIELD)); \
7668 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
7669 offsetof(struct bpf_sock_ops_kern, \
7670 temp)); \
7671 } while (0)
7672
7673#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
7674 do { \
7675 if (TYPE == BPF_WRITE) \
7676 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
7677 else \
7678 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
7679 } while (0)
7680
7681 CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_sock_ops,
7682 SOCK_OPS_GET_TCP_SOCK_FIELD);
7683
7684 if (insn > insn_buf)
7685 return insn - insn_buf;
7686
40304b2a
LB
7687 switch (si->off) {
7688 case offsetof(struct bpf_sock_ops, op) ...
7689 offsetof(struct bpf_sock_ops, replylong[3]):
7690 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
7691 FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
7692 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
7693 FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
7694 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
7695 FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
7696 off = si->off;
7697 off -= offsetof(struct bpf_sock_ops, op);
7698 off += offsetof(struct bpf_sock_ops_kern, op);
7699 if (type == BPF_WRITE)
7700 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
7701 off);
7702 else
7703 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
7704 off);
7705 break;
7706
7707 case offsetof(struct bpf_sock_ops, family):
7708 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
7709
7710 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7711 struct bpf_sock_ops_kern, sk),
7712 si->dst_reg, si->src_reg,
7713 offsetof(struct bpf_sock_ops_kern, sk));
7714 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7715 offsetof(struct sock_common, skc_family));
7716 break;
7717
7718 case offsetof(struct bpf_sock_ops, remote_ip4):
7719 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
7720
7721 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7722 struct bpf_sock_ops_kern, sk),
7723 si->dst_reg, si->src_reg,
7724 offsetof(struct bpf_sock_ops_kern, sk));
7725 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7726 offsetof(struct sock_common, skc_daddr));
7727 break;
7728
7729 case offsetof(struct bpf_sock_ops, local_ip4):
303def35
JF
7730 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7731 skc_rcv_saddr) != 4);
40304b2a
LB
7732
7733 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7734 struct bpf_sock_ops_kern, sk),
7735 si->dst_reg, si->src_reg,
7736 offsetof(struct bpf_sock_ops_kern, sk));
7737 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7738 offsetof(struct sock_common,
7739 skc_rcv_saddr));
7740 break;
7741
7742 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
7743 offsetof(struct bpf_sock_ops, remote_ip6[3]):
7744#if IS_ENABLED(CONFIG_IPV6)
7745 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7746 skc_v6_daddr.s6_addr32[0]) != 4);
7747
7748 off = si->off;
7749 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
7750 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7751 struct bpf_sock_ops_kern, sk),
7752 si->dst_reg, si->src_reg,
7753 offsetof(struct bpf_sock_ops_kern, sk));
7754 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7755 offsetof(struct sock_common,
7756 skc_v6_daddr.s6_addr32[0]) +
7757 off);
7758#else
7759 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7760#endif
7761 break;
7762
7763 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
7764 offsetof(struct bpf_sock_ops, local_ip6[3]):
7765#if IS_ENABLED(CONFIG_IPV6)
7766 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7767 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
7768
7769 off = si->off;
7770 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
7771 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7772 struct bpf_sock_ops_kern, sk),
7773 si->dst_reg, si->src_reg,
7774 offsetof(struct bpf_sock_ops_kern, sk));
7775 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7776 offsetof(struct sock_common,
7777 skc_v6_rcv_saddr.s6_addr32[0]) +
7778 off);
7779#else
7780 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7781#endif
7782 break;
7783
7784 case offsetof(struct bpf_sock_ops, remote_port):
7785 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
7786
7787 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7788 struct bpf_sock_ops_kern, sk),
7789 si->dst_reg, si->src_reg,
7790 offsetof(struct bpf_sock_ops_kern, sk));
7791 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7792 offsetof(struct sock_common, skc_dport));
7793#ifndef __BIG_ENDIAN_BITFIELD
7794 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
7795#endif
7796 break;
7797
7798 case offsetof(struct bpf_sock_ops, local_port):
7799 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
7800
7801 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7802 struct bpf_sock_ops_kern, sk),
7803 si->dst_reg, si->src_reg,
7804 offsetof(struct bpf_sock_ops_kern, sk));
7805 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7806 offsetof(struct sock_common, skc_num));
7807 break;
f19397a5
LB
7808
7809 case offsetof(struct bpf_sock_ops, is_fullsock):
7810 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7811 struct bpf_sock_ops_kern,
7812 is_fullsock),
7813 si->dst_reg, si->src_reg,
7814 offsetof(struct bpf_sock_ops_kern,
7815 is_fullsock));
7816 break;
7817
44f0e430
LB
7818 case offsetof(struct bpf_sock_ops, state):
7819 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
7820
7821 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7822 struct bpf_sock_ops_kern, sk),
7823 si->dst_reg, si->src_reg,
7824 offsetof(struct bpf_sock_ops_kern, sk));
7825 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
7826 offsetof(struct sock_common, skc_state));
7827 break;
7828
7829 case offsetof(struct bpf_sock_ops, rtt_min):
7830 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
7831 sizeof(struct minmax));
7832 BUILD_BUG_ON(sizeof(struct minmax) <
7833 sizeof(struct minmax_sample));
7834
7835 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
7836 struct bpf_sock_ops_kern, sk),
7837 si->dst_reg, si->src_reg,
7838 offsetof(struct bpf_sock_ops_kern, sk));
7839 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7840 offsetof(struct tcp_sock, rtt_min) +
7841 FIELD_SIZEOF(struct minmax_sample, t));
7842 break;
7843
b13d8807
LB
7844 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
7845 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
7846 struct tcp_sock);
7847 break;
44f0e430 7848
44f0e430 7849 case offsetof(struct bpf_sock_ops, sk_txhash):
6f9bd3d7
LB
7850 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
7851 struct sock, type);
44f0e430 7852 break;
40304b2a
LB
7853 }
7854 return insn - insn_buf;
7855}
7856
8108a775
JF
7857static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
7858 const struct bpf_insn *si,
7859 struct bpf_insn *insn_buf,
7860 struct bpf_prog *prog, u32 *target_size)
7861{
7862 struct bpf_insn *insn = insn_buf;
7863 int off;
7864
7865 switch (si->off) {
7866 case offsetof(struct __sk_buff, data_end):
7867 off = si->off;
7868 off -= offsetof(struct __sk_buff, data_end);
7869 off += offsetof(struct sk_buff, cb);
7870 off += offsetof(struct tcp_skb_cb, bpf.data_end);
7871 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
7872 si->src_reg, off);
7873 break;
7874 default:
7875 return bpf_convert_ctx_access(type, si, insn_buf, prog,
7876 target_size);
7877 }
7878
7879 return insn - insn_buf;
7880}
7881
4f738adb
JF
7882static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
7883 const struct bpf_insn *si,
7884 struct bpf_insn *insn_buf,
7885 struct bpf_prog *prog, u32 *target_size)
7886{
7887 struct bpf_insn *insn = insn_buf;
720e7f38 7888#if IS_ENABLED(CONFIG_IPV6)
303def35 7889 int off;
720e7f38 7890#endif
4f738adb 7891
7a69c0f2
JF
 7892 /* convert ctx uses the fact that the sg element is first in the struct */
7893 BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
7894
4f738adb
JF
7895 switch (si->off) {
7896 case offsetof(struct sk_msg_md, data):
604326b4 7897 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
4f738adb 7898 si->dst_reg, si->src_reg,
604326b4 7899 offsetof(struct sk_msg, data));
4f738adb
JF
7900 break;
7901 case offsetof(struct sk_msg_md, data_end):
604326b4 7902 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
4f738adb 7903 si->dst_reg, si->src_reg,
604326b4 7904 offsetof(struct sk_msg, data_end));
4f738adb 7905 break;
303def35
JF
7906 case offsetof(struct sk_msg_md, family):
7907 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
7908
7909 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 7910 struct sk_msg, sk),
303def35 7911 si->dst_reg, si->src_reg,
604326b4 7912 offsetof(struct sk_msg, sk));
303def35
JF
7913 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7914 offsetof(struct sock_common, skc_family));
7915 break;
7916
7917 case offsetof(struct sk_msg_md, remote_ip4):
7918 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
7919
7920 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 7921 struct sk_msg, sk),
303def35 7922 si->dst_reg, si->src_reg,
604326b4 7923 offsetof(struct sk_msg, sk));
303def35
JF
7924 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7925 offsetof(struct sock_common, skc_daddr));
7926 break;
7927
7928 case offsetof(struct sk_msg_md, local_ip4):
7929 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7930 skc_rcv_saddr) != 4);
7931
7932 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 7933 struct sk_msg, sk),
303def35 7934 si->dst_reg, si->src_reg,
604326b4 7935 offsetof(struct sk_msg, sk));
303def35
JF
7936 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7937 offsetof(struct sock_common,
7938 skc_rcv_saddr));
7939 break;
7940
7941 case offsetof(struct sk_msg_md, remote_ip6[0]) ...
7942 offsetof(struct sk_msg_md, remote_ip6[3]):
7943#if IS_ENABLED(CONFIG_IPV6)
7944 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7945 skc_v6_daddr.s6_addr32[0]) != 4);
7946
7947 off = si->off;
7948 off -= offsetof(struct sk_msg_md, remote_ip6[0]);
7949 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 7950 struct sk_msg, sk),
303def35 7951 si->dst_reg, si->src_reg,
604326b4 7952 offsetof(struct sk_msg, sk));
303def35
JF
7953 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7954 offsetof(struct sock_common,
7955 skc_v6_daddr.s6_addr32[0]) +
7956 off);
7957#else
7958 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7959#endif
7960 break;
7961
7962 case offsetof(struct sk_msg_md, local_ip6[0]) ...
7963 offsetof(struct sk_msg_md, local_ip6[3]):
7964#if IS_ENABLED(CONFIG_IPV6)
7965 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
7966 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
7967
7968 off = si->off;
7969 off -= offsetof(struct sk_msg_md, local_ip6[0]);
7970 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 7971 struct sk_msg, sk),
303def35 7972 si->dst_reg, si->src_reg,
604326b4 7973 offsetof(struct sk_msg, sk));
303def35
JF
7974 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
7975 offsetof(struct sock_common,
7976 skc_v6_rcv_saddr.s6_addr32[0]) +
7977 off);
7978#else
7979 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
7980#endif
7981 break;
7982
7983 case offsetof(struct sk_msg_md, remote_port):
7984 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
7985
7986 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 7987 struct sk_msg, sk),
303def35 7988 si->dst_reg, si->src_reg,
604326b4 7989 offsetof(struct sk_msg, sk));
303def35
JF
7990 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
7991 offsetof(struct sock_common, skc_dport));
7992#ifndef __BIG_ENDIAN_BITFIELD
7993 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
7994#endif
7995 break;
7996
7997 case offsetof(struct sk_msg_md, local_port):
7998 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
7999
8000 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 8001 struct sk_msg, sk),
303def35 8002 si->dst_reg, si->src_reg,
604326b4 8003 offsetof(struct sk_msg, sk));
303def35
JF
8004 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8005 offsetof(struct sock_common, skc_num));
8006 break;
3bdbd022
JF
8007
8008 case offsetof(struct sk_msg_md, size):
8009 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
8010 si->dst_reg, si->src_reg,
8011 offsetof(struct sk_msg_sg, size));
8012 break;
4f738adb
JF
8013 }
8014
8015 return insn - insn_buf;
8016}
8017
7de16e3a 8018const struct bpf_verifier_ops sk_filter_verifier_ops = {
4936e352
DB
8019 .get_func_proto = sk_filter_func_proto,
8020 .is_valid_access = sk_filter_is_valid_access,
2492d3b8 8021 .convert_ctx_access = bpf_convert_ctx_access,
e0cea7ce 8022 .gen_ld_abs = bpf_gen_ld_abs,
89aa0758
AS
8023};
8024
7de16e3a 8025const struct bpf_prog_ops sk_filter_prog_ops = {
61f3c964 8026 .test_run = bpf_prog_test_run_skb,
7de16e3a
JK
8027};
8028
8029const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
4936e352
DB
8030 .get_func_proto = tc_cls_act_func_proto,
8031 .is_valid_access = tc_cls_act_is_valid_access,
374fb54e 8032 .convert_ctx_access = tc_cls_act_convert_ctx_access,
36bbef52 8033 .gen_prologue = tc_cls_act_prologue,
e0cea7ce 8034 .gen_ld_abs = bpf_gen_ld_abs,
7de16e3a
JK
8035};
8036
8037const struct bpf_prog_ops tc_cls_act_prog_ops = {
1cf1cae9 8038 .test_run = bpf_prog_test_run_skb,
608cd71a
AS
8039};
8040
7de16e3a 8041const struct bpf_verifier_ops xdp_verifier_ops = {
6a773a15
BB
8042 .get_func_proto = xdp_func_proto,
8043 .is_valid_access = xdp_is_valid_access,
8044 .convert_ctx_access = xdp_convert_ctx_access,
b09928b9 8045 .gen_prologue = bpf_noop_prologue,
7de16e3a
JK
8046};
8047
8048const struct bpf_prog_ops xdp_prog_ops = {
1cf1cae9 8049 .test_run = bpf_prog_test_run_xdp,
6a773a15
BB
8050};
8051
7de16e3a 8052const struct bpf_verifier_ops cg_skb_verifier_ops = {
cd339431 8053 .get_func_proto = cg_skb_func_proto,
b39b5f41 8054 .is_valid_access = cg_skb_is_valid_access,
2492d3b8 8055 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
8056};
8057
8058const struct bpf_prog_ops cg_skb_prog_ops = {
1cf1cae9 8059 .test_run = bpf_prog_test_run_skb,
0e33661d
DM
8060};
8061
cd3092c7
MX
8062const struct bpf_verifier_ops lwt_in_verifier_ops = {
8063 .get_func_proto = lwt_in_func_proto,
3a0af8fd 8064 .is_valid_access = lwt_is_valid_access,
2492d3b8 8065 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
8066};
8067
cd3092c7
MX
8068const struct bpf_prog_ops lwt_in_prog_ops = {
8069 .test_run = bpf_prog_test_run_skb,
8070};
8071
8072const struct bpf_verifier_ops lwt_out_verifier_ops = {
8073 .get_func_proto = lwt_out_func_proto,
3a0af8fd 8074 .is_valid_access = lwt_is_valid_access,
2492d3b8 8075 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
8076};
8077
cd3092c7 8078const struct bpf_prog_ops lwt_out_prog_ops = {
1cf1cae9 8079 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
8080};
8081
7de16e3a 8082const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
3a0af8fd
TG
8083 .get_func_proto = lwt_xmit_func_proto,
8084 .is_valid_access = lwt_is_valid_access,
2492d3b8 8085 .convert_ctx_access = bpf_convert_ctx_access,
3a0af8fd 8086 .gen_prologue = tc_cls_act_prologue,
7de16e3a
JK
8087};
8088
8089const struct bpf_prog_ops lwt_xmit_prog_ops = {
1cf1cae9 8090 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
8091};
8092
004d4b27
MX
8093const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
8094 .get_func_proto = lwt_seg6local_func_proto,
8095 .is_valid_access = lwt_is_valid_access,
8096 .convert_ctx_access = bpf_convert_ctx_access,
8097};
8098
8099const struct bpf_prog_ops lwt_seg6local_prog_ops = {
8100 .test_run = bpf_prog_test_run_skb,
8101};
8102
7de16e3a 8103const struct bpf_verifier_ops cg_sock_verifier_ops = {
ae2cf1c4 8104 .get_func_proto = sock_filter_func_proto,
61023658 8105 .is_valid_access = sock_filter_is_valid_access,
c64b7983 8106 .convert_ctx_access = bpf_sock_convert_ctx_access,
61023658
DA
8107};
8108
7de16e3a
JK
8109const struct bpf_prog_ops cg_sock_prog_ops = {
8110};
8111
4fbac77d
AI
8112const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
8113 .get_func_proto = sock_addr_func_proto,
8114 .is_valid_access = sock_addr_is_valid_access,
8115 .convert_ctx_access = sock_addr_convert_ctx_access,
8116};
8117
8118const struct bpf_prog_ops cg_sock_addr_prog_ops = {
8119};
8120
7de16e3a 8121const struct bpf_verifier_ops sock_ops_verifier_ops = {
8c4b4c7e 8122 .get_func_proto = sock_ops_func_proto,
40304b2a
LB
8123 .is_valid_access = sock_ops_is_valid_access,
8124 .convert_ctx_access = sock_ops_convert_ctx_access,
8125};
8126
7de16e3a
JK
8127const struct bpf_prog_ops sock_ops_prog_ops = {
8128};
8129
8130const struct bpf_verifier_ops sk_skb_verifier_ops = {
b005fd18
JF
8131 .get_func_proto = sk_skb_func_proto,
8132 .is_valid_access = sk_skb_is_valid_access,
8108a775 8133 .convert_ctx_access = sk_skb_convert_ctx_access,
8a31db56 8134 .gen_prologue = sk_skb_prologue,
b005fd18
JF
8135};
8136
7de16e3a
JK
8137const struct bpf_prog_ops sk_skb_prog_ops = {
8138};
8139
4f738adb
JF
8140const struct bpf_verifier_ops sk_msg_verifier_ops = {
8141 .get_func_proto = sk_msg_func_proto,
8142 .is_valid_access = sk_msg_is_valid_access,
8143 .convert_ctx_access = sk_msg_convert_ctx_access,
b09928b9 8144 .gen_prologue = bpf_noop_prologue,
4f738adb
JF
8145};
8146
8147const struct bpf_prog_ops sk_msg_prog_ops = {
8148};
8149
d58e468b
PP
8150const struct bpf_verifier_ops flow_dissector_verifier_ops = {
8151 .get_func_proto = flow_dissector_func_proto,
8152 .is_valid_access = flow_dissector_is_valid_access,
8153 .convert_ctx_access = bpf_convert_ctx_access,
8154};
8155
8156const struct bpf_prog_ops flow_dissector_prog_ops = {
b7a1848e 8157 .test_run = bpf_prog_test_run_flow_dissector,
d58e468b
PP
8158};
8159
8ced425e 8160int sk_detach_filter(struct sock *sk)
55b33325
PE
8161{
8162 int ret = -ENOENT;
8163 struct sk_filter *filter;
8164
d59577b6
VB
8165 if (sock_flag(sk, SOCK_FILTER_LOCKED))
8166 return -EPERM;
8167
8ced425e
HFS
8168 filter = rcu_dereference_protected(sk->sk_filter,
8169 lockdep_sock_is_held(sk));
55b33325 8170 if (filter) {
a9b3cd7f 8171 RCU_INIT_POINTER(sk->sk_filter, NULL);
46bcf14f 8172 sk_filter_uncharge(sk, filter);
55b33325
PE
8173 ret = 0;
8174 }
a3ea269b 8175
55b33325
PE
8176 return ret;
8177}
8ced425e 8178EXPORT_SYMBOL_GPL(sk_detach_filter);
a8fc9277 8179
a3ea269b
DB
8180int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
8181 unsigned int len)
a8fc9277 8182{
a3ea269b 8183 struct sock_fprog_kern *fprog;
a8fc9277 8184 struct sk_filter *filter;
a3ea269b 8185 int ret = 0;
a8fc9277
PE
8186
8187 lock_sock(sk);
8188 filter = rcu_dereference_protected(sk->sk_filter,
8ced425e 8189 lockdep_sock_is_held(sk));
a8fc9277
PE
8190 if (!filter)
8191 goto out;
a3ea269b
DB
8192
8193 /* We're copying the filter that has been originally attached,
93d08b69
DB
 8194 * so no conversion/decode is needed anymore. eBPF programs that
8195 * have no original program cannot be dumped through this.
a3ea269b 8196 */
93d08b69 8197 ret = -EACCES;
7ae457c1 8198 fprog = filter->prog->orig_prog;
93d08b69
DB
8199 if (!fprog)
8200 goto out;
a3ea269b
DB
8201
8202 ret = fprog->len;
a8fc9277 8203 if (!len)
a3ea269b 8204 /* User space only enquires about the number of filter blocks. */
a8fc9277 8205 goto out;
a3ea269b 8206
a8fc9277 8207 ret = -EINVAL;
a3ea269b 8208 if (len < fprog->len)
a8fc9277
PE
8209 goto out;
8210
8211 ret = -EFAULT;
009937e7 8212 if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
a3ea269b 8213 goto out;
a8fc9277 8214
a3ea269b
DB
8215 /* Instead of bytes, the API requests to return the number
8216 * of filter blocks.
8217 */
8218 ret = fprog->len;
a8fc9277
PE
8219out:
8220 release_sock(sk);
8221 return ret;
8222}
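/* Usage sketch (userspace, not from the original source): dumping the
 * attached classic filter back out via getsockopt().  This assumes the
 * SO_GET_FILTER convention implemented above, where optlen counts
 * sock_filter blocks rather than bytes and a zero-length first call only
 * asks for the block count.
 *
 *	#include <linux/filter.h>
 *	#include <sys/socket.h>
 *	#include <stdlib.h>
 *
 *	#ifndef SO_GET_FILTER
 *	#define SO_GET_FILTER SO_ATTACH_FILTER
 *	#endif
 *
 *	static struct sock_filter *dump_filter(int fd, socklen_t *nblocks)
 *	{
 *		struct sock_filter *insns;
 *		socklen_t len = 0;
 *
 *		if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len))
 *			return NULL;
 *		insns = calloc(len, sizeof(*insns));
 *		if (!insns)
 *			return NULL;
 *		if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &len)) {
 *			free(insns);
 *			return NULL;
 *		}
 *		*nblocks = len;
 *		return insns;
 *	}
 */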
2dbb9b9e
MKL
8223
8224#ifdef CONFIG_INET
8225struct sk_reuseport_kern {
8226 struct sk_buff *skb;
8227 struct sock *sk;
8228 struct sock *selected_sk;
8229 void *data_end;
8230 u32 hash;
8231 u32 reuseport_id;
8232 bool bind_inany;
8233};
8234
8235static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
8236 struct sock_reuseport *reuse,
8237 struct sock *sk, struct sk_buff *skb,
8238 u32 hash)
8239{
8240 reuse_kern->skb = skb;
8241 reuse_kern->sk = sk;
8242 reuse_kern->selected_sk = NULL;
8243 reuse_kern->data_end = skb->data + skb_headlen(skb);
8244 reuse_kern->hash = hash;
8245 reuse_kern->reuseport_id = reuse->reuseport_id;
8246 reuse_kern->bind_inany = reuse->bind_inany;
8247}
8248
8249struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
8250 struct bpf_prog *prog, struct sk_buff *skb,
8251 u32 hash)
8252{
8253 struct sk_reuseport_kern reuse_kern;
8254 enum sk_action action;
8255
8256 bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
8257 action = BPF_PROG_RUN(prog, &reuse_kern);
8258
8259 if (action == SK_PASS)
8260 return reuse_kern.selected_sk;
8261 else
8262 return ERR_PTR(-ECONNREFUSED);
8263}
8264
8265BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
8266 struct bpf_map *, map, void *, key, u32, flags)
8267{
8268 struct sock_reuseport *reuse;
8269 struct sock *selected_sk;
8270
8271 selected_sk = map->ops->map_lookup_elem(map, key);
8272 if (!selected_sk)
8273 return -ENOENT;
8274
8275 reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
8276 if (!reuse)
8277 /* selected_sk is unhashed (e.g. by close()) after the
 8278 * above map_lookup_elem(). Treat selected_sk as having
 8279 * already been removed from the map.
8280 */
8281 return -ENOENT;
8282
8283 if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
8284 struct sock *sk;
8285
8286 if (unlikely(!reuse_kern->reuseport_id))
8287 /* There is a small race between adding the
8288 * sk to the map and setting the
8289 * reuse_kern->reuseport_id.
 8290 * Treat it as if the sk has not been added to
 8291 * the bpf map yet.
8292 */
8293 return -ENOENT;
8294
8295 sk = reuse_kern->sk;
8296 if (sk->sk_protocol != selected_sk->sk_protocol)
8297 return -EPROTOTYPE;
8298 else if (sk->sk_family != selected_sk->sk_family)
8299 return -EAFNOSUPPORT;
8300
8301 /* Catch all. Likely bound to a different sockaddr. */
8302 return -EBADFD;
8303 }
8304
8305 reuse_kern->selected_sk = selected_sk;
8306
8307 return 0;
8308}
8309
8310static const struct bpf_func_proto sk_select_reuseport_proto = {
8311 .func = sk_select_reuseport,
8312 .gpl_only = false,
8313 .ret_type = RET_INTEGER,
8314 .arg1_type = ARG_PTR_TO_CTX,
8315 .arg2_type = ARG_CONST_MAP_PTR,
8316 .arg3_type = ARG_PTR_TO_MAP_KEY,
8317 .arg4_type = ARG_ANYTHING,
8318};
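/* Usage sketch (BPF program side, not from the original source): a minimal
 * SK_REUSEPORT program steering by hash into a 4-entry
 * BPF_MAP_TYPE_REUSEPORT_SOCKARRAY.  The map name "steer_map" and the
 * modulo policy are made up for the example; the error paths mirror the
 * cases handled by sk_select_reuseport() above.
 *
 *	SEC("sk_reuseport")
 *	int select_by_hash(struct sk_reuseport_md *reuse)
 *	{
 *		__u32 key = reuse->hash % 4;
 *
 *		if (bpf_sk_select_reuseport(reuse, &steer_map, &key, 0))
 *			return SK_DROP;
 *		return SK_PASS;
 *	}
 */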
8319
8320BPF_CALL_4(sk_reuseport_load_bytes,
8321 const struct sk_reuseport_kern *, reuse_kern, u32, offset,
8322 void *, to, u32, len)
8323{
8324 return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
8325}
8326
8327static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
8328 .func = sk_reuseport_load_bytes,
8329 .gpl_only = false,
8330 .ret_type = RET_INTEGER,
8331 .arg1_type = ARG_PTR_TO_CTX,
8332 .arg2_type = ARG_ANYTHING,
8333 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
8334 .arg4_type = ARG_CONST_SIZE,
8335};
8336
8337BPF_CALL_5(sk_reuseport_load_bytes_relative,
8338 const struct sk_reuseport_kern *, reuse_kern, u32, offset,
8339 void *, to, u32, len, u32, start_header)
8340{
8341 return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
8342 len, start_header);
8343}
8344
8345static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
8346 .func = sk_reuseport_load_bytes_relative,
8347 .gpl_only = false,
8348 .ret_type = RET_INTEGER,
8349 .arg1_type = ARG_PTR_TO_CTX,
8350 .arg2_type = ARG_ANYTHING,
8351 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
8352 .arg4_type = ARG_CONST_SIZE,
8353 .arg5_type = ARG_ANYTHING,
8354};
8355
8356static const struct bpf_func_proto *
8357sk_reuseport_func_proto(enum bpf_func_id func_id,
8358 const struct bpf_prog *prog)
8359{
8360 switch (func_id) {
8361 case BPF_FUNC_sk_select_reuseport:
8362 return &sk_select_reuseport_proto;
8363 case BPF_FUNC_skb_load_bytes:
8364 return &sk_reuseport_load_bytes_proto;
8365 case BPF_FUNC_skb_load_bytes_relative:
8366 return &sk_reuseport_load_bytes_relative_proto;
8367 default:
8368 return bpf_base_func_proto(func_id);
8369 }
8370}
8371
8372static bool
8373sk_reuseport_is_valid_access(int off, int size,
8374 enum bpf_access_type type,
8375 const struct bpf_prog *prog,
8376 struct bpf_insn_access_aux *info)
8377{
8378 const u32 size_default = sizeof(__u32);
8379
8380 if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
8381 off % size || type != BPF_READ)
8382 return false;
8383
8384 switch (off) {
8385 case offsetof(struct sk_reuseport_md, data):
8386 info->reg_type = PTR_TO_PACKET;
8387 return size == sizeof(__u64);
8388
8389 case offsetof(struct sk_reuseport_md, data_end):
8390 info->reg_type = PTR_TO_PACKET_END;
8391 return size == sizeof(__u64);
8392
8393 case offsetof(struct sk_reuseport_md, hash):
8394 return size == size_default;
8395
8396 /* Fields that allow narrowing */
8397 case offsetof(struct sk_reuseport_md, eth_protocol):
8398 if (size < FIELD_SIZEOF(struct sk_buff, protocol))
8399 return false;
4597b62f 8400 /* fall through */
2dbb9b9e
MKL
8401 case offsetof(struct sk_reuseport_md, ip_protocol):
8402 case offsetof(struct sk_reuseport_md, bind_inany):
8403 case offsetof(struct sk_reuseport_md, len):
8404 bpf_ctx_record_field_size(info, size_default);
8405 return bpf_ctx_narrow_access_ok(off, size, size_default);
8406
8407 default:
8408 return false;
8409 }
8410}
8411
8412#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
8413 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
8414 si->dst_reg, si->src_reg, \
8415 bpf_target_off(struct sk_reuseport_kern, F, \
8416 FIELD_SIZEOF(struct sk_reuseport_kern, F), \
8417 target_size)); \
8418 })
8419
8420#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
8421 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
8422 struct sk_buff, \
8423 skb, \
8424 SKB_FIELD)
8425
8426#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
8427 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
8428 struct sock, \
8429 sk, \
8430 SK_FIELD, BPF_SIZE, EXTRA_OFF)
8431
8432static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
8433 const struct bpf_insn *si,
8434 struct bpf_insn *insn_buf,
8435 struct bpf_prog *prog,
8436 u32 *target_size)
8437{
8438 struct bpf_insn *insn = insn_buf;
8439
8440 switch (si->off) {
8441 case offsetof(struct sk_reuseport_md, data):
8442 SK_REUSEPORT_LOAD_SKB_FIELD(data);
8443 break;
8444
8445 case offsetof(struct sk_reuseport_md, len):
8446 SK_REUSEPORT_LOAD_SKB_FIELD(len);
8447 break;
8448
8449 case offsetof(struct sk_reuseport_md, eth_protocol):
8450 SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
8451 break;
8452
8453 case offsetof(struct sk_reuseport_md, ip_protocol):
3f6e138d 8454 BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
2dbb9b9e
MKL
8455 SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
8456 BPF_W, 0);
8457 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
8458 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
8459 SK_FL_PROTO_SHIFT);
8460 /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
8461 * aware. No further narrowing or masking is needed.
8462 */
8463 *target_size = 1;
8464 break;
8465
8466 case offsetof(struct sk_reuseport_md, data_end):
8467 SK_REUSEPORT_LOAD_FIELD(data_end);
8468 break;
8469
8470 case offsetof(struct sk_reuseport_md, hash):
8471 SK_REUSEPORT_LOAD_FIELD(hash);
8472 break;
8473
8474 case offsetof(struct sk_reuseport_md, bind_inany):
8475 SK_REUSEPORT_LOAD_FIELD(bind_inany);
8476 break;
8477 }
8478
8479 return insn - insn_buf;
8480}
8481
8482const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
8483 .get_func_proto = sk_reuseport_func_proto,
8484 .is_valid_access = sk_reuseport_is_valid_access,
8485 .convert_ctx_access = sk_reuseport_convert_ctx_access,
8486};
8487
8488const struct bpf_prog_ops sk_reuseport_prog_ops = {
8489};
8490#endif /* CONFIG_INET */