1 From a94f54cc2145889b0a3462dbff2c0671462afc46 Mon Sep 17 00:00:00 2001
2 From: Jesper Dangaard Brouer <brouer@redhat.com>
3 Date: Fri, 29 Mar 2019 10:18:00 +0100
4 Subject: xdp: fix cpumap redirect SKB creation bug
6 [ Upstream commit 676e4a6fe703f2dae699ee9d56f14516f9ada4ea ]
8 We want to avoid leaking pointer info from xdp_frame (that is placed in
9 top of frame) like commit 6dfb970d3dbd ("xdp: avoid leaking info stored in
10 frame data on page reuse"), and followup commit 97e19cce05e5 ("bpf:
11 reserve xdp_frame size in xdp headroom") that reserve this headroom.
13 These changes also affected how cpumap constructed SKBs, as xdpf->headroom
14 size changed, the skb data starting point were in-effect shifted with 32
15 bytes (sizeof xdp_frame). This was still okay, as the cpumap frame_size
16 calculation also included xdpf->headroom which were reduced by same amount.
18 A bug was introduced in commit 77ea5f4cbe20 ("bpf/cpumap: make sure
19 frame_size for build_skb is aligned if headroom isn't"), where the
20 xdpf->headroom became part of the SKB_DATA_ALIGN rounding up. This
21 round-up to find the frame_size is in principle still correct as it does
22 not exceed the 2048 bytes frame_size (which is max for ixgbe and i40e),
23 but the 32 bytes offset of pkt_data_start puts this over the 2048 bytes
24 limit. This cause skb_shared_info to spill into next frame. It is a little
25 hard to trigger, as the SKB need to use above 15 skb_shinfo->frags[] as
26 far as I calculate. This does happen in practise for TCP streams when
27 skb_try_coalesce() kicks in.
29 KASAN can be used to detect these wrong memory accesses, I've seen:
30 BUG: KASAN: use-after-free in skb_try_coalesce+0x3cb/0x760
31 BUG: KASAN: wild-memory-access in skb_release_data+0xe2/0x250
33 Driver veth also construct a SKB from xdp_frame in this way, but is not
34 affected, as it doesn't reserve/deduct the room (used by xdp_frame) from
35 the SKB headroom. Instead is clears the pointers via xdp_scrub_frame(),
36 and allows SKB to use this area.
38 The fix in this patch is to do like veth and instead allow SKB to (re)use
39 the area occupied by xdp_frame, by clearing via xdp_scrub_frame(). (This
40 does kill the idea of the SKB being able to access (mem) info from this
41 area, but I guess it was a bad idea anyhow, and it was already killed by
44 Fixes: 77ea5f4cbe20 ("bpf/cpumap: make sure frame_size for build_skb is aligned if headroom isn't")
45 Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
46 Signed-off-by: Alexei Starovoitov <ast@kernel.org>
47 Signed-off-by: Sasha Levin (Microsoft) <sashal@kernel.org>
49 kernel/bpf/cpumap.c | 13 ++++++++++---
50 1 file changed, 10 insertions(+), 3 deletions(-)
52 diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
53 index 8974b3755670..3c18260403dd 100644
54 --- a/kernel/bpf/cpumap.c
55 +++ b/kernel/bpf/cpumap.c
56 @@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work)
57 static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
58 struct xdp_frame *xdpf)
60 + unsigned int hard_start_headroom;
61 unsigned int frame_size;
65 + /* Part of headroom was reserved to xdpf */
66 + hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
68 /* build_skb need to place skb_shared_info after SKB end, and
69 * also want to know the memory "truesize". Thus, need to
70 * know the memory frame size backing xdp_buff.
71 @@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
72 * is not at a fixed memory location, with mixed length
73 * packets, which is bad for cache-line hotness.
75 - frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) +
76 + frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
77 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
79 - pkt_data_start = xdpf->data - xdpf->headroom;
80 + pkt_data_start = xdpf->data - hard_start_headroom;
81 skb = build_skb(pkt_data_start, frame_size);
85 - skb_reserve(skb, xdpf->headroom);
86 + skb_reserve(skb, hard_start_headroom);
87 __skb_put(skb, xdpf->len);
89 skb_metadata_set(skb, xdpf->metasize);
90 @@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
91 * - RX ring dev queue index (skb_record_rx_queue)
94 + /* Allow SKB to reuse area used by xdp_frame */
95 + xdp_scrub_frame(xdpf);