From: Greg Kroah-Hartman Date: Thu, 3 Feb 2022 18:07:15 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v5.4.177~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d3d10a8f240dd0c06afa0d62eca0421574f12539;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: bpf-fix-truncated-jump-targets-on-heavy-expansions.patch netfilter-nat-limit-port-clash-resolution-attempts.patch netfilter-nat-remove-l4-protocol-port-rovers.patch --- diff --git a/queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch b/queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch new file mode 100644 index 00000000000..e2a1a5bcc4a --- /dev/null +++ b/queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch @@ -0,0 +1,202 @@ +From 050fad7c4534c13c8eb1d9c2ba66012e014773cb Mon Sep 17 00:00:00 2001 +From: Daniel Borkmann +Date: Thu, 17 May 2018 01:44:11 +0200 +Subject: bpf: fix truncated jump targets on heavy expansions + +From: Daniel Borkmann + +commit 050fad7c4534c13c8eb1d9c2ba66012e014773cb upstream. + +Recently during testing, I ran into the following panic: + + [ 207.892422] Internal error: Accessing user space memory outside uaccess.h routines: 96000004 [#1] SMP + [ 207.901637] Modules linked in: binfmt_misc [...] + [ 207.966530] CPU: 45 PID: 2256 Comm: test_verifier Tainted: G W 4.17.0-rc3+ #7 + [ 207.974956] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB18A 03/31/2017 + [ 207.982428] pstate: 60400005 (nZCv daif +PAN -UAO) + [ 207.987214] pc : bpf_skb_load_helper_8_no_cache+0x34/0xc0 + [ 207.992603] lr : 0xffff000000bdb754 + [ 207.996080] sp : ffff000013703ca0 + [ 207.999384] x29: ffff000013703ca0 x28: 0000000000000001 + [ 208.004688] x27: 0000000000000001 x26: 0000000000000000 + [ 208.009992] x25: ffff000013703ce0 x24: ffff800fb4afcb00 + [ 208.015295] x23: ffff00007d2f5038 x22: ffff00007d2f5000 + [ 208.020599] x21: fffffffffeff2a6f x20: 000000000000000a + [ 208.025903] x19: ffff000009578000 x18: 0000000000000a03 + [ 208.031206] x17: 0000000000000000 x16: 0000000000000000 + [ 208.036510] x15: 0000ffff9de83000 x14: 0000000000000000 + [ 208.041813] x13: 0000000000000000 x12: 0000000000000000 + [ 208.047116] x11: 0000000000000001 x10: ffff0000089e7f18 + [ 208.052419] x9 : fffffffffeff2a6f x8 : 0000000000000000 + [ 208.057723] x7 : 000000000000000a x6 : 00280c6160000000 + [ 208.063026] x5 : 0000000000000018 x4 : 0000000000007db6 + [ 208.068329] x3 : 000000000008647a x2 : 19868179b1484500 + [ 208.073632] x1 : 0000000000000000 x0 : ffff000009578c08 + [ 208.078938] Process test_verifier (pid: 2256, stack limit = 0x0000000049ca7974) + [ 208.086235] Call trace: + [ 208.088672] bpf_skb_load_helper_8_no_cache+0x34/0xc0 + [ 208.093713] 0xffff000000bdb754 + [ 208.096845] bpf_test_run+0x78/0xf8 + [ 208.100324] bpf_prog_test_run_skb+0x148/0x230 + [ 208.104758] sys_bpf+0x314/0x1198 + [ 208.108064] el0_svc_naked+0x30/0x34 + [ 208.111632] Code: 91302260 f9400001 f9001fa1 d2800001 (29500680) + [ 208.117717] ---[ end trace 263cb8a59b5bf29f ]--- + +The program itself which caused this had a long jump over the whole +instruction sequence where all of the inner instructions required +heavy expansions into multiple BPF instructions. Additionally, I also +had BPF hardening enabled which requires once more rewrites of all +constant values in order to blind them. Each time we rewrite insns, +bpf_adj_branches() would need to potentially adjust branch targets +which cross the patchlet boundary to accommodate for the additional +delta. 
Eventually that lead to the case where the target offset could +not fit into insn->off's upper 0x7fff limit anymore where then offset +wraps around becoming negative (in s16 universe), or vice versa +depending on the jump direction. + +Therefore it becomes necessary to detect and reject any such occasions +in a generic way for native eBPF and cBPF to eBPF migrations. For +the latter we can simply check bounds in the bpf_convert_filter()'s +BPF_EMIT_JMP helper macro and bail out once we surpass limits. The +bpf_patch_insn_single() for native eBPF (and cBPF to eBPF in case +of subsequent hardening) is a bit more complex in that we need to +detect such truncations before hitting the bpf_prog_realloc(). Thus +the latter is split into an extra pass to probe problematic offsets +on the original program in order to fail early. With that in place +and carefully tested I no longer hit the panic and the rewrites are +rejected properly. The above example panic I've seen on bpf-next, +though the issue itself is generic in that a guard against this issue +in bpf seems more appropriate in this case. + +Signed-off-by: Daniel Borkmann +Acked-by: Martin KaFai Lau +Signed-off-by: Alexei Starovoitov +[ab: Dropped BPF_PSEUDO_CALL hardening, introoduced in 4.16] +Signed-off-by: Alessio Balsini +Acked-by: Thadeu Lima de Souza Cascardo +Signed-off-by: Greg Kroah-Hartman +--- + kernel/bpf/core.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++-------- + net/core/filter.c | 11 ++++++++-- + 2 files changed, 60 insertions(+), 10 deletions(-) + +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -228,27 +228,57 @@ static bool bpf_is_jmp_and_has_target(co + BPF_OP(insn->code) != BPF_EXIT; + } + +-static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) ++static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, ++ u32 curr, const bool probe_pass) + { ++ const s32 off_min = S16_MIN, off_max = S16_MAX; ++ s32 off = insn->off; ++ ++ if (curr < pos && curr + off + 1 > pos) ++ off += delta; ++ else if (curr > pos + delta && curr + off + 1 <= pos + delta) ++ off -= delta; ++ if (off < off_min || off > off_max) ++ return -ERANGE; ++ if (!probe_pass) ++ insn->off = off; ++ return 0; ++} ++ ++static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, ++ const bool probe_pass) ++{ ++ u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0); + struct bpf_insn *insn = prog->insnsi; +- u32 i, insn_cnt = prog->len; ++ int ret = 0; + + for (i = 0; i < insn_cnt; i++, insn++) { ++ /* In the probing pass we still operate on the original, ++ * unpatched image in order to check overflows before we ++ * do any other adjustments. Therefore skip the patchlet. ++ */ ++ if (probe_pass && i == pos) { ++ i += delta + 1; ++ insn++; ++ } ++ + if (!bpf_is_jmp_and_has_target(insn)) + continue; + +- /* Adjust offset of jmps if we cross boundaries. */ +- if (i < pos && i + insn->off + 1 > pos) +- insn->off += delta; +- else if (i > pos + delta && i + insn->off + 1 <= pos + delta) +- insn->off -= delta; ++ /* Adjust offset of jmps if we cross patch boundaries. */ ++ ret = bpf_adj_delta_to_off(insn, pos, delta, i, probe_pass); ++ if (ret) ++ break; + } ++ ++ return ret; + } + + struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, + const struct bpf_insn *patch, u32 len) + { + u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; ++ const u32 cnt_max = S16_MAX; + struct bpf_prog *prog_adj; + + /* Since our patchlet doesn't expand the image, we're done. 
*/ +@@ -259,6 +289,15 @@ struct bpf_prog *bpf_patch_insn_single(s + + insn_adj_cnt = prog->len + insn_delta; + ++ /* Reject anything that would potentially let the insn->off ++ * target overflow when we have excessive program expansions. ++ * We need to probe here before we do any reallocation where ++ * we afterwards may not fail anymore. ++ */ ++ if (insn_adj_cnt > cnt_max && ++ bpf_adj_branches(prog, off, insn_delta, true)) ++ return NULL; ++ + /* Several new instructions need to be inserted. Make room + * for them. Likely, there's no need for a new allocation as + * last page could have large enough tailroom. +@@ -284,7 +323,11 @@ struct bpf_prog *bpf_patch_insn_single(s + sizeof(*patch) * insn_rest); + memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); + +- bpf_adj_branches(prog_adj, off, insn_delta); ++ /* We are guaranteed to not fail at this point, otherwise ++ * the ship has sailed to reverse to the original state. An ++ * overflow cannot happen at this point. ++ */ ++ BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false)); + + return prog_adj; + } +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -472,11 +472,18 @@ do_pass: + + #define BPF_EMIT_JMP \ + do { \ ++ const s32 off_min = S16_MIN, off_max = S16_MAX; \ ++ s32 off; \ ++ \ + if (target >= len || target < 0) \ + goto err; \ +- insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ ++ off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + /* Adjust pc relative offset for 2nd or 3rd insn. */ \ +- insn->off -= insn - tmp_insns; \ ++ off -= insn - tmp_insns; \ ++ /* Reject anything not fitting into insn->off. */ \ ++ if (off < off_min || off > off_max) \ ++ goto err; \ ++ insn->off = off; \ + } while (0) + + case BPF_JMP | BPF_JA: diff --git a/queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch b/queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch new file mode 100644 index 00000000000..06ebe3b6f9c --- /dev/null +++ b/queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch @@ -0,0 +1,80 @@ +From foo@baz Thu Feb 3 06:43:12 PM CET 2022 +From: Florian Westphal +Date: Thu, 3 Feb 2022 13:41:55 +0100 +Subject: netfilter: nat: limit port clash resolution attempts +To: +Cc: , Florian Westphal , Pablo Neira Ayuso , Vimal Agrawal +Message-ID: <20220203124155.16693-3-fw@strlen.de> + +From: Florian Westphal + +commit a504b703bb1da526a01593da0e4be2af9d9f5fa8 upstream. + +In case almost or all available ports are taken, clash resolution can +take a very long time, resulting in soft lockup. + +This can happen when many to-be-natted hosts connect to same +destination:port (e.g. a proxy) and all connections pass the same SNAT. + +Pick a random offset in the acceptable range, then try ever smaller +number of adjacent port numbers, until either the limit is reached or a +useable port was found. This results in at most 248 attempts +(128 + 64 + 32 + 16 + 8, i.e. 
4 restarts with new search offset) +instead of 64000+, + +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Vimal Agrawal +Signed-off-by: Greg Kroah-Hartman +--- + net/netfilter/nf_nat_proto_common.c | 29 +++++++++++++++++++++++------ + 1 file changed, 23 insertions(+), 6 deletions(-) + +--- a/net/netfilter/nf_nat_proto_common.c ++++ b/net/netfilter/nf_nat_proto_common.c +@@ -40,9 +40,10 @@ void nf_nat_l4proto_unique_tuple(const s + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) + { +- unsigned int range_size, min, max, i; ++ unsigned int range_size, min, max, i, attempts; + __be16 *portptr; +- u_int16_t off; ++ u16 off; ++ static const unsigned int max_attempts = 128; + + if (maniptype == NF_NAT_MANIP_SRC) + portptr = &tuple->src.u.all; +@@ -86,12 +87,28 @@ void nf_nat_l4proto_unique_tuple(const s + off = prandom_u32(); + } + +- for (i = 0; ; ++off) { ++ attempts = range_size; ++ if (attempts > max_attempts) ++ attempts = max_attempts; ++ ++ /* We are in softirq; doing a search of the entire range risks ++ * soft lockup when all tuples are already used. ++ * ++ * If we can't find any free port from first offset, pick a new ++ * one and try again, with ever smaller search window. ++ */ ++another_round: ++ for (i = 0; i < attempts; i++, off++) { + *portptr = htons(min + off % range_size); +- if (++i != range_size && nf_nat_used_tuple(tuple, ct)) +- continue; +- return; ++ if (!nf_nat_used_tuple(tuple, ct)) ++ return; + } ++ ++ if (attempts >= range_size || attempts < 16) ++ return; ++ attempts /= 2; ++ off = prandom_u32(); ++ goto another_round; + } + EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple); + diff --git a/queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch b/queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch new file mode 100644 index 00000000000..09df6bd6942 --- /dev/null +++ b/queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch @@ -0,0 +1,185 @@ +From foo@baz Thu Feb 3 06:43:12 PM CET 2022 +From: Florian Westphal +Date: Thu, 3 Feb 2022 13:41:54 +0100 +Subject: netfilter: nat: remove l4 protocol port rovers +To: +Cc: , Florian Westphal , Pablo Neira Ayuso +Message-ID: <20220203124155.16693-2-fw@strlen.de> + +From: Florian Westphal + +commit 6ed5943f8735e2b778d92ea4d9805c0a1d89bc2b upstream. + +This is a leftover from days where single-cpu systems were common: +Store last port used to resolve a clash to use it as a starting point when +the next conflict needs to be resolved. + +When we have parallel attempt to connect to same address:port pair, +its likely that both cores end up computing the same "available" port, +as both use same starting port, and newly used ports won't become +visible to other cores until the conntrack gets confirmed later. + +One of the cores then has to drop the packet at insertion time because +the chosen new tuple turns out to be in use after all. + +Lets simplify this: remove port rover and use a pseudo-random starting +point. + +Note that this doesn't make netfilter default to 'fully random' mode; +the 'rover' was only used if NAT could not reuse source port as-is. 
+ +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman +--- + include/net/netfilter/nf_nat_l4proto.h | 2 +- + net/netfilter/nf_nat_proto_common.c | 7 ++----- + net/netfilter/nf_nat_proto_dccp.c | 5 +---- + net/netfilter/nf_nat_proto_sctp.c | 5 +---- + net/netfilter/nf_nat_proto_tcp.c | 5 +---- + net/netfilter/nf_nat_proto_udp.c | 10 ++-------- + 6 files changed, 8 insertions(+), 26 deletions(-) + +--- a/include/net/netfilter/nf_nat_l4proto.h ++++ b/include/net/netfilter/nf_nat_l4proto.h +@@ -74,7 +74,7 @@ void nf_nat_l4proto_unique_tuple(const s + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, +- const struct nf_conn *ct, u16 *rover); ++ const struct nf_conn *ct); + + int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range); +--- a/net/netfilter/nf_nat_proto_common.c ++++ b/net/netfilter/nf_nat_proto_common.c +@@ -38,8 +38,7 @@ void nf_nat_l4proto_unique_tuple(const s + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, +- const struct nf_conn *ct, +- u16 *rover) ++ const struct nf_conn *ct) + { + unsigned int range_size, min, max, i; + __be16 *portptr; +@@ -84,15 +83,13 @@ void nf_nat_l4proto_unique_tuple(const s + } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) { + off = prandom_u32(); + } else { +- off = *rover; ++ off = prandom_u32(); + } + + for (i = 0; ; ++off) { + *portptr = htons(min + off % range_size); + if (++i != range_size && nf_nat_used_tuple(tuple, ct)) + continue; +- if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) +- *rover = off; + return; + } + } +--- a/net/netfilter/nf_nat_proto_dccp.c ++++ b/net/netfilter/nf_nat_proto_dccp.c +@@ -18,8 +18,6 @@ + #include + #include + +-static u_int16_t dccp_port_rover; +- + static void + dccp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, +@@ -27,8 +25,7 @@ dccp_unique_tuple(const struct nf_nat_l3 + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) + { +- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, +- &dccp_port_rover); ++ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct); + } + + static bool +--- a/net/netfilter/nf_nat_proto_sctp.c ++++ b/net/netfilter/nf_nat_proto_sctp.c +@@ -12,8 +12,6 @@ + + #include + +-static u_int16_t nf_sctp_port_rover; +- + static void + sctp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, +@@ -21,8 +19,7 @@ sctp_unique_tuple(const struct nf_nat_l3 + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) + { +- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, +- &nf_sctp_port_rover); ++ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct); + } + + static bool +--- a/net/netfilter/nf_nat_proto_tcp.c ++++ b/net/netfilter/nf_nat_proto_tcp.c +@@ -18,8 +18,6 @@ + #include + #include + +-static u16 tcp_port_rover; +- + static void + tcp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, +@@ -27,8 +25,7 @@ tcp_unique_tuple(const struct nf_nat_l3p + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) + { +- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, +- &tcp_port_rover); ++ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct); + } + + static bool +--- a/net/netfilter/nf_nat_proto_udp.c ++++ b/net/netfilter/nf_nat_proto_udp.c +@@ -17,8 +17,6 @@ + #include + #include + 
+-static u16 udp_port_rover; +- + static void + udp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, +@@ -26,8 +24,7 @@ udp_unique_tuple(const struct nf_nat_l3p + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) + { +- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, +- &udp_port_rover); ++ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct); + } + + static void +@@ -78,8 +75,6 @@ static bool udp_manip_pkt(struct sk_buff + } + + #ifdef CONFIG_NF_NAT_PROTO_UDPLITE +-static u16 udplite_port_rover; +- + static bool udplite_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, +@@ -103,8 +98,7 @@ udplite_unique_tuple(const struct nf_nat + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) + { +- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct, +- &udplite_port_rover); ++ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct); + } + + const struct nf_nat_l4proto nf_nat_l4proto_udplite = { diff --git a/queue-4.14/series b/queue-4.14/series index ba6d1dd14c2..6b30f03b540 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -35,3 +35,6 @@ ibmvnic-don-t-spin-in-tasklet.patch yam-fix-a-memory-leak-in-yam_siocdevprivate.patch ipv4-raw-lock-the-socket-in-raw_bind.patch ipv4-tcp-send-zero-ipid-in-synack-messages.patch +bpf-fix-truncated-jump-targets-on-heavy-expansions.patch +netfilter-nat-remove-l4-protocol-port-rovers.patch +netfilter-nat-limit-port-clash-resolution-attempts.patch
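
The bpf patch queued above splits branch adjustment into a probe pass and an apply pass: every jump is first checked, on the unmodified image, for whether the post-expansion offset would still fit into insn->off (an s16), and only then is the program reallocated and patched. The following is a minimal user-space sketch of that idea only; the struct layout, helper names, and the simplified handling (offsets adjusted purely in pre-expansion indices, no patchlet skip) are illustrative and are not the kernel's actual code.

#include <stdint.h>
#include <stdio.h>

/*
 * Toy instruction: only what the offset arithmetic needs.  All indices
 * below are in the original (pre-expansion) program.
 */
struct insn {
	int	is_jump;	/* carries a pc-relative offset? */
	int16_t	off;		/* jump offset, relative to the next insn */
};

/*
 * Adjust one jump, assuming `delta` extra insns get inserted at index
 * `pos`.  Returns -1 when the adjusted offset no longer fits in int16_t.
 */
static int adjust_off(struct insn *insn, uint32_t pos, uint32_t delta,
		      uint32_t curr, int probe_only)
{
	int32_t off = insn->off;
	int64_t tgt = (int64_t)curr + off + 1;	/* index the jump lands on */

	if (curr < pos && tgt > pos)
		off += (int32_t)delta;	/* forward jump across the patch point */
	else if (curr > pos && tgt <= pos)
		off -= (int32_t)delta;	/* backward jump across the patch point */

	if (off < INT16_MIN || off > INT16_MAX)
		return -1;
	if (!probe_only)
		insn->off = (int16_t)off;
	return 0;
}

/*
 * Walk every jump.  With probe_only set nothing is written, so an
 * overflow can be reported before any reallocation or copying happens.
 */
static int adjust_branches(struct insn *prog, uint32_t cnt, uint32_t pos,
			   uint32_t delta, int probe_only)
{
	for (uint32_t i = 0; i < cnt; i++) {
		if (prog[i].is_jump &&
		    adjust_off(&prog[i], pos, delta, i, probe_only))
			return -1;
	}
	return 0;
}

int main(void)
{
	/* insn 0 jumps over insn 1; expanding insn 1 by two extra insns
	 * must grow that offset from 1 to 3.
	 */
	struct insn prog[] = { { 1, 1 }, { 0, 0 }, { 0, 0 } };

	if (adjust_branches(prog, 3, 1, 2, 1) == 0)	/* probe pass */
		adjust_branches(prog, 3, 1, 2, 0);	/* apply pass */
	printf("insn 0 now jumps %d insns ahead\n", prog[0].off);

	/* An absurdly large expansion overflows int16_t, so the probe
	 * pass rejects it before anything has been modified.
	 */
	if (adjust_branches(prog, 3, 1, 40000, 1))
		printf("oversized expansion rejected before patching\n");
	return 0;
}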
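
The port-clash patch queued above replaces the unbounded scan of the whole port range with a capped, restarting search: start at a random offset, try at most 128 adjacent ports, and on failure retry from a fresh random offset with a halved window, giving at most 128 + 64 + 32 + 16 + 8 attempts. A minimal user-space sketch of that search follows; port_in_use() and the port numbers are made up for illustration, and the kernel's prandom_u32() is replaced by rand().

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>

/*
 * Hypothetical stand-in for nf_nat_used_tuple(): here, pretend every
 * port below 1100 is already taken.
 */
static int port_in_use(uint16_t port)
{
	return port < 1100;
}

/*
 * Returns a free port in [min, min + range_size), or 0 if the capped
 * search gave up.
 */
static uint16_t pick_port(uint16_t min, uint32_t range_size)
{
	uint32_t attempts = range_size;
	uint32_t off = (uint32_t)rand();

	if (attempts > 128)
		attempts = 128;

	for (;;) {
		for (uint32_t i = 0; i < attempts; i++, off++) {
			uint16_t port = (uint16_t)(min + off % range_size);

			if (!port_in_use(port))
				return port;
		}
		/* Whole range scanned, or window already small: give up. */
		if (attempts >= range_size || attempts < 16)
			return 0;
		attempts /= 2;		/* shrink the search window ...   */
		off = (uint32_t)rand();	/* ... and restart somewhere else */
	}
}

int main(void)
{
	srand((unsigned int)time(NULL));
	printf("picked port %u\n", pick_port(1024, 2048));
	return 0;
}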