4.14-stable patches
author     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Thu, 3 Feb 2022 18:07:15 +0000 (19:07 +0100)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Thu, 3 Feb 2022 18:07:15 +0000 (19:07 +0100)
added patches:
bpf-fix-truncated-jump-targets-on-heavy-expansions.patch
netfilter-nat-limit-port-clash-resolution-attempts.patch
netfilter-nat-remove-l4-protocol-port-rovers.patch

queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch [new file with mode: 0644]
queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch [new file with mode: 0644]
queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch [new file with mode: 0644]
queue-4.14/series

diff --git a/queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch b/queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch
new file mode 100644
index 0000000..e2a1a5b
--- /dev/null
+++ b/queue-4.14/bpf-fix-truncated-jump-targets-on-heavy-expansions.patch
@@ -0,0 +1,202 @@
+From 050fad7c4534c13c8eb1d9c2ba66012e014773cb Mon Sep 17 00:00:00 2001
+From: Daniel Borkmann <daniel@iogearbox.net>
+Date: Thu, 17 May 2018 01:44:11 +0200
+Subject: bpf: fix truncated jump targets on heavy expansions
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 050fad7c4534c13c8eb1d9c2ba66012e014773cb upstream.
+
+Recently during testing, I ran into the following panic:
+
+  [  207.892422] Internal error: Accessing user space memory outside uaccess.h routines: 96000004 [#1] SMP
+  [  207.901637] Modules linked in: binfmt_misc [...]
+  [  207.966530] CPU: 45 PID: 2256 Comm: test_verifier Tainted: G        W         4.17.0-rc3+ #7
+  [  207.974956] Hardware name: FOXCONN R2-1221R-A4/C2U4N_MB, BIOS G31FB18A 03/31/2017
+  [  207.982428] pstate: 60400005 (nZCv daif +PAN -UAO)
+  [  207.987214] pc : bpf_skb_load_helper_8_no_cache+0x34/0xc0
+  [  207.992603] lr : 0xffff000000bdb754
+  [  207.996080] sp : ffff000013703ca0
+  [  207.999384] x29: ffff000013703ca0 x28: 0000000000000001
+  [  208.004688] x27: 0000000000000001 x26: 0000000000000000
+  [  208.009992] x25: ffff000013703ce0 x24: ffff800fb4afcb00
+  [  208.015295] x23: ffff00007d2f5038 x22: ffff00007d2f5000
+  [  208.020599] x21: fffffffffeff2a6f x20: 000000000000000a
+  [  208.025903] x19: ffff000009578000 x18: 0000000000000a03
+  [  208.031206] x17: 0000000000000000 x16: 0000000000000000
+  [  208.036510] x15: 0000ffff9de83000 x14: 0000000000000000
+  [  208.041813] x13: 0000000000000000 x12: 0000000000000000
+  [  208.047116] x11: 0000000000000001 x10: ffff0000089e7f18
+  [  208.052419] x9 : fffffffffeff2a6f x8 : 0000000000000000
+  [  208.057723] x7 : 000000000000000a x6 : 00280c6160000000
+  [  208.063026] x5 : 0000000000000018 x4 : 0000000000007db6
+  [  208.068329] x3 : 000000000008647a x2 : 19868179b1484500
+  [  208.073632] x1 : 0000000000000000 x0 : ffff000009578c08
+  [  208.078938] Process test_verifier (pid: 2256, stack limit = 0x0000000049ca7974)
+  [  208.086235] Call trace:
+  [  208.088672]  bpf_skb_load_helper_8_no_cache+0x34/0xc0
+  [  208.093713]  0xffff000000bdb754
+  [  208.096845]  bpf_test_run+0x78/0xf8
+  [  208.100324]  bpf_prog_test_run_skb+0x148/0x230
+  [  208.104758]  sys_bpf+0x314/0x1198
+  [  208.108064]  el0_svc_naked+0x30/0x34
+  [  208.111632] Code: 91302260 f9400001 f9001fa1 d2800001 (29500680)
+  [  208.117717] ---[ end trace 263cb8a59b5bf29f ]---
+
+The program that caused this had a long jump over the whole
+instruction sequence, where all of the inner instructions required
+heavy expansion into multiple BPF instructions. Additionally, I also
+had BPF hardening enabled, which once more requires rewriting all
+constant values in order to blind them. Each time we rewrite insns,
+bpf_adj_branches() potentially needs to adjust branch targets that
+cross the patchlet boundary to accommodate the additional delta.
+Eventually that led to the case where the target offset could no
+longer fit into insn->off's upper 0x7fff limit, at which point the
+offset wraps around and becomes negative (in the s16 universe), or
+vice versa depending on the jump direction.
+
+Therefore it becomes necessary to detect and reject any such
+occurrences in a generic way for native eBPF and cBPF to eBPF
+migrations. For the latter we can simply check bounds in
+bpf_convert_filter()'s BPF_EMIT_JMP helper macro and bail out once we
+surpass limits. bpf_patch_insn_single() for native eBPF (and cBPF to
+eBPF in case of subsequent hardening) is a bit more complex in that we
+need to detect such truncations before hitting bpf_prog_realloc().
+Thus the latter is split into an extra pass to probe problematic
+offsets on the original program in order to fail early. With that in
+place and carefully tested, I no longer hit the panic and the rewrites
+are rejected properly. I saw the above example panic on bpf-next,
+though the issue itself is generic, so a guard against it in bpf
+itself seems the more appropriate fix.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Martin KaFai Lau <kafai@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+[ab: Dropped BPF_PSEUDO_CALL hardening, introduced in 4.16]
+Signed-off-by: Alessio Balsini <balsini@android.com>
+Acked-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/core.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++--------
+ net/core/filter.c |   11 ++++++++--
+ 2 files changed, 60 insertions(+), 10 deletions(-)
+
+--- a/kernel/bpf/core.c
++++ b/kernel/bpf/core.c
+@@ -228,27 +228,57 @@ static bool bpf_is_jmp_and_has_target(co
+              BPF_OP(insn->code) != BPF_EXIT;
+ }
+-static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta)
++static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta,
++                              u32 curr, const bool probe_pass)
+ {
++      const s32 off_min = S16_MIN, off_max = S16_MAX;
++      s32 off = insn->off;
++
++      if (curr < pos && curr + off + 1 > pos)
++              off += delta;
++      else if (curr > pos + delta && curr + off + 1 <= pos + delta)
++              off -= delta;
++      if (off < off_min || off > off_max)
++              return -ERANGE;
++      if (!probe_pass)
++              insn->off = off;
++      return 0;
++}
++
++static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
++                          const bool probe_pass)
++{
++      u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0);
+       struct bpf_insn *insn = prog->insnsi;
+-      u32 i, insn_cnt = prog->len;
++      int ret = 0;
+       for (i = 0; i < insn_cnt; i++, insn++) {
++              /* In the probing pass we still operate on the original,
++               * unpatched image in order to check overflows before we
++               * do any other adjustments. Therefore skip the patchlet.
++               */
++              if (probe_pass && i == pos) {
++                      i += delta + 1;
++                      insn++;
++              }
++
+               if (!bpf_is_jmp_and_has_target(insn))
+                       continue;
+-              /* Adjust offset of jmps if we cross boundaries. */
+-              if (i < pos && i + insn->off + 1 > pos)
+-                      insn->off += delta;
+-              else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
+-                      insn->off -= delta;
++              /* Adjust offset of jmps if we cross patch boundaries. */
++              ret = bpf_adj_delta_to_off(insn, pos, delta, i, probe_pass);
++              if (ret)
++                      break;
+       }
++
++      return ret;
+ }
+ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
+                                      const struct bpf_insn *patch, u32 len)
+ {
+       u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
++      const u32 cnt_max = S16_MAX;
+       struct bpf_prog *prog_adj;
+       /* Since our patchlet doesn't expand the image, we're done. */
+@@ -259,6 +289,15 @@ struct bpf_prog *bpf_patch_insn_single(s
+       insn_adj_cnt = prog->len + insn_delta;
++      /* Reject anything that would potentially let the insn->off
++       * target overflow when we have excessive program expansions.
++       * We need to probe here before we do any reallocation where
++       * we afterwards may not fail anymore.
++       */
++      if (insn_adj_cnt > cnt_max &&
++          bpf_adj_branches(prog, off, insn_delta, true))
++              return NULL;
++
+       /* Several new instructions need to be inserted. Make room
+        * for them. Likely, there's no need for a new allocation as
+        * last page could have large enough tailroom.
+@@ -284,7 +323,11 @@ struct bpf_prog *bpf_patch_insn_single(s
+               sizeof(*patch) * insn_rest);
+       memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len);
+-      bpf_adj_branches(prog_adj, off, insn_delta);
++      /* We are guaranteed not to fail at this point; otherwise the
++       * ship has sailed and we cannot revert to the original state.
++       * An overflow cannot happen at this point.
++       */
++      BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
+       return prog_adj;
+ }
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -472,11 +472,18 @@ do_pass:
+ #define BPF_EMIT_JMP                                                  \
+       do {                                                            \
++              const s32 off_min = S16_MIN, off_max = S16_MAX;         \
++              s32 off;                                                \
++                                                                      \
+               if (target >= len || target < 0)                        \
+                       goto err;                                       \
+-              insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;   \
++              off = addrs ? addrs[target] - addrs[i] - 1 : 0;         \
+               /* Adjust pc relative offset for 2nd or 3rd insn. */    \
+-              insn->off -= insn - tmp_insns;                          \
++              off -= insn - tmp_insns;                                \
++              /* Reject anything not fitting into insn->off. */       \
++              if (off < off_min || off > off_max)                     \
++                      goto err;                                       \
++              insn->off = off;                                        \
+       } while (0)
+               case BPF_JMP | BPF_JA:
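The guard added in this patch is easy to demonstrate in isolation: a jump distance that still fits in 32 bits wraps around once it is stored back into the 16-bit insn->off field. Below is a minimal user-space sketch (not part of the patch; the S16 constants mirror the kernel's, the distances are made up) of that wraparound and of the range check the probe pass performs:

#include <stdio.h>
#include <stdint.h>

#define S16_MAX  32767
#define S16_MIN  (-32768)

int main(void)
{
	int32_t off   = 32760;  /* branch target distance before patching */
	int32_t delta = 100;    /* extra insns inserted by a patchlet */

	int32_t adjusted = off + delta;        /* 32860: exceeds S16_MAX */
	int16_t stored   = (int16_t)adjusted;  /* what insn->off would hold */

	printf("adjusted offset: %d\n", adjusted);  /* 32860 */
	printf("stored as s16:   %d\n", stored);    /* wraps to -32676 */

	/* this is the case bpf_adj_delta_to_off() rejects with -ERANGE */
	if (adjusted < S16_MIN || adjusted > S16_MAX)
		printf("-ERANGE: offset no longer fits insn->off\n");
	return 0;
}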
diff --git a/queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch b/queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch
new file mode 100644
index 0000000..06ebe3b
--- /dev/null
+++ b/queue-4.14/netfilter-nat-limit-port-clash-resolution-attempts.patch
@@ -0,0 +1,80 @@
+From foo@baz Thu Feb  3 06:43:12 PM CET 2022
+From: Florian Westphal <fw@strlen.de>
+Date: Thu,  3 Feb 2022 13:41:55 +0100
+Subject: netfilter: nat: limit port clash resolution attempts
+To: <stable@vger.kernel.org>
+Cc: <netfilter-devel@vger.kernel.org>, Florian Westphal <fw@strlen.de>, Pablo Neira Ayuso <pablo@netfilter.org>, Vimal Agrawal <vimal.agrawal@sophos.com>
+Message-ID: <20220203124155.16693-3-fw@strlen.de>
+
+From: Florian Westphal <fw@strlen.de>
+
+commit a504b703bb1da526a01593da0e4be2af9d9f5fa8 upstream.
+
+When almost all of the available ports are taken, clash resolution can
+take a very long time, resulting in a soft lockup.
+
+This can happen when many to-be-natted hosts connect to the same
+destination:port (e.g. a proxy) and all connections pass the same SNAT.
+
+Pick a random offset in the acceptable range, then try an ever smaller
+number of adjacent port numbers, until either the limit is reached or a
+usable port is found.  This results in at most 248 attempts
+(128 + 64 + 32 + 16 + 8, i.e. 4 restarts with a new search offset)
+instead of 64000+.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Vimal Agrawal <vimal.agrawal@sophos.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netfilter/nf_nat_proto_common.c |   29 +++++++++++++++++++++++------
+ 1 file changed, 23 insertions(+), 6 deletions(-)
+
+--- a/net/netfilter/nf_nat_proto_common.c
++++ b/net/netfilter/nf_nat_proto_common.c
+@@ -40,9 +40,10 @@ void nf_nat_l4proto_unique_tuple(const s
+                                enum nf_nat_manip_type maniptype,
+                                const struct nf_conn *ct)
+ {
+-      unsigned int range_size, min, max, i;
++      unsigned int range_size, min, max, i, attempts;
+       __be16 *portptr;
+-      u_int16_t off;
++      u16 off;
++      static const unsigned int max_attempts = 128;
+       if (maniptype == NF_NAT_MANIP_SRC)
+               portptr = &tuple->src.u.all;
+@@ -86,12 +87,28 @@ void nf_nat_l4proto_unique_tuple(const s
+               off = prandom_u32();
+       }
+-      for (i = 0; ; ++off) {
++      attempts = range_size;
++      if (attempts > max_attempts)
++              attempts = max_attempts;
++
++      /* We are in softirq; doing a search of the entire range risks
++       * soft lockup when all tuples are already used.
++       *
++       * If we can't find any free port from the first offset, pick a
++       * new one and try again, with an ever smaller search window.
++       */
++another_round:
++      for (i = 0; i < attempts; i++, off++) {
+               *portptr = htons(min + off % range_size);
+-              if (++i != range_size && nf_nat_used_tuple(tuple, ct))
+-                      continue;
+-              return;
++              if (!nf_nat_used_tuple(tuple, ct))
++                      return;
+       }
++
++      if (attempts >= range_size || attempts < 16)
++              return;
++      attempts /= 2;
++      off = prandom_u32();
++      goto another_round;
+ }
+ EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
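The worst-case count quoted in the message follows directly from the retry loop in this patch: each round is capped, the cap halves after every failed round, and the search stops once the window would shrink below 16. A small stand-alone sketch of the arithmetic (assuming range_size is larger than the initial cap of 128, so the attempts >= range_size bail-out never fires):

#include <stdio.h>

int main(void)
{
	unsigned int attempts = 128;  /* initial cap, max_attempts above */
	unsigned int total = 0, rounds = 0;

	for (;;) {
		total += attempts;    /* one full round of ports probed */
		rounds++;
		if (attempts < 16)    /* mirrors the bail-out in the patch */
			break;
		attempts /= 2;        /* shrink window, new random offset */
	}

	/* prints: 5 rounds, 248 attempts (128 + 64 + 32 + 16 + 8) */
	printf("%u rounds, %u attempts worst case\n", rounds, total);
	return 0;
}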
diff --git a/queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch b/queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch
new file mode 100644
index 0000000..09df6bd
--- /dev/null
+++ b/queue-4.14/netfilter-nat-remove-l4-protocol-port-rovers.patch
@@ -0,0 +1,185 @@
+From foo@baz Thu Feb  3 06:43:12 PM CET 2022
+From: Florian Westphal <fw@strlen.de>
+Date: Thu,  3 Feb 2022 13:41:54 +0100
+Subject: netfilter: nat: remove l4 protocol port rovers
+To: <stable@vger.kernel.org>
+Cc: <netfilter-devel@vger.kernel.org>, Florian Westphal <fw@strlen.de>, Pablo Neira Ayuso <pablo@netfilter.org>
+Message-ID: <20220203124155.16693-2-fw@strlen.de>
+
+From: Florian Westphal <fw@strlen.de>
+
+commit 6ed5943f8735e2b778d92ea4d9805c0a1d89bc2b upstream.
+
+This is a leftover from the days when single-cpu systems were common:
+store the last port used to resolve a clash and use it as the starting
+point when the next conflict needs to be resolved.
+
+When we have parallel attempts to connect to the same address:port
+pair, it's likely that both cores end up computing the same "available"
+port, as both use the same starting port, and newly used ports won't
+become visible to other cores until the conntrack gets confirmed later.
+
+One of the cores then has to drop the packet at insertion time because
+the chosen new tuple turns out to be in use after all.
+
+Let's simplify this: remove the port rover and use a pseudo-random
+starting point.
+
+Note that this doesn't make netfilter default to 'fully random' mode;
+the 'rover' was only used if NAT could not reuse the source port as-is.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/netfilter/nf_nat_l4proto.h |    2 +-
+ net/netfilter/nf_nat_proto_common.c    |    7 ++-----
+ net/netfilter/nf_nat_proto_dccp.c      |    5 +----
+ net/netfilter/nf_nat_proto_sctp.c      |    5 +----
+ net/netfilter/nf_nat_proto_tcp.c       |    5 +----
+ net/netfilter/nf_nat_proto_udp.c       |   10 ++--------
+ 6 files changed, 8 insertions(+), 26 deletions(-)
+
+--- a/include/net/netfilter/nf_nat_l4proto.h
++++ b/include/net/netfilter/nf_nat_l4proto.h
+@@ -74,7 +74,7 @@ void nf_nat_l4proto_unique_tuple(const s
+                                struct nf_conntrack_tuple *tuple,
+                                const struct nf_nat_range *range,
+                                enum nf_nat_manip_type maniptype,
+-                               const struct nf_conn *ct, u16 *rover);
++                               const struct nf_conn *ct);
+ int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+                                  struct nf_nat_range *range);
+--- a/net/netfilter/nf_nat_proto_common.c
++++ b/net/netfilter/nf_nat_proto_common.c
+@@ -38,8 +38,7 @@ void nf_nat_l4proto_unique_tuple(const s
+                                struct nf_conntrack_tuple *tuple,
+                                const struct nf_nat_range *range,
+                                enum nf_nat_manip_type maniptype,
+-                               const struct nf_conn *ct,
+-                               u16 *rover)
++                               const struct nf_conn *ct)
+ {
+       unsigned int range_size, min, max, i;
+       __be16 *portptr;
+@@ -84,15 +83,13 @@ void nf_nat_l4proto_unique_tuple(const s
+       } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
+               off = prandom_u32();
+       } else {
+-              off = *rover;
++              off = prandom_u32();
+       }
+       for (i = 0; ; ++off) {
+               *portptr = htons(min + off % range_size);
+               if (++i != range_size && nf_nat_used_tuple(tuple, ct))
+                       continue;
+-              if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL))
+-                      *rover = off;
+               return;
+       }
+ }
+--- a/net/netfilter/nf_nat_proto_dccp.c
++++ b/net/netfilter/nf_nat_proto_dccp.c
+@@ -18,8 +18,6 @@
+ #include <net/netfilter/nf_nat_l3proto.h>
+ #include <net/netfilter/nf_nat_l4proto.h>
+-static u_int16_t dccp_port_rover;
+-
+ static void
+ dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+                 struct nf_conntrack_tuple *tuple,
+@@ -27,8 +25,7 @@ dccp_unique_tuple(const struct nf_nat_l3
+                 enum nf_nat_manip_type maniptype,
+                 const struct nf_conn *ct)
+ {
+-      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+-                                  &dccp_port_rover);
++      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
+ }
+ static bool
+--- a/net/netfilter/nf_nat_proto_sctp.c
++++ b/net/netfilter/nf_nat_proto_sctp.c
+@@ -12,8 +12,6 @@
+ #include <net/netfilter/nf_nat_l4proto.h>
+-static u_int16_t nf_sctp_port_rover;
+-
+ static void
+ sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+                 struct nf_conntrack_tuple *tuple,
+@@ -21,8 +19,7 @@ sctp_unique_tuple(const struct nf_nat_l3
+                 enum nf_nat_manip_type maniptype,
+                 const struct nf_conn *ct)
+ {
+-      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+-                                  &nf_sctp_port_rover);
++      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
+ }
+ static bool
+--- a/net/netfilter/nf_nat_proto_tcp.c
++++ b/net/netfilter/nf_nat_proto_tcp.c
+@@ -18,8 +18,6 @@
+ #include <net/netfilter/nf_nat_l4proto.h>
+ #include <net/netfilter/nf_nat_core.h>
+-static u16 tcp_port_rover;
+-
+ static void
+ tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+                struct nf_conntrack_tuple *tuple,
+@@ -27,8 +25,7 @@ tcp_unique_tuple(const struct nf_nat_l3p
+                enum nf_nat_manip_type maniptype,
+                const struct nf_conn *ct)
+ {
+-      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+-                                  &tcp_port_rover);
++      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
+ }
+ static bool
+--- a/net/netfilter/nf_nat_proto_udp.c
++++ b/net/netfilter/nf_nat_proto_udp.c
+@@ -17,8 +17,6 @@
+ #include <net/netfilter/nf_nat_l3proto.h>
+ #include <net/netfilter/nf_nat_l4proto.h>
+-static u16 udp_port_rover;
+-
+ static void
+ udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+                struct nf_conntrack_tuple *tuple,
+@@ -26,8 +24,7 @@ udp_unique_tuple(const struct nf_nat_l3p
+                enum nf_nat_manip_type maniptype,
+                const struct nf_conn *ct)
+ {
+-      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+-                                  &udp_port_rover);
++      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
+ }
+ static void
+@@ -78,8 +75,6 @@ static bool udp_manip_pkt(struct sk_buff
+ }
+ #ifdef CONFIG_NF_NAT_PROTO_UDPLITE
+-static u16 udplite_port_rover;
+-
+ static bool udplite_manip_pkt(struct sk_buff *skb,
+                             const struct nf_nat_l3proto *l3proto,
+                             unsigned int iphdroff, unsigned int hdroff,
+@@ -103,8 +98,7 @@ udplite_unique_tuple(const struct nf_nat
+                    enum nf_nat_manip_type maniptype,
+                    const struct nf_conn *ct)
+ {
+-      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+-                                  &udplite_port_rover);
++      nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
+ }
+ const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
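The clash the message describes is easy to picture side by side: with a shared rover, two CPUs that resolve a clash before either conntrack is confirmed compute identical candidate ports, while per-attempt random offsets make such a collision unlikely. A hypothetical user-space sketch (the port formula mirrors min + off % range_size from nf_nat_l4proto_unique_tuple(); the fixed values stand in for prandom_u32()):

#include <stdio.h>

/* mirrors the candidate computation in nf_nat_l4proto_unique_tuple() */
static unsigned int pick_port(unsigned int min, unsigned int range_size,
			      unsigned int off)
{
	return min + off % range_size;
}

int main(void)
{
	unsigned int min = 1024, range = 64512;
	unsigned int rover = 140;  /* stale shared rover read by both CPUs */

	/* old scheme: both CPUs start from the same rover value and
	 * pick the same "free" port; one insertion fails later */
	printf("cpu0 (rover):  %u\n", pick_port(min, range, rover));
	printf("cpu1 (rover):  %u\n", pick_port(min, range, rover));

	/* new scheme: independent random offsets (stand-ins for
	 * prandom_u32()) almost surely yield different candidates */
	printf("cpu0 (random): %u\n", pick_port(min, range, 31337u));
	printf("cpu1 (random): %u\n", pick_port(min, range, 50021u));
	return 0;
}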
diff --git a/queue-4.14/series b/queue-4.14/series
index ba6d1dd14c209e05f9c75b5825e8415e677ad28f..6b30f03b540384bdeef6a0fb48803a3af5c58d67 100644
--- a/queue-4.14/series
+++ b/queue-4.14/series
@@ -35,3 +35,6 @@ ibmvnic-don-t-spin-in-tasklet.patch
 yam-fix-a-memory-leak-in-yam_siocdevprivate.patch
 ipv4-raw-lock-the-socket-in-raw_bind.patch
 ipv4-tcp-send-zero-ipid-in-synack-messages.patch
+bpf-fix-truncated-jump-targets-on-heavy-expansions.patch
+netfilter-nat-remove-l4-protocol-port-rovers.patch
+netfilter-nat-limit-port-clash-resolution-attempts.patch