git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 20 Jun 2022 11:56:51 +0000 (13:56 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 20 Jun 2022 11:56:51 +0000 (13:56 +0200)
added patches:
arm64-kprobes-use-brk-instead-of-single-step-when-executing-instructions-out-of-line.patch
net-openvswitch-fix-leak-of-nested-actions.patch
net-openvswitch-fix-misuse-of-the-cached-connection-on-tuple-changes.patch
net-sched-act_police-more-accurate-mtu-policing.patch

queue-5.4/arm64-kprobes-use-brk-instead-of-single-step-when-executing-instructions-out-of-line.patch [new file with mode: 0644]
queue-5.4/net-openvswitch-fix-leak-of-nested-actions.patch [new file with mode: 0644]
queue-5.4/net-openvswitch-fix-misuse-of-the-cached-connection-on-tuple-changes.patch [new file with mode: 0644]
queue-5.4/net-sched-act_police-more-accurate-mtu-policing.patch [new file with mode: 0644]
queue-5.4/series

diff --git a/queue-5.4/arm64-kprobes-use-brk-instead-of-single-step-when-executing-instructions-out-of-line.patch b/queue-5.4/arm64-kprobes-use-brk-instead-of-single-step-when-executing-instructions-out-of-line.patch
new file mode 100644 (file)
index 0000000..1c2e80c
--- /dev/null
@@ -0,0 +1,235 @@
+From 7ee31a3aa8f490c6507bc4294df6b70bed1c593e Mon Sep 17 00:00:00 2001
+From: Jean-Philippe Brucker <jean-philippe@linaro.org>
+Date: Tue, 3 Nov 2020 14:49:01 +0100
+Subject: arm64: kprobes: Use BRK instead of single-step when executing instructions out-of-line
+
+From: Jean-Philippe Brucker <jean-philippe@linaro.org>
+
+commit 7ee31a3aa8f490c6507bc4294df6b70bed1c593e upstream.
+
+Commit 36dadef23fcc ("kprobes: Init kprobes in early_initcall") enabled
+using kprobes from early_initcall. Unfortunately at this point the
+hardware debug infrastructure is not operational. The OS lock may still
+be locked, and the hardware watchpoints may have unknown values when
+kprobe enables debug monitors to single-step instructions.
+
+Rather than using hardware single-step, append a BRK instruction after
+the instruction to be executed out-of-line.
+
+Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
+Suggested-by: Will Deacon <will@kernel.org>
+Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20201103134900.337243-1-jean-philippe@linaro.org
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/brk-imm.h        |    2 
+ arch/arm64/include/asm/debug-monitors.h |    1 
+ arch/arm64/include/asm/kprobes.h        |    2 
+ arch/arm64/kernel/probes/kprobes.c      |   69 ++++++++++----------------------
+ 4 files changed, 27 insertions(+), 47 deletions(-)
+
+--- a/arch/arm64/include/asm/brk-imm.h
++++ b/arch/arm64/include/asm/brk-imm.h
+@@ -10,6 +10,7 @@
+  * #imm16 values used for BRK instruction generation
+  * 0x004: for installing kprobes
+  * 0x005: for installing uprobes
++ * 0x006: for kprobe software single-step
+  * Allowed values for kgdb are 0x400 - 0x7ff
+  * 0x100: for triggering a fault on purpose (reserved)
+  * 0x400: for dynamic BRK instruction
+@@ -19,6 +20,7 @@
+  */
+ #define KPROBES_BRK_IMM                       0x004
+ #define UPROBES_BRK_IMM                       0x005
++#define KPROBES_BRK_SS_IMM            0x006
+ #define FAULT_BRK_IMM                 0x100
+ #define KGDB_DYN_DBG_BRK_IMM          0x400
+ #define KGDB_COMPILED_DBG_BRK_IMM     0x401
+--- a/arch/arm64/include/asm/debug-monitors.h
++++ b/arch/arm64/include/asm/debug-monitors.h
+@@ -53,6 +53,7 @@
+ /* kprobes BRK opcodes with ESR encoding  */
+ #define BRK64_OPCODE_KPROBES  (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5))
++#define BRK64_OPCODE_KPROBES_SS       (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))
+ /* uprobes BRK opcodes with ESR encoding  */
+ #define BRK64_OPCODE_UPROBES  (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5))
+--- a/arch/arm64/include/asm/kprobes.h
++++ b/arch/arm64/include/asm/kprobes.h
+@@ -16,7 +16,7 @@
+ #include <linux/percpu.h>
+ #define __ARCH_WANT_KPROBES_INSN_SLOT
+-#define MAX_INSN_SIZE                 1
++#define MAX_INSN_SIZE                 2
+ #define flush_insn_slot(p)            do { } while (0)
+ #define kretprobe_blacklist_size      0
+--- a/arch/arm64/kernel/probes/kprobes.c
++++ b/arch/arm64/kernel/probes/kprobes.c
+@@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kpr
+ static void __kprobes
+ post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+-{
+-      void *addrs[1];
+-      u32 insns[1];
+-
+-      addrs[0] = addr;
+-      insns[0] = opcode;
+-
+-      return aarch64_insn_patch_text(addrs, insns, 1);
+-}
+-
+ static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+ {
++      kprobe_opcode_t *addr = p->ainsn.api.insn;
++      void *addrs[] = {addr, addr + 1};
++      u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS};
++
+       /* prepare insn slot */
+-      patch_text(p->ainsn.api.insn, p->opcode);
++      aarch64_insn_patch_text(addrs, insns, 2);
+-      flush_icache_range((uintptr_t) (p->ainsn.api.insn),
+-                         (uintptr_t) (p->ainsn.api.insn) +
+-                         MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
++      flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE));
+       /*
+        * Needs restoring of return address after stepping xol.
+@@ -134,13 +125,18 @@ void *alloc_insn_page(void)
+ /* arm kprobe: install breakpoint in text */
+ void __kprobes arch_arm_kprobe(struct kprobe *p)
+ {
+-      patch_text(p->addr, BRK64_OPCODE_KPROBES);
++      void *addr = p->addr;
++      u32 insn = BRK64_OPCODE_KPROBES;
++
++      aarch64_insn_patch_text(&addr, &insn, 1);
+ }
+ /* disarm kprobe: remove breakpoint from text */
+ void __kprobes arch_disarm_kprobe(struct kprobe *p)
+ {
+-      patch_text(p->addr, p->opcode);
++      void *addr = p->addr;
++
++      aarch64_insn_patch_text(&addr, &p->opcode, 1);
+ }
+ void __kprobes arch_remove_kprobe(struct kprobe *p)
+@@ -169,20 +165,15 @@ static void __kprobes set_current_kprobe
+ }
+ /*
+- * Interrupts need to be disabled before single-step mode is set, and not
+- * reenabled until after single-step mode ends.
+- * Without disabling interrupt on local CPU, there is a chance of
+- * interrupt occurrence in the period of exception return and  start of
+- * out-of-line single-step, that result in wrongly single stepping
+- * into the interrupt handler.
++ * Mask all of DAIF while executing the instruction out-of-line, to keep things
++ * simple and avoid nesting exceptions. Interrupts do have to be disabled since
++ * the kprobe state is per-CPU and doesn't get migrated.
+  */
+ static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+                                               struct pt_regs *regs)
+ {
+       kcb->saved_irqflag = regs->pstate & DAIF_MASK;
+-      regs->pstate |= PSR_I_BIT;
+-      /* Unmask PSTATE.D for enabling software step exceptions. */
+-      regs->pstate &= ~PSR_D_BIT;
++      regs->pstate |= DAIF_MASK;
+ }
+ static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+@@ -225,10 +216,7 @@ static void __kprobes setup_singlestep(s
+               slot = (unsigned long)p->ainsn.api.insn;
+               set_ss_context(kcb, slot);      /* mark pending ss */
+-
+-              /* IRQs and single stepping do not mix well. */
+               kprobes_save_local_irqflag(kcb, regs);
+-              kernel_enable_single_step(regs);
+               instruction_pointer_set(regs, slot);
+       } else {
+               /* insn simulation */
+@@ -279,12 +267,8 @@ post_kprobe_handler(struct kprobe_ctlblk
+       }
+       /* call post handler */
+       kcb->kprobe_status = KPROBE_HIT_SSDONE;
+-      if (cur->post_handler)  {
+-              /* post_handler can hit breakpoint and single step
+-               * again, so we enable D-flag for recursive exception.
+-               */
++      if (cur->post_handler)
+               cur->post_handler(cur, regs, 0);
+-      }
+       reset_current_kprobe();
+ }
+@@ -308,8 +292,6 @@ int __kprobes kprobe_fault_handler(struc
+               if (!instruction_pointer(regs))
+                       BUG();
+-              kernel_disable_single_step();
+-
+               if (kcb->kprobe_status == KPROBE_REENTER)
+                       restore_previous_kprobe(kcb);
+               else
+@@ -371,10 +353,6 @@ static void __kprobes kprobe_handler(str
+                        * pre-handler and it returned non-zero, it will
+                        * modify the execution path and no need to single
+                        * stepping. Let's just reset current kprobe and exit.
+-                       *
+-                       * pre_handler can hit a breakpoint and can step thru
+-                       * before return, keep PSTATE D-flag enabled until
+-                       * pre_handler return back.
+                        */
+                       if (!p->pre_handler || !p->pre_handler(p, regs)) {
+                               setup_singlestep(p, regs, kcb, 0);
+@@ -405,7 +383,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb,
+ }
+ static int __kprobes
+-kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
++kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
+ {
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+       int retval;
+@@ -415,16 +393,15 @@ kprobe_single_step_handler(struct pt_reg
+       if (retval == DBG_HOOK_HANDLED) {
+               kprobes_restore_local_irqflag(kcb, regs);
+-              kernel_disable_single_step();
+-
+               post_kprobe_handler(kcb, regs);
+       }
+       return retval;
+ }
+-static struct step_hook kprobes_step_hook = {
+-      .fn = kprobe_single_step_handler,
++static struct break_hook kprobes_break_ss_hook = {
++      .imm = KPROBES_BRK_SS_IMM,
++      .fn = kprobe_breakpoint_ss_handler,
+ };
+ static int __kprobes
+@@ -568,7 +545,7 @@ int __kprobes arch_trampoline_kprobe(str
+ int __init arch_init_kprobes(void)
+ {
+       register_kernel_break_hook(&kprobes_break_hook);
+-      register_kernel_step_hook(&kprobes_step_hook);
++      register_kernel_break_hook(&kprobes_break_ss_hook);
+       return 0;
+ }
diff --git a/queue-5.4/net-openvswitch-fix-leak-of-nested-actions.patch b/queue-5.4/net-openvswitch-fix-leak-of-nested-actions.patch
new file mode 100644 (file)
index 0000000..2741e02
--- /dev/null
@@ -0,0 +1,168 @@
+From 1f30fb9166d4f15a1aa19449b9da871fe0ed4796 Mon Sep 17 00:00:00 2001
+From: Ilya Maximets <i.maximets@ovn.org>
+Date: Mon, 4 Apr 2022 17:43:45 +0200
+Subject: net: openvswitch: fix leak of nested actions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ilya Maximets <i.maximets@ovn.org>
+
+commit 1f30fb9166d4f15a1aa19449b9da871fe0ed4796 upstream.
+
+While parsing user-provided actions, openvswitch module may dynamically
+allocate memory and store pointers in the internal copy of the actions.
+So this memory has to be freed while destroying the actions.
+
+Currently there are only two such actions: ct() and set().  However,
+there are many actions that can hold nested lists of actions and
+ovs_nla_free_flow_actions() just jumps over them leaking the memory.
+
+For example, removal of the flow with the following actions will lead
+to a leak of the memory allocated by nf_ct_tmpl_alloc():
+
+  actions:clone(ct(commit),0)
+
+Non-freed set() action may also leak the 'dst' structure for the
+tunnel info including device references.
+
+Under certain conditions with a high rate of flow rotation that may
+cause significant memory leak problem (2MB per second in reporter's
+case).  The problem is also hard to mitigate, because the user doesn't
+have direct control over the datapath flows generated by OVS.
+
+Fix that by iterating over all the nested actions and freeing
+everything that needs to be freed recursively.
+
+New build time assertion should protect us from this problem if new
+actions will be added in the future.
+
+Unfortunately, openvswitch module doesn't use NLA_F_NESTED, so all
+attributes has to be explicitly checked.  sample() and clone() actions
+are mixing extra attributes into the user-provided action list.  That
+prevents some code generalization too.
+
+Fixes: 34ae932a4036 ("openvswitch: Make tunnel set action attach a metadata dst")
+Link: https://mail.openvswitch.org/pipermail/ovs-dev/2022-March/392922.html
+Reported-by: Stéphane Graber <stgraber@ubuntu.com>
+Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
+Acked-by: Aaron Conole <aconole@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Backport for 5.4: Removed handling of OVS_ACTION_ATTR_DEC_TTL as it
+ doesn't exist in this version.  BUILD_BUG_ON condition adjusted
+ accordingly.]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/flow_netlink.c |   80 ++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 75 insertions(+), 5 deletions(-)
+
+--- a/net/openvswitch/flow_netlink.c
++++ b/net/openvswitch/flow_netlink.c
+@@ -2266,6 +2266,51 @@ static struct sw_flow_actions *nla_alloc
+       return sfa;
+ }
++static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
++
++static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action)
++{
++      const struct nlattr *a;
++      int rem;
++
++      nla_for_each_nested(a, action, rem) {
++              switch (nla_type(a)) {
++              case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL:
++              case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER:
++                      ovs_nla_free_nested_actions(nla_data(a), nla_len(a));
++                      break;
++              }
++      }
++}
++
++static void ovs_nla_free_clone_action(const struct nlattr *action)
++{
++      const struct nlattr *a = nla_data(action);
++      int rem = nla_len(action);
++
++      switch (nla_type(a)) {
++      case OVS_CLONE_ATTR_EXEC:
++              /* The real list of actions follows this attribute. */
++              a = nla_next(a, &rem);
++              ovs_nla_free_nested_actions(a, rem);
++              break;
++      }
++}
++
++static void ovs_nla_free_sample_action(const struct nlattr *action)
++{
++      const struct nlattr *a = nla_data(action);
++      int rem = nla_len(action);
++
++      switch (nla_type(a)) {
++      case OVS_SAMPLE_ATTR_ARG:
++              /* The real list of actions follows this attribute. */
++              a = nla_next(a, &rem);
++              ovs_nla_free_nested_actions(a, rem);
++              break;
++      }
++}
++
+ static void ovs_nla_free_set_action(const struct nlattr *a)
+ {
+       const struct nlattr *ovs_key = nla_data(a);
+@@ -2279,25 +2324,50 @@ static void ovs_nla_free_set_action(cons
+       }
+ }
+-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
++static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
+ {
+       const struct nlattr *a;
+       int rem;
+-      if (!sf_acts)
++      /* Whenever new actions are added, the need to update this
++       * function should be considered.
++       */
++      BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 21);
++
++      if (!actions)
+               return;
+-      nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
++      nla_for_each_attr(a, actions, len, rem) {
+               switch (nla_type(a)) {
+-              case OVS_ACTION_ATTR_SET:
+-                      ovs_nla_free_set_action(a);
++              case OVS_ACTION_ATTR_CHECK_PKT_LEN:
++                      ovs_nla_free_check_pkt_len_action(a);
+                       break;
++
++              case OVS_ACTION_ATTR_CLONE:
++                      ovs_nla_free_clone_action(a);
++                      break;
++
+               case OVS_ACTION_ATTR_CT:
+                       ovs_ct_free_action(a);
+                       break;
++
++              case OVS_ACTION_ATTR_SAMPLE:
++                      ovs_nla_free_sample_action(a);
++                      break;
++
++              case OVS_ACTION_ATTR_SET:
++                      ovs_nla_free_set_action(a);
++                      break;
+               }
+       }
++}
++
++void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
++{
++      if (!sf_acts)
++              return;
++      ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
+       kfree(sf_acts);
+ }
diff --git a/queue-5.4/net-openvswitch-fix-misuse-of-the-cached-connection-on-tuple-changes.patch b/queue-5.4/net-openvswitch-fix-misuse-of-the-cached-connection-on-tuple-changes.patch
new file mode 100644 (file)
index 0000000..20e4eb9
--- /dev/null
@@ -0,0 +1,107 @@
+From 2061ecfdf2350994e5b61c43e50e98a7a70e95ee Mon Sep 17 00:00:00 2001
+From: Ilya Maximets <i.maximets@ovn.org>
+Date: Tue, 7 Jun 2022 00:11:40 +0200
+Subject: net: openvswitch: fix misuse of the cached connection on tuple changes
+
+From: Ilya Maximets <i.maximets@ovn.org>
+
+commit 2061ecfdf2350994e5b61c43e50e98a7a70e95ee upstream.
+
+If packet headers changed, the cached nfct is no longer relevant
+for the packet and attempt to re-use it leads to the incorrect packet
+classification.
+
+This issue is causing broken connectivity in OpenStack deployments
+with OVS/OVN due to hairpin traffic being unexpectedly dropped.
+
+The setup has datapath flows with several conntrack actions and tuple
+changes between them:
+
+  actions:ct(commit,zone=8,mark=0/0x1,nat(src)),
+          set(eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:06)),
+          set(ipv4(src=172.18.2.10,dst=192.168.100.6,ttl=62)),
+          ct(zone=8),recirc(0x4)
+
+After the first ct() action the packet headers are almost fully
+re-written.  The next ct() tries to re-use the existing nfct entry
+and marks the packet as invalid, so it gets dropped later in the
+pipeline.
+
+Clearing the cached conntrack entry whenever packet tuple is changed
+to avoid the issue.
+
+The flow key should not be cleared though, because we should still
+be able to match on the ct_state if the recirculation happens after
+the tuple change but before the next ct() action.
+
+Cc: stable@vger.kernel.org
+Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
+Reported-by: Frode Nordahl <frode.nordahl@canonical.com>
+Link: https://mail.openvswitch.org/pipermail/ovs-discuss/2022-May/051829.html
+Link: https://bugs.launchpad.net/ubuntu/+source/ovn/+bug/1967856
+Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
+Link: https://lore.kernel.org/r/20220606221140.488984-1-i.maximets@ovn.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Backport to 5.10: minor rebase in ovs_ct_clear function.
+ This version also applicable to and tested on 5.4 and 4.19.]
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/actions.c   |    6 ++++++
+ net/openvswitch/conntrack.c |    3 ++-
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -377,6 +377,7 @@ static void set_ip_addr(struct sk_buff *
+       update_ip_l4_checksum(skb, nh, *addr, new_addr);
+       csum_replace4(&nh->check, *addr, new_addr);
+       skb_clear_hash(skb);
++      ovs_ct_clear(skb, NULL);
+       *addr = new_addr;
+ }
+@@ -424,6 +425,7 @@ static void set_ipv6_addr(struct sk_buff
+               update_ipv6_checksum(skb, l4_proto, addr, new_addr);
+       skb_clear_hash(skb);
++      ovs_ct_clear(skb, NULL);
+       memcpy(addr, new_addr, sizeof(__be32[4]));
+ }
+@@ -664,6 +666,7 @@ static int set_nsh(struct sk_buff *skb,
+ static void set_tp_port(struct sk_buff *skb, __be16 *port,
+                       __be16 new_port, __sum16 *check)
+ {
++      ovs_ct_clear(skb, NULL);
+       inet_proto_csum_replace2(check, skb, *port, new_port, false);
+       *port = new_port;
+ }
+@@ -703,6 +706,7 @@ static int set_udp(struct sk_buff *skb,
+               uh->dest = dst;
+               flow_key->tp.src = src;
+               flow_key->tp.dst = dst;
++              ovs_ct_clear(skb, NULL);
+       }
+       skb_clear_hash(skb);
+@@ -765,6 +769,8 @@ static int set_sctp(struct sk_buff *skb,
+       sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+       skb_clear_hash(skb);
++      ovs_ct_clear(skb, NULL);
++
+       flow_key->tp.src = sh->source;
+       flow_key->tp.dst = sh->dest;
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -1319,7 +1319,8 @@ int ovs_ct_clear(struct sk_buff *skb, st
+       if (skb_nfct(skb)) {
+               nf_conntrack_put(skb_nfct(skb));
+               nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+-              ovs_ct_fill_key(skb, key);
++              if (key)
++                      ovs_ct_fill_key(skb, key);
+       }
+       return 0;
diff --git a/queue-5.4/net-sched-act_police-more-accurate-mtu-policing.patch b/queue-5.4/net-sched-act_police-more-accurate-mtu-policing.patch
new file mode 100644 (file)
index 0000000..f63ba9c
--- /dev/null
@@ -0,0 +1,68 @@
+From 4ddc844eb81da59bfb816d8d52089aba4e59e269 Mon Sep 17 00:00:00 2001
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Thu, 10 Feb 2022 18:56:08 +0100
+Subject: net/sched: act_police: more accurate MTU policing
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+commit 4ddc844eb81da59bfb816d8d52089aba4e59e269 upstream.
+
+in current Linux, MTU policing does not take into account that packets at
+the TC ingress have the L2 header pulled. Thus, the same TC police action
+(with the same value of tcfp_mtu) behaves differently for ingress/egress.
+In addition, the full GSO size is compared to tcfp_mtu: as a consequence,
+the policer drops GSO packets even when individual segments have the L2 +
+L3 + L4 + payload length below the configured valued of tcfp_mtu.
+
+Improve the accuracy of MTU policing as follows:
+ - account for mac_len for non-GSO packets at TC ingress.
+ - compare MTU threshold with the segmented size for GSO packets.
+Also, add a kselftest that verifies the correct behavior.
+
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[dcaratti: fix conflicts due to lack of the following commits:
+ - commit 2ffe0395288a ("net/sched: act_police: add support for
+   packet-per-second policing")
+ - commit afe231d32eb5 ("selftests: forwarding: Add tc-police tests")
+ - commit 53b61f29367d ("selftests: forwarding: Add tc-police tests for
+   packets per second")]
+Link: https://lore.kernel.org/netdev/876d597a0ff55f6ba786f73c5a9fd9eb8d597a03.1644514748.git.dcaratti@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_police.c |   16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -213,6 +213,20 @@ release_idr:
+       return err;
+ }
++static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit)
++{
++      u32 len;
++
++      if (skb_is_gso(skb))
++              return skb_gso_validate_mac_len(skb, limit);
++
++      len = qdisc_pkt_len(skb);
++      if (skb_at_tc_ingress(skb))
++              len += skb->mac_len;
++
++      return len <= limit;
++}
++
+ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
+                         struct tcf_result *res)
+ {
+@@ -235,7 +249,7 @@ static int tcf_police_act(struct sk_buff
+                       goto inc_overlimits;
+       }
+-      if (qdisc_pkt_len(skb) <= p->tcfp_mtu) {
++      if (tcf_police_mtu_check(skb, p->tcfp_mtu)) {
+               if (!p->rate_present) {
+                       ret = p->tcfp_result;
+                       goto end;
index b1e0a2611b7274bfd335039aa14a481b4bdc4ad6..28b0bd799f6fb23cd6298f7ffccda782ed11b7b3 100644 (file)
@@ -231,3 +231,7 @@ ext4-add-reserved-gdt-blocks-check.patch
 alsa-hda-realtek-fix-mute-micmute-leds-for-hp-440-g8.patch
 alsa-hda-realtek-fix-right-sounds-and-mute-micmute-leds-for-hp-machine.patch
 virtio-pci-remove-wrong-address-verification-in-vp_del_vqs.patch
+net-sched-act_police-more-accurate-mtu-policing.patch
+net-openvswitch-fix-misuse-of-the-cached-connection-on-tuple-changes.patch
+net-openvswitch-fix-leak-of-nested-actions.patch
+arm64-kprobes-use-brk-instead-of-single-step-when-executing-instructions-out-of-line.patch