Fixes for 5.10
author    Sasha Levin <sashal@kernel.org>
Mon, 21 Jun 2021 02:03:39 +0000 (22:03 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 21 Jun 2021 02:03:39 +0000 (22:03 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-5.10/bpf-do-not-mark-insn-as-seen-under-speculative-path-.patch [new file with mode: 0644]
queue-5.10/bpf-inherit-expanded-patched-seen-count-from-old-aux.patch [new file with mode: 0644]
queue-5.10/irqchip-gic-v3-workaround-inconsistent-pmr-setting-o.patch [new file with mode: 0644]
queue-5.10/sched-fair-correctly-insert-cfs_rq-s-to-list-on-unth.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/bpf-do-not-mark-insn-as-seen-under-speculative-path-.patch b/queue-5.10/bpf-do-not-mark-insn-as-seen-under-speculative-path-.patch
new file mode 100644 (file)
index 0000000..63dcf78
--- /dev/null
+++ b/queue-5.10/bpf-do-not-mark-insn-as-seen-under-speculative-path-.patch
@@ -0,0 +1,84 @@
+From c39c9f2fda386291c4faf50e5270fc27c90118b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 May 2021 13:47:27 +0000
+Subject: bpf: Do not mark insn as seen under speculative path verification
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit fe9a5ca7e370e613a9a75a13008a3845ea759d6e ]
+
+... in such circumstances, we do not want to mark the instruction as seen given
+the goal is still to jmp-1 rewrite/sanitize dead code, if it is not reachable
+from the non-speculative path verification. We do however want to verify it for
+safety regardless.
+
+With the patch as-is all the insns that have been marked as seen before the
+patch will also be marked as seen after the patch (just with a potentially
+different non-zero count). An upcoming patch will also verify paths that are
+unreachable in the non-speculative domain, hence this extension is needed.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
+Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 20 ++++++++++++++++++--
+ 1 file changed, 18 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 71ac1da127a6..e97724e36dfb 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -5851,6 +5851,19 @@ do_sim:
+       return !ret ? REASON_STACK : 0;
+ }
+
++static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
++{
++      struct bpf_verifier_state *vstate = env->cur_state;
++
++      /* If we simulate paths under speculation, we don't update the
++       * insn as 'seen' such that when we verify unreachable paths in
++       * the non-speculative domain, sanitize_dead_code() can still
++       * rewrite/sanitize them.
++       */
++      if (!vstate->speculative)
++              env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
++}
++
+ static int sanitize_err(struct bpf_verifier_env *env,
+                       const struct bpf_insn *insn, int reason,
+                       const struct bpf_reg_state *off_reg,
+@@ -9847,7 +9860,7 @@ static int do_check(struct bpf_verifier_env *env)
+               }
+
+               regs = cur_regs(env);
+-              env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
++              sanitize_mark_insn_seen(env);
+               prev_insn_idx = env->insn_idx;
+
+               if (class == BPF_ALU || class == BPF_ALU64) {
+@@ -10067,7 +10080,7 @@ process_bpf_exit:
+                                       return err;
+
+                               env->insn_idx++;
+-                              env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
++                              sanitize_mark_insn_seen(env);
+                       } else {
+                               verbose(env, "invalid BPF_LD mode\n");
+                               return -EINVAL;
+@@ -11741,6 +11754,9 @@ static void free_states(struct bpf_verifier_env *env)
+  * insn_aux_data was touched. These variables are compared to clear temporary
+  * data from failed pass. For testing and experiments do_check_common() can be
+  * run multiple times even when prior attempt to verify is unsuccessful.
++ *
++ * Note that special handling is needed on !env->bypass_spec_v1 if this is
++ * ever called outside of error path with subsequent program rejection.
+  */
+ static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
+ {
+-- 
+2.30.2
+
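For context on the subject's "jmp-1 rewrite": the verifier's dead-code pass
keys on the very ->seen mark that this patch now withholds on speculative-only
paths. Below is a minimal sketch of such a pass, modeled on the shape of the
in-tree sanitize_dead_code(); the function name and comments here are
illustrative, not the verbatim kernel source.

    static void sanitize_dead_code_sketch(struct bpf_verifier_env *env)
    {
            struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
            /* "ja -1" is the jmp-1 rewrite named in the subject line */
            struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
            struct bpf_insn *insn = env->prog->insnsi;
            const int insn_cnt = env->prog->len;
            int i;

            for (i = 0; i < insn_cnt; i++) {
                    /* Speculative-only verification no longer sets ->seen,
                     * so such insns stay eligible for rewriting here.
                     */
                    if (aux_data[i].seen)
                            continue;
                    memcpy(insn + i, &trap, sizeof(trap));
            }
    }
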
diff --git a/queue-5.10/bpf-inherit-expanded-patched-seen-count-from-old-aux.patch b/queue-5.10/bpf-inherit-expanded-patched-seen-count-from-old-aux.patch
new file mode 100644 (file)
index 0000000..8d7594d
--- /dev/null
+++ b/queue-5.10/bpf-inherit-expanded-patched-seen-count-from-old-aux.patch
@@ -0,0 +1,54 @@
+From 11fefa27b6cf96e6209df2e74a83b69f66844336 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 May 2021 13:03:30 +0000
+Subject: bpf: Inherit expanded/patched seen count from old aux data
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+[ Upstream commit d203b0fd863a2261e5d00b97f3d060c4c2a6db71 ]
+
+Instead of relying on current env->pass_cnt, use the seen count from the
+old aux data in adjust_insn_aux_data(), and expand it to the new range of
+patched instructions. This change is valid given we always expand 1:n
+with n>=1, so what applies to the old/original instruction needs to apply
+for the replacement as well.
+
+Not relying on env->pass_cnt is a prerequisite for a later change where we
+want to avoid marking an instruction as seen when it is verified under a
+speculative execution path.
+
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
+Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index da8fc57ff5b2..71ac1da127a6 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -10475,6 +10475,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
+ {
+       struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+       struct bpf_insn *insn = new_prog->insnsi;
++      u32 old_seen = old_data[off].seen;
+       u32 prog_len;
+       int i;
+
+@@ -10495,7 +10496,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
+       memcpy(new_data + off + cnt - 1, old_data + off,
+              sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+       for (i = off; i < off + cnt - 1; i++) {
+-              new_data[i].seen = env->pass_cnt;
++              /* Expand insni[off]'s seen count to the patched range. */
++              new_data[i].seen = old_seen;
+               new_data[i].zext_dst = insn_has_def32(env, insn + i);
+       }
+       env->insn_aux_data = new_data;
+-- 
+2.30.2
+
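To see why inheriting old_seen is safe, it helps to model the 1:n expansion in
isolation. The sketch below is a self-contained stand-in (simplified struct,
hypothetical helper name), not the kernel function itself: the replacement
range off..off+cnt-1 collectively stands in for the one original insn, so it
inherits that insn's seen count instead of env->pass_cnt.

    #include <string.h>

    struct insn_aux { unsigned int seen; };

    /* Model of the aux-data expansion: the insn at 'off' becomes 'cnt'
     * insns, so new_aux must hold old_len + cnt - 1 entries.
     */
    static void expand_aux(struct insn_aux *new_aux,
                           const struct insn_aux *old_aux,
                           int old_len, int off, int cnt)
    {
            unsigned int old_seen = old_aux[off].seen;
            int i;

            /* The unpatched prefix and suffix copy over unchanged. */
            memcpy(new_aux, old_aux, sizeof(*old_aux) * off);
            memcpy(new_aux + off + cnt - 1, old_aux + off,
                   sizeof(*old_aux) * (old_len - off));
            /* Every insn in the patched range inherits the original's
             * seen count rather than the current pass counter.
             */
            for (i = off; i < off + cnt - 1; i++)
                    new_aux[i].seen = old_seen;
    }
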
diff --git a/queue-5.10/irqchip-gic-v3-workaround-inconsistent-pmr-setting-o.patch b/queue-5.10/irqchip-gic-v3-workaround-inconsistent-pmr-setting-o.patch
new file mode 100644 (file)
index 0000000..3500946
--- /dev/null
+++ b/queue-5.10/irqchip-gic-v3-workaround-inconsistent-pmr-setting-o.patch
@@ -0,0 +1,94 @@
+From be4c7ac0521cf1f3e4d6d5b2db9652dfd7052889 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 10 Jun 2021 15:13:46 +0100
+Subject: irqchip/gic-v3: Workaround inconsistent PMR setting on NMI entry
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 382e6e177bc1c02473e56591fe5083ae1e4904f6 ]
+
+The arm64 entry code suffers from an annoying issue on taking
+an NMI, as it sets PMR to a value that actually allows IRQs
+to be acknowledged. This is done for consistency with other parts
+of the code, and is in the process of being fixed. This shouldn't
+be a problem, as we are not enabling interrupts whilst in NMI
+context.
+
+However, in the unfortunate scenario that we took a spurious NMI
+(retired before the read of IAR) *and* that there is an IRQ pending
+at the same time, we'll ack the IRQ in NMI context. Too bad.
+
+In order to avoid deadlocks while running something like perf,
+teach the GICv3 driver about this situation: if we were in
+a context where no interrupt should have fired, transiently
+set PMR to a value that only allows NMIs before acking the pending
+interrupt, and restore the original value after that.
+
+This papers over the core issue for the time being, and makes
+NMIs great again. Sort of.
+
+Fixes: 4d6a38da8e79e94c ("arm64: entry: always set GIC_PRIO_PSR_I_SET during entry")
+Co-developed-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Mark Rutland <mark.rutland@arm.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Reviewed-by: Mark Rutland <mark.rutland@arm.com>
+Link: https://lore.kernel.org/lkml/20210610145731.1350460-1-maz@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-gic-v3.c | 36 +++++++++++++++++++++++++++++++++++-
+ 1 file changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
+index 7929bf12651c..1005b182bab4 100644
+--- a/drivers/irqchip/irq-gic-v3.c
++++ b/drivers/irqchip/irq-gic-v3.c
+@@ -642,11 +642,45 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
+               nmi_exit();
+ }
+
++static u32 do_read_iar(struct pt_regs *regs)
++{
++      u32 iar;
++
++      if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
++              u64 pmr;
++
++              /*
++               * We were in a context with IRQs disabled. However, the
++               * entry code has set PMR to a value that allows any
++               * interrupt to be acknowledged, and not just NMIs. This can
++               * lead to surprising effects if the NMI has been retired in
++               * the meantime, and that there is an IRQ pending. The IRQ
++               * would then be taken in NMI context, something that nobody
++               * wants to debug twice.
++               *
++               * Until we sort this, drop PMR again to a level that will
++               * actually only allow NMIs before reading IAR, and then
++               * restore it to what it was.
++               */
++              pmr = gic_read_pmr();
++              gic_pmr_mask_irqs();
++              isb();
++
++              iar = gic_read_iar();
++
++              gic_write_pmr(pmr);
++      } else {
++              iar = gic_read_iar();
++      }
++
++      return iar;
++}
++
+ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
+ {
+       u32 irqnr;
+
+-      irqnr = gic_read_iar();
++      irqnr = do_read_iar(regs);
+
+       /* Check for special IDs first */
+       if ((irqnr >= 1020 && irqnr <= 1023))
+-- 
+2.30.2
+
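The shape of the fix is a save/mask/ack/restore window around the IAR read. A
simplified sketch of that pattern follows; read_pmr(), write_pmr(), read_iar()
and barrier_isb() are hypothetical stand-ins for the driver's gic_read_pmr(),
gic_write_pmr(), gic_read_iar() and isb(), and the priority value is
illustrative rather than the real GIC_PRIO_* constants.

    typedef unsigned int u32;       /* spelled out for a standalone read */

    #define PMR_NMI_ONLY    0x80    /* masks IRQs but still admits NMIs */

    static u32 ack_in_nmi_only_window(void)
    {
            u32 saved_pmr = read_pmr();     /* whatever entry code left */
            u32 iar;

            write_pmr(PMR_NMI_ONLY);        /* close the IRQ window */
            barrier_isb();                  /* mask must take effect
                                             * before the ack below */
            iar = read_iar();               /* only an NMI can be acked */
            write_pmr(saved_pmr);           /* reopen for normal IRQs */

            return iar;
    }
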
diff --git a/queue-5.10/sched-fair-correctly-insert-cfs_rq-s-to-list-on-unth.patch b/queue-5.10/sched-fair-correctly-insert-cfs_rq-s-to-list-on-unth.patch
new file mode 100644 (file)
index 0000000..3fa7a23
--- /dev/null
+++ b/queue-5.10/sched-fair-correctly-insert-cfs_rq-s-to-list-on-unth.patch
@@ -0,0 +1,120 @@
+From fcdb8bc3bcb97a33cba7278eb7e18f07744805fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Jun 2021 13:28:15 +0200
+Subject: sched/fair: Correctly insert cfs_rq's to list on unthrottle
+
+From: Odin Ugedal <odin@uged.al>
+
+[ Upstream commit a7b359fc6a37faaf472125867c8dc5a068c90982 ]
+
+Fix an issue where fairness is decreased since cfs_rq's can end up not
+being decayed properly. For two sibling control groups with the same
+priority, this can often lead to a load ratio of 99/1 (!!).
+
+This happens because when a cfs_rq is throttled, all the descendant
+cfs_rq's will be removed from the leaf list. When the initial cfs_rq
+is unthrottled, it will currently only re-add descendant cfs_rq's if
+they have one or more entities enqueued. This is not a perfect
+heuristic.
+
+Instead, we insert all cfs_rq's that contain one or more enqueued
+entities, or whose load is not completely decayed.
+
+This can often lead to situations like the following for equally
+weighted control groups:
+
+  $ ps u -C stress
+  USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
+  root       10009 88.8  0.0   3676   100 pts/1    R+   11:04   0:13 stress --cpu 1
+  root       10023  3.0  0.0   3676   104 pts/1    R+   11:04   0:00 stress --cpu 1
+
+Fixes: 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
+[vingo: !SMP build fix]
+Signed-off-by: Odin Ugedal <odin@uged.al>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20210612112815.61678-1-odin@uged.al
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 44 +++++++++++++++++++++++++-------------------
+ 1 file changed, 25 insertions(+), 19 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d6e1c90de570..1cbb7f80db31 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3300,6 +3300,24 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
+
+ #ifdef CONFIG_SMP
+ #ifdef CONFIG_FAIR_GROUP_SCHED
++
++static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
++{
++      if (cfs_rq->load.weight)
++              return false;
++
++      if (cfs_rq->avg.load_sum)
++              return false;
++
++      if (cfs_rq->avg.util_sum)
++              return false;
++
++      if (cfs_rq->avg.runnable_sum)
++              return false;
++
++      return true;
++}
++
+ /**
+  * update_tg_load_avg - update the tg's load avg
+  * @cfs_rq: the cfs_rq whose avg changed
+@@ -4093,6 +4111,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
+
+ #else /* CONFIG_SMP */
+
++static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
++{
++      return true;
++}
++
+ #define UPDATE_TG     0x0
+ #define SKIP_AGE_LOAD 0x0
+ #define DO_ATTACH     0x0
+@@ -4751,8 +4774,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
+               cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
+                                            cfs_rq->throttled_clock_task;
+
+-              /* Add cfs_rq with already running entity in the list */
+-              if (cfs_rq->nr_running >= 1)
++              /* Add cfs_rq with load or one or more already running entities to the list */
++              if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
+                       list_add_leaf_cfs_rq(cfs_rq);
+       }
+
+@@ -7927,23 +7950,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+
+-static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+-{
+-      if (cfs_rq->load.weight)
+-              return false;
+-
+-      if (cfs_rq->avg.load_sum)
+-              return false;
+-
+-      if (cfs_rq->avg.util_sum)
+-              return false;
+-
+-      if (cfs_rq->avg.runnable_sum)
+-              return false;
+-
+-      return true;
+-}
+-
+ static bool __update_blocked_fair(struct rq *rq, bool *done)
+ {
+       struct cfs_rq *cfs_rq, *pos;
+-- 
+2.30.2
+
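Condensed, the post-patch rule in tg_unthrottle_up() reads as a single
predicate: a cfs_rq rejoins the leaf list if it has runnable entities or still
carries undecayed PELT sums. A sketch using the same struct cfs_rq fields the
patch tests (the helper name is hypothetical, kernel context assumed):

    static bool should_rejoin_leaf_list(const struct cfs_rq *cfs_rq)
    {
            /* The same four checks as cfs_rq_is_decayed() above. */
            bool decayed = !cfs_rq->load.weight &&
                           !cfs_rq->avg.load_sum &&
                           !cfs_rq->avg.util_sum &&
                           !cfs_rq->avg.runnable_sum;

            /* Undecayed load must stay visible to
             * update_blocked_averages(), even with nothing enqueued.
             */
            return cfs_rq->nr_running || !decayed;
    }
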
diff --git a/queue-5.10/series b/queue-5.10/series
index 6188a7ed4a9e062d9e813b5922233071fe294892..f108441724b778c070acbe3909bb38d4133da310 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -91,3 +91,7 @@ asoc-qcom-lpass-cpu-fix-pop-noise-during-audio-captu.patch
 radeon-use-memcpy_to-fromio-for-uvd-fw-upload.patch
 hwmon-scpi-hwmon-shows-the-negative-temperature-prop.patch
 mm-relocate-write_protect_seq-in-struct-mm_struct.patch
+irqchip-gic-v3-workaround-inconsistent-pmr-setting-o.patch
+sched-fair-correctly-insert-cfs_rq-s-to-list-on-unth.patch
+bpf-inherit-expanded-patched-seen-count-from-old-aux.patch
+bpf-do-not-mark-insn-as-seen-under-speculative-path-.patch