From 20b509f11a764c0099dcb668b61df69b57720c09 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Tue, 8 Jun 2021 17:02:14 +0200
Subject: [PATCH] 4.19-stable patches

added patches:
	bnxt_en-remove-the-setting-of-dev_port.patch
	perf-cgroups-don-t-rotate-events-for-cgroups-unnecessarily.patch
	perf-core-fix-corner-case-in-perf_rotate_context.patch
---
 ...xt_en-remove-the-setting-of-dev_port.patch |  32 ++++
 ...ate-events-for-cgroups-unnecessarily.patch | 149 ++++++++++++++++++
 ...x-corner-case-in-perf_rotate_context.patch |  99 ++++++++++++
 queue-4.19/series                             |   3 +
 4 files changed, 283 insertions(+)
 create mode 100644 queue-4.19/bnxt_en-remove-the-setting-of-dev_port.patch
 create mode 100644 queue-4.19/perf-cgroups-don-t-rotate-events-for-cgroups-unnecessarily.patch
 create mode 100644 queue-4.19/perf-core-fix-corner-case-in-perf_rotate_context.patch

diff --git a/queue-4.19/bnxt_en-remove-the-setting-of-dev_port.patch b/queue-4.19/bnxt_en-remove-the-setting-of-dev_port.patch
new file mode 100644
index 00000000000..b1e5334e305
--- /dev/null
+++ b/queue-4.19/bnxt_en-remove-the-setting-of-dev_port.patch
@@ -0,0 +1,32 @@
+From 1d86859fdf31a0d50cc82b5d0d6bfb5fe98f6c00 Mon Sep 17 00:00:00 2001
+From: Michael Chan
+Date: Mon, 27 Jan 2020 04:56:15 -0500
+Subject: bnxt_en: Remove the setting of dev_port.
+
+From: Michael Chan
+
+commit 1d86859fdf31a0d50cc82b5d0d6bfb5fe98f6c00 upstream.
+
+The dev_port is meant to distinguish the network ports belonging to
+the same PCI function. Our devices only have one network port
+associated with each PCI function, so for correctness we should
+not set it.
+
+Signed-off-by: Michael Chan
+Signed-off-by: David S. Miller
+Signed-off-by: Krzysztof Kozlowski
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -5252,7 +5252,6 @@ static int __bnxt_hwrm_func_qcaps(struct
+ 
+ 	pf->fw_fid = le16_to_cpu(resp->fid);
+ 	pf->port_id = le16_to_cpu(resp->port_id);
+-	bp->dev->dev_port = pf->port_id;
+ 	memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN);
+ 	pf->first_vf_id = le16_to_cpu(resp->first_vf_id);
+ 	pf->max_vfs = le16_to_cpu(resp->max_vfs);
diff --git a/queue-4.19/perf-cgroups-don-t-rotate-events-for-cgroups-unnecessarily.patch b/queue-4.19/perf-cgroups-don-t-rotate-events-for-cgroups-unnecessarily.patch
new file mode 100644
index 00000000000..cb0410d8852
--- /dev/null
+++ b/queue-4.19/perf-cgroups-don-t-rotate-events-for-cgroups-unnecessarily.patch
@@ -0,0 +1,149 @@
+From fd7d55172d1e2e501e6da0a5c1de25f06612dc2e Mon Sep 17 00:00:00 2001
+From: Ian Rogers
+Date: Sat, 1 Jun 2019 01:27:22 -0700
+Subject: perf/cgroups: Don't rotate events for cgroups unnecessarily
+
+From: Ian Rogers
+
+commit fd7d55172d1e2e501e6da0a5c1de25f06612dc2e upstream.
+
+Currently perf_rotate_context assumes that if the context's nr_events !=
+nr_active a rotation is necessary for perf event multiplexing. With
+cgroups, nr_events is the total count of events for all cgroups and
+nr_active will not include events in a cgroup other than the current
+task's. This makes rotation appear necessary for cgroups when it is not.
+
+Add a perf_event_context flag that is set when rotation is necessary.
+Clear the flag during sched_out and set it when a flexible sched_in
+fails due to resources.
+
+Signed-off-by: Ian Rogers
+Signed-off-by: Peter Zijlstra (Intel)
+Cc: Alexander Shishkin
+Cc: Arnaldo Carvalho de Melo
+Cc: Arnaldo Carvalho de Melo
+Cc: Borislav Petkov
+Cc: Jiri Olsa
+Cc: Kan Liang
+Cc: Linus Torvalds
+Cc: Namhyung Kim
+Cc: Peter Zijlstra
+Cc: Stephane Eranian
+Cc: Thomas Gleixner
+Cc: Vince Weaver
+Link: https://lkml.kernel.org/r/20190601082722.44543-1-irogers@google.com
+Signed-off-by: Ingo Molnar
+Signed-off-by: Wen Yang
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/perf_event.h |    5 +++++
+ kernel/events/core.c       |   42 ++++++++++++++++++++++--------------------
+ 2 files changed, 27 insertions(+), 20 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -747,6 +747,11 @@ struct perf_event_context {
+ 	int				nr_stat;
+ 	int				nr_freq;
+ 	int				rotate_disable;
++	/*
++	 * Set when nr_events != nr_active, except tolerant to events not
++	 * necessary to be active due to scheduling constraints, such as cgroups.
++	 */
++	int				rotate_necessary;
+ 	atomic_t			refcount;
+ 	struct task_struct		*task;
+
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2952,6 +2952,12 @@ static void ctx_sched_out(struct perf_ev
+ 	if (!ctx->nr_active || !(is_active & EVENT_ALL))
+ 		return;
+ 
++	/*
++	 * If we had been multiplexing, no rotations are necessary, now no events
++	 * are active.
++	 */
++	ctx->rotate_necessary = 0;
++
+ 	perf_pmu_disable(ctx->pmu);
+ 	if (is_active & EVENT_PINNED) {
+ 		list_for_each_entry_safe(event, tmp, &ctx->pinned_active, active_list)
+@@ -3319,10 +3325,13 @@ static int flexible_sched_in(struct perf
+ 		return 0;
+ 
+ 	if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
+-		if (!group_sched_in(event, sid->cpuctx, sid->ctx))
+-			list_add_tail(&event->active_list, &sid->ctx->flexible_active);
+-		else
++		int ret = group_sched_in(event, sid->cpuctx, sid->ctx);
++		if (ret) {
+ 			sid->can_add_hw = 0;
++			sid->ctx->rotate_necessary = 1;
++			return 0;
++		}
++		list_add_tail(&event->active_list, &sid->ctx->flexible_active);
+ 	}
+ 
+ 	return 0;
+@@ -3690,24 +3699,17 @@ ctx_first_active(struct perf_event_conte
+ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
+ {
+ 	struct perf_event *cpu_event = NULL, *task_event = NULL;
+-	bool cpu_rotate = false, task_rotate = false;
+-	struct perf_event_context *ctx = NULL;
++	struct perf_event_context *task_ctx = NULL;
++	int cpu_rotate, task_rotate;
+ 
+ 	/*
+ 	 * Since we run this from IRQ context, nobody can install new
+ 	 * events, thus the event count values are stable.
+ 	 */
+ 
+-	if (cpuctx->ctx.nr_events) {
+-		if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
+-			cpu_rotate = true;
+-	}
+-
+-	ctx = cpuctx->task_ctx;
+-	if (ctx && ctx->nr_events) {
+-		if (ctx->nr_events != ctx->nr_active)
+-			task_rotate = true;
+-	}
++	cpu_rotate = cpuctx->ctx.rotate_necessary;
++	task_ctx = cpuctx->task_ctx;
++	task_rotate = task_ctx ? task_ctx->rotate_necessary : 0;
+ 
+ 	if (!(cpu_rotate || task_rotate))
+ 		return false;
+@@ -3716,7 +3718,7 @@ static bool perf_rotate_context(struct p
+ 	perf_pmu_disable(cpuctx->ctx.pmu);
+ 
+ 	if (task_rotate)
+-		task_event = ctx_first_active(ctx);
++		task_event = ctx_first_active(task_ctx);
+ 	if (cpu_rotate)
+ 		cpu_event = ctx_first_active(&cpuctx->ctx);
+ 
+@@ -3724,17 +3726,17 @@ static bool perf_rotate_context(struct p
+ 	 * As per the order given at ctx_resched() first 'pop' task flexible
+ 	 * and then, if needed CPU flexible.
+ 	 */
+-	if (task_event || (ctx && cpu_event))
+-		ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
++	if (task_event || (task_ctx && cpu_event))
++		ctx_sched_out(task_ctx, cpuctx, EVENT_FLEXIBLE);
+ 	if (cpu_event)
+ 		cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+ 
+ 	if (task_event)
+-		rotate_ctx(ctx, task_event);
++		rotate_ctx(task_ctx, task_event);
+ 	if (cpu_event)
+ 		rotate_ctx(&cpuctx->ctx, cpu_event);
+ 
+-	perf_event_sched_in(cpuctx, ctx, current);
++	perf_event_sched_in(cpuctx, task_ctx, current);
+ 
+ 	perf_pmu_enable(cpuctx->ctx.pmu);
+ 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
diff --git a/queue-4.19/perf-core-fix-corner-case-in-perf_rotate_context.patch b/queue-4.19/perf-core-fix-corner-case-in-perf_rotate_context.patch
new file mode 100644
index 00000000000..25d64d08899
--- /dev/null
+++ b/queue-4.19/perf-core-fix-corner-case-in-perf_rotate_context.patch
@@ -0,0 +1,99 @@
+From 7fa343b7fdc4f351de4e3f28d5c285937dd1f42f Mon Sep 17 00:00:00 2001
+From: Song Liu
+Date: Tue, 8 Oct 2019 09:59:49 -0700
+Subject: perf/core: Fix corner case in perf_rotate_context()
+
+From: Song Liu
+
+commit 7fa343b7fdc4f351de4e3f28d5c285937dd1f42f upstream.
+
+In perf_rotate_context(), when the first cpu flexible event fails to
+schedule, cpu_rotate is 1, while cpu_event is NULL. Since cpu_event is
+NULL, perf_rotate_context will _NOT_ call cpu_ctx_sched_out(), thus
+cpuctx->ctx.is_active will have EVENT_FLEXIBLE set. Then, the next
+perf_event_sched_in() will skip all cpu flexible events because of the
+EVENT_FLEXIBLE bit.
+
+In the next call of perf_rotate_context(), cpu_rotate stays 1, and
+cpu_event stays NULL, so this process repeats. The end result is that
+flexible events on this cpu will not be scheduled (until another event
+is added to the cpuctx).
+
+Here is an easy repro of this issue. On Intel CPUs, where ref-cycles
+can only use one counter, run one pinned event for ref-cycles, one
+flexible event for ref-cycles, and one flexible event for cycles. The
+flexible ref-cycles event is never scheduled, which is expected. However,
+because of this issue, the cycles event is never scheduled either.
+
+ $ perf stat -e ref-cycles:D,ref-cycles,cycles -C 5 -I 1000
+
+           time             counts unit events
+    1.000152973         15,412,480      ref-cycles:D
+    1.000152973      <not counted>      ref-cycles          (0.00%)
+    1.000152973      <not counted>      cycles              (0.00%)
+    2.000486957         18,263,120      ref-cycles:D
+    2.000486957      <not counted>      ref-cycles          (0.00%)
+    2.000486957      <not counted>      cycles              (0.00%)
+
+To fix this, when the flexible_active list is empty, try rotating the
+first event in the flexible_groups. Also, rename ctx_first_active() to
+ctx_event_to_rotate(), which is more accurate.
+ +Signed-off-by: Song Liu +Signed-off-by: Peter Zijlstra (Intel) +Cc: +Cc: Arnaldo Carvalho de Melo +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sasha Levin +Cc: Thomas Gleixner +Fixes: 8d5bce0c37fa ("perf/core: Optimize perf_rotate_context() event scheduling") +Link: https://lkml.kernel.org/r/20191008165949.920548-1-songliubraving@fb.com +Signed-off-by: Ingo Molnar +Signed-off-by: Wen Yang +Signed-off-by: Greg Kroah-Hartman +--- + kernel/events/core.c | 22 +++++++++++++++++----- + 1 file changed, 17 insertions(+), 5 deletions(-) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -3689,11 +3689,23 @@ static void rotate_ctx(struct perf_event + perf_event_groups_insert(&ctx->flexible_groups, event); + } + ++/* pick an event from the flexible_groups to rotate */ + static inline struct perf_event * +-ctx_first_active(struct perf_event_context *ctx) ++ctx_event_to_rotate(struct perf_event_context *ctx) + { +- return list_first_entry_or_null(&ctx->flexible_active, +- struct perf_event, active_list); ++ struct perf_event *event; ++ ++ /* pick the first active flexible event */ ++ event = list_first_entry_or_null(&ctx->flexible_active, ++ struct perf_event, active_list); ++ ++ /* if no active flexible event, pick the first event */ ++ if (!event) { ++ event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree), ++ typeof(*event), group_node); ++ } ++ ++ return event; + } + + static bool perf_rotate_context(struct perf_cpu_context *cpuctx) +@@ -3718,9 +3730,9 @@ static bool perf_rotate_context(struct p + perf_pmu_disable(cpuctx->ctx.pmu); + + if (task_rotate) +- task_event = ctx_first_active(task_ctx); ++ task_event = ctx_event_to_rotate(task_ctx); + if (cpu_rotate) +- cpu_event = ctx_first_active(&cpuctx->ctx); ++ cpu_event = ctx_event_to_rotate(&cpuctx->ctx); + + /* + * As per the order given at ctx_resched() first 'pop' task flexible diff --git a/queue-4.19/series b/queue-4.19/series index bfeef500599..0596a8ff0b8 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -46,3 +46,6 @@ bpf-make-more-use-of-any-alignment-in-test_verifier.c.patch bpf-apply-f_needs_efficient_unaligned_access-to-more-accept-test-cases.patch selftests-bpf-add-any-alignment-annotation-for-some-tests.patch selftests-bpf-avoid-running-unprivileged-tests-with-alignment-requirements.patch +bnxt_en-remove-the-setting-of-dev_port.patch +perf-cgroups-don-t-rotate-events-for-cgroups-unnecessarily.patch +perf-core-fix-corner-case-in-perf_rotate_context.patch -- 2.47.3