From: Greg Kroah-Hartman
Date: Mon, 15 Feb 2021 14:58:42 +0000 (+0100)
Subject: 5.10-stable patches
X-Git-Tag: v5.4.99~10
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=33794e652ef4fd892814a806cbf3c68bf14a0813;p=thirdparty%2Fkernel%2Fstable-queue.git

5.10-stable patches

added patches:
	bridge-mrp-fix-the-usage-of-br_mrp_port_switchdev_set_state.patch
	cpufreq-acpi-extend-frequency-tables-to-cover-boost-frequencies.patch
	cpufreq-acpi-update-arch-scale-invariance-max-perf-ratio-if-cppc-is-not-there.patch
	kcov-usb-only-collect-coverage-from-__usb_hcd_giveback_urb-in-softirq.patch
	net-dsa-call-teardown-method-on-probe-failure.patch
	net-fix-iteration-for-sctp-transport-seq_files.patch
	net-gro-do-not-keep-too-many-gro-packets-in-napi-rx_list.patch
	net-qrtr-restrict-user-controlled-length-in-qrtr_tun_write_iter.patch
	net-rds-restrict-iovecs-length-for-rds_cmsg_rdma_args.patch
	net-vmw_vsock-fix-null-pointer-dereference.patch
	net-vmw_vsock-improve-locking-in-vsock_connect_timeout.patch
	net-watchdog-hold-device-global-xmit-lock-during-tx-disable.patch
	ovl-expand-warning-in-ovl_d_real.patch
	rxrpc-fix-clearance-of-tx-rx-ring-when-releasing-a-call.patch
	switchdev-mrp-remove-switchdev_attr_id_mrp_port_stat.patch
	udp-fix-skb_copy_and_csum_datagram-with-odd-segment-sizes.patch
	vsock-fix-locking-in-vsock_shutdown.patch
	vsock-virtio-update-credit-only-if-socket-is-not-closed.patch
---

diff --git a/queue-5.10/bridge-mrp-fix-the-usage-of-br_mrp_port_switchdev_set_state.patch b/queue-5.10/bridge-mrp-fix-the-usage-of-br_mrp_port_switchdev_set_state.patch
new file mode 100644
index 00000000000..2585e8ddf77
--- /dev/null
+++ b/queue-5.10/bridge-mrp-fix-the-usage-of-br_mrp_port_switchdev_set_state.patch
@@ -0,0 +1,92 @@
+From b2bdba1cbc84cadb14393d0101a5bfd38d342e0a Mon Sep 17 00:00:00 2001
+From: Horatiu Vultur
+Date: Sat, 6 Feb 2021 22:47:33 +0100
+Subject: bridge: mrp: Fix the usage of br_mrp_port_switchdev_set_state
+
+From: Horatiu Vultur
+
+commit b2bdba1cbc84cadb14393d0101a5bfd38d342e0a upstream.
+
+The function br_mrp_port_switchdev_set_state was called both with MRP
+port state and STP port state, which is an issue because they don't
+match exactly.
+
+Therefore, update the function to be used only with STP port state and
+use the id SWITCHDEV_ATTR_ID_PORT_STP_STATE.
+
+The choice of using STP over MRP is that the drivers already implement
+SWITCHDEV_ATTR_ID_PORT_STP_STATE and already in SW we update the port
+STP state.
+
+Fixes: 9a9f26e8f7ea30 ("bridge: mrp: Connect MRP API with the switchdev API")
+Fixes: fadd409136f0f2 ("bridge: switchdev: mrp: Implement MRP API for switchdev")
+Fixes: 2f1a11ae11d222 ("bridge: mrp: Add MRP interface.")
+Reported-by: Rasmus Villemoes
+Signed-off-by: Horatiu Vultur
+Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_mrp.c | 9 ++++++--- + net/bridge/br_mrp_switchdev.c | 7 +++---- + net/bridge/br_private_mrp.h | 3 +-- + 3 files changed, 10 insertions(+), 9 deletions(-) + +--- a/net/bridge/br_mrp.c ++++ b/net/bridge/br_mrp.c +@@ -544,19 +544,22 @@ int br_mrp_del(struct net_bridge *br, st + int br_mrp_set_port_state(struct net_bridge_port *p, + enum br_mrp_port_state_type state) + { ++ u32 port_state; ++ + if (!p || !(p->flags & BR_MRP_AWARE)) + return -EINVAL; + + spin_lock_bh(&p->br->lock); + + if (state == BR_MRP_PORT_STATE_FORWARDING) +- p->state = BR_STATE_FORWARDING; ++ port_state = BR_STATE_FORWARDING; + else +- p->state = BR_STATE_BLOCKING; ++ port_state = BR_STATE_BLOCKING; + ++ p->state = port_state; + spin_unlock_bh(&p->br->lock); + +- br_mrp_port_switchdev_set_state(p, state); ++ br_mrp_port_switchdev_set_state(p, port_state); + + return 0; + } +--- a/net/bridge/br_mrp_switchdev.c ++++ b/net/bridge/br_mrp_switchdev.c +@@ -169,13 +169,12 @@ int br_mrp_switchdev_send_in_test(struct + return err; + } + +-int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, +- enum br_mrp_port_state_type state) ++int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state) + { + struct switchdev_attr attr = { + .orig_dev = p->dev, +- .id = SWITCHDEV_ATTR_ID_MRP_PORT_STATE, +- .u.mrp_port_state = state, ++ .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, ++ .u.stp_state = state, + }; + int err; + +--- a/net/bridge/br_private_mrp.h ++++ b/net/bridge/br_private_mrp.h +@@ -72,8 +72,7 @@ int br_mrp_switchdev_set_ring_state(stru + int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp, + u32 interval, u8 max_miss, u32 period, + bool monitor); +-int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, +- enum br_mrp_port_state_type state); ++int br_mrp_port_switchdev_set_state(struct net_bridge_port *p, u32 state); + int br_mrp_port_switchdev_set_role(struct net_bridge_port *p, + enum br_mrp_port_role_type role); + int br_mrp_switchdev_set_in_role(struct net_bridge *br, struct br_mrp *mrp, diff --git a/queue-5.10/cpufreq-acpi-extend-frequency-tables-to-cover-boost-frequencies.patch b/queue-5.10/cpufreq-acpi-extend-frequency-tables-to-cover-boost-frequencies.patch new file mode 100644 index 00000000000..bedf123b99c --- /dev/null +++ b/queue-5.10/cpufreq-acpi-extend-frequency-tables-to-cover-boost-frequencies.patch @@ -0,0 +1,256 @@ +From 3c55e94c0adea4a5389c4b80f6ae9927dd6a4501 Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Thu, 4 Feb 2021 18:25:37 +0100 +Subject: cpufreq: ACPI: Extend frequency tables to cover boost frequencies + +From: Rafael J. Wysocki + +commit 3c55e94c0adea4a5389c4b80f6ae9927dd6a4501 upstream. + +A severe performance regression on AMD EPYC processors when using +the schedutil scaling governor was discovered by Phoronix.com and +attributed to the following commits: + + 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD + systems") + + 976df7e5730e ("x86, sched: Use midpoint of max_boost and max_P for + frequency invariance on AMD EPYC") + +The source of the problem is that the maximum performance level taken +for computing the arch_max_freq_ratio value used in the x86 scale- +invariance code is higher than the one corresponding to the +cpuinfo.max_freq value coming from the acpi_cpufreq driver. 
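As a rough numerical illustration of that mismatch (a standalone sketch;
the frequencies are invented for this example, not taken from the commit),
assume CPPC reports a nominal level of 2.0 GHz and a boost level of
3.35 GHz, while acpi_cpufreq derives cpuinfo.max_freq from _PSS as 2.0 GHz:

  /* Hypothetical values; mirrors the kernel's fixed-point scaling. */
  #include <stdio.h>
  #include <stdint.h>

  #define SCHED_CAPACITY_SHIFT 10 /* same scale factor the kernel uses */

  int main(void)
  {
          uint64_t nominal_khz = 2000000;  /* driver's cpuinfo.max_freq */
          uint64_t boost_khz = 3350000;    /* CPPC highest performance */
          /* ratio used by the x86 scale-invariance code */
          uint64_t ratio = (boost_khz << SCHED_CAPACITY_SHIFT) / nominal_khz;
          /* utilization of a CPU that is 100% busy at nominal frequency */
          uint64_t util = (100ULL << SCHED_CAPACITY_SHIFT) / ratio;

          printf("freq ratio = %llu/1024, apparent utilization = %llu%%\n",
                 (unsigned long long)ratio, (unsigned long long)util);
          return 0;
  }

With these made-up numbers a fully busy CPU appears only about 60%
utilized, even though it never runs faster than the frequency the driver
reports as its maximum.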
+ +This effectively causes the scale-invariant utilization to fall below +100% even if the CPU runs at cpuinfo.max_freq or slightly faster, so +the schedutil governor selects a frequency below cpuinfo.max_freq +then. That frequency corresponds to a frequency table entry below +the maximum performance level necessary to get to the "boost" range +of CPU frequencies. + +However, if the cpuinfo.max_freq value coming from acpi_cpufreq was +higher, the schedutil governor would select higher frequencies which +in turn would allow acpi_cpufreq to set more adequate performance +levels and to get to the "boost" range of CPU frequencies more often. + +This issue affects any systems where acpi_cpufreq is used and the +"boost" (or "turbo") frequencies are enabled, not just AMD EPYC. +Moreover, commit db865272d9c4 ("cpufreq: Avoid configuring old +governors as default with intel_pstate") from the 5.10 development +cycle made it extremely easy to default to schedutil even if the +preferred driver is acpi_cpufreq as long as intel_pstate is built +too, because the mere presence of the latter effectively removes the +ondemand governor from the defaults. Distro kernels are likely to +include both intel_pstate and acpi_cpufreq on x86, so their users +who cannot use intel_pstate or choose to use acpi_cpufreq may +easily be affectecd by this issue. + +To address this issue, extend the frequency table constructed by +acpi_cpufreq for each CPU to cover the entire range of available +frequencies (including the "boost" ones) if CPPC is available and +indicates that "boost" (or "turbo") frequencies are enabled. That +causes cpuinfo.max_freq to become the maximum "boost" frequency of +the given CPU (instead of the maximum frequency returned by the ACPI +_PSS object that corresponds to the "nominal" performance level). + +Fixes: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems") +Fixes: 976df7e5730e ("x86, sched: Use midpoint of max_boost and max_P for frequency invariance on AMD EPYC") +Fixes: db865272d9c4 ("cpufreq: Avoid configuring old governors as default with intel_pstate") +Link: https://www.phoronix.com/scan.php?page=article&item=linux511-amd-schedutil&num=1 +Link: https://lore.kernel.org/linux-pm/20210203135321.12253-2-ggherdovich@suse.cz/ +Reported-by: Michael Larabel +Diagnosed-by: Giovanni Gherdovich +Signed-off-by: Rafael J. 
Wysocki +Tested-by: Giovanni Gherdovich +Reviewed-by: Giovanni Gherdovich +Tested-by: Michael Larabel +Signed-off-by: Greg Kroah-Hartman +--- + drivers/cpufreq/acpi-cpufreq.c | 107 +++++++++++++++++++++++++++++---- + 1 file changed, 95 insertions(+), 12 deletions(-) + +diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c +index 1e4fbb002a31..4614f1c6f50a 100644 +--- a/drivers/cpufreq/acpi-cpufreq.c ++++ b/drivers/cpufreq/acpi-cpufreq.c +@@ -26,6 +26,7 @@ + #include + + #include ++#include + + #include + #include +@@ -53,6 +54,7 @@ struct acpi_cpufreq_data { + unsigned int resume; + unsigned int cpu_feature; + unsigned int acpi_perf_cpu; ++ unsigned int first_perf_state; + cpumask_var_t freqdomain_cpus; + void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); + u32 (*cpu_freq_read)(struct acpi_pct_register *reg); +@@ -221,10 +223,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr) + + perf = to_perf_data(data); + +- cpufreq_for_each_entry(pos, policy->freq_table) ++ cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state) + if (msr == perf->states[pos->driver_data].status) + return pos->frequency; +- return policy->freq_table[0].frequency; ++ return policy->freq_table[data->first_perf_state].frequency; + } + + static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) +@@ -363,6 +365,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) + struct cpufreq_policy *policy; + unsigned int freq; + unsigned int cached_freq; ++ unsigned int state; + + pr_debug("%s (%d)\n", __func__, cpu); + +@@ -374,7 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) + if (unlikely(!data || !policy->freq_table)) + return 0; + +- cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; ++ state = to_perf_data(data)->state; ++ if (state < data->first_perf_state) ++ state = data->first_perf_state; ++ ++ cached_freq = policy->freq_table[state].frequency; + freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); + if (freq != cached_freq) { + /* +@@ -628,16 +635,54 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) + } + #endif + ++#ifdef CONFIG_ACPI_CPPC_LIB ++static u64 get_max_boost_ratio(unsigned int cpu) ++{ ++ struct cppc_perf_caps perf_caps; ++ u64 highest_perf, nominal_perf; ++ int ret; ++ ++ if (acpi_pstate_strict) ++ return 0; ++ ++ ret = cppc_get_perf_caps(cpu, &perf_caps); ++ if (ret) { ++ pr_debug("CPU%d: Unable to get performance capabilities (%d)\n", ++ cpu, ret); ++ return 0; ++ } ++ ++ highest_perf = perf_caps.highest_perf; ++ nominal_perf = perf_caps.nominal_perf; ++ ++ if (!highest_perf || !nominal_perf) { ++ pr_debug("CPU%d: highest or nominal performance missing\n", cpu); ++ return 0; ++ } ++ ++ if (highest_perf < nominal_perf) { ++ pr_debug("CPU%d: nominal performance above highest\n", cpu); ++ return 0; ++ } ++ ++ return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); ++} ++#else ++static inline u64 get_max_boost_ratio(unsigned int cpu) { return 0; } ++#endif ++ + static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) + { +- unsigned int i; +- unsigned int valid_states = 0; +- unsigned int cpu = policy->cpu; ++ struct cpufreq_frequency_table *freq_table; ++ struct acpi_processor_performance *perf; + struct acpi_cpufreq_data *data; ++ unsigned int cpu = policy->cpu; ++ struct cpuinfo_x86 *c = &cpu_data(cpu); ++ unsigned int valid_states = 0; + unsigned int result = 0; +- struct cpuinfo_x86 *c = &cpu_data(policy->cpu); +- struct 
acpi_processor_performance *perf; +- struct cpufreq_frequency_table *freq_table; ++ unsigned int state_count; ++ u64 max_boost_ratio; ++ unsigned int i; + #ifdef CONFIG_SMP + static int blacklisted; + #endif +@@ -750,8 +795,20 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) + goto err_unreg; + } + +- freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table), +- GFP_KERNEL); ++ state_count = perf->state_count + 1; ++ ++ max_boost_ratio = get_max_boost_ratio(cpu); ++ if (max_boost_ratio) { ++ /* ++ * Make a room for one more entry to represent the highest ++ * available "boost" frequency. ++ */ ++ state_count++; ++ valid_states++; ++ data->first_perf_state = valid_states; ++ } ++ ++ freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); + if (!freq_table) { + result = -ENOMEM; + goto err_unreg; +@@ -785,6 +842,30 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) + valid_states++; + } + freq_table[valid_states].frequency = CPUFREQ_TABLE_END; ++ ++ if (max_boost_ratio) { ++ unsigned int state = data->first_perf_state; ++ unsigned int freq = freq_table[state].frequency; ++ ++ /* ++ * Because the loop above sorts the freq_table entries in the ++ * descending order, freq is the maximum frequency in the table. ++ * Assume that it corresponds to the CPPC nominal frequency and ++ * use it to populate the frequency field of the extra "boost" ++ * frequency entry. ++ */ ++ freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; ++ /* ++ * The purpose of the extra "boost" frequency entry is to make ++ * the rest of cpufreq aware of the real maximum frequency, but ++ * the way to request it is the same as for the first_perf_state ++ * entry that is expected to cover the entire range of "boost" ++ * frequencies of the CPU, so copy the driver_data value from ++ * that entry. ++ */ ++ freq_table[0].driver_data = freq_table[state].driver_data; ++ } ++ + policy->freq_table = freq_table; + perf->state = 0; + +@@ -858,8 +939,10 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy) + { + struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, + policy->cpu); ++ struct acpi_cpufreq_data *data = policy->driver_data; ++ unsigned int freq = policy->freq_table[data->first_perf_state].frequency; + +- if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq) ++ if (perf->states[0].core_frequency * 1000 != freq) + pr_warn(FW_WARN "P-state 0 is not max freq\n"); + } + +-- +2.30.1 + diff --git a/queue-5.10/cpufreq-acpi-update-arch-scale-invariance-max-perf-ratio-if-cppc-is-not-there.patch b/queue-5.10/cpufreq-acpi-update-arch-scale-invariance-max-perf-ratio-if-cppc-is-not-there.patch new file mode 100644 index 00000000000..d744bd67a3c --- /dev/null +++ b/queue-5.10/cpufreq-acpi-update-arch-scale-invariance-max-perf-ratio-if-cppc-is-not-there.patch @@ -0,0 +1,77 @@ +From d11a1d08a082a7dc0ada423d2b2e26e9b6f2525c Mon Sep 17 00:00:00 2001 +From: "Rafael J. Wysocki" +Date: Thu, 4 Feb 2021 18:34:32 +0100 +Subject: cpufreq: ACPI: Update arch scale-invariance max perf ratio if CPPC is not there + +From: Rafael J. Wysocki + +commit d11a1d08a082a7dc0ada423d2b2e26e9b6f2525c upstream. 
+
+If the maximum performance level taken for computing the
+arch_max_freq_ratio value used in the x86 scale-invariance code is
+higher than the one corresponding to the cpuinfo.max_freq value
+coming from the acpi_cpufreq driver, the scale-invariant utilization
+falls below 100% even if the CPU runs at cpuinfo.max_freq or slightly
+faster, which causes the schedutil governor to select a frequency
+below cpuinfo.max_freq. That frequency corresponds to a frequency
+table entry below the maximum performance level necessary to get to
+the "boost" range of CPU frequencies which prevents "boost"
+frequencies from being used in some workloads.
+
+While this issue is related to scale-invariance, it may be amplified
+by commit db865272d9c4 ("cpufreq: Avoid configuring old governors as
+default with intel_pstate") from the 5.10 development cycle which
+made it extremely easy to default to schedutil even if the preferred
+driver is acpi_cpufreq as long as intel_pstate is built too, because
+the mere presence of the latter effectively removes the ondemand
+governor from the defaults. Distro kernels are likely to include
+both intel_pstate and acpi_cpufreq on x86, so their users who cannot
+use intel_pstate or choose to use acpi_cpufreq may easily be
+affected by this issue.
+
+If CPPC is available, it can be used to address this issue by
+extending the frequency tables created by acpi_cpufreq to cover the
+entire available frequency range (including "boost" frequencies) for
+each CPU, but if CPPC is not there, acpi_cpufreq has no idea what
+the maximum "boost" frequency is and the frequency tables created by
+it cannot be extended in a meaningful way, so in that case make it
+ask the arch scale-invariance code to use the "nominal" performance
+level for CPU utilization scaling in order to avoid the issue at hand.
+
+Fixes: db865272d9c4 ("cpufreq: Avoid configuring old governors as default with intel_pstate")
+Signed-off-by: Rafael J. Wysocki
+Reviewed-by: Giovanni Gherdovich
+Acked-by: Peter Zijlstra (Intel)
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/smpboot.c      |    1 +
+ drivers/cpufreq/acpi-cpufreq.c |    8 ++++++++
+ 2 files changed, 9 insertions(+)
+
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -1829,6 +1829,7 @@ void arch_set_max_freq_ratio(bool turbo_
+ 	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
+ 					arch_turbo_freq_ratio;
+ }
++EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
+
+ static bool turbo_disabled(void)
+ {
+--- a/drivers/cpufreq/acpi-cpufreq.c
++++ b/drivers/cpufreq/acpi-cpufreq.c
+@@ -806,6 +806,14 @@ static int acpi_cpufreq_cpu_init(struct
+ 		state_count++;
+ 		valid_states++;
+ 		data->first_perf_state = valid_states;
++	} else {
++		/*
++		 * If the maximum "boost" frequency is unknown, ask the arch
++		 * scale-invariance code to use the "nominal" performance for
++		 * CPU utilization scaling so as to prevent the schedutil
++		 * governor from selecting inadequate CPU frequencies.
++ */ ++ arch_set_max_freq_ratio(true); + } + + freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); diff --git a/queue-5.10/kcov-usb-only-collect-coverage-from-__usb_hcd_giveback_urb-in-softirq.patch b/queue-5.10/kcov-usb-only-collect-coverage-from-__usb_hcd_giveback_urb-in-softirq.patch new file mode 100644 index 00000000000..8cbf98324d7 --- /dev/null +++ b/queue-5.10/kcov-usb-only-collect-coverage-from-__usb_hcd_giveback_urb-in-softirq.patch @@ -0,0 +1,59 @@ +From aee9ddb1d3718d3ba05b50c51622d7792ae749c9 Mon Sep 17 00:00:00 2001 +From: Andrey Konovalov +Date: Fri, 16 Oct 2020 15:57:45 +0200 +Subject: kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq + +From: Andrey Konovalov + +commit aee9ddb1d3718d3ba05b50c51622d7792ae749c9 upstream. + +Currently there's a KCOV remote coverage collection section in +__usb_hcd_giveback_urb(). Initially that section was added based on the +assumption that usb_hcd_giveback_urb() can only be called in interrupt +context as indicated by a comment before it. This is what happens when +syzkaller is fuzzing the USB stack via the dummy_hcd driver. + +As it turns out, it's actually valid to call usb_hcd_giveback_urb() in task +context, provided that the caller turned off the interrupts; USB/IP does +exactly that. This can lead to a nested KCOV remote coverage collection +sections both trying to collect coverage in task context. This isn't +supported by KCOV, and leads to a WARNING. + +Change __usb_hcd_giveback_urb() to only call kcov_remote_*() callbacks +when it's being executed in a softirq. As the result, the coverage from +USB/IP related usb_hcd_giveback_urb() calls won't be collected, but the +WARNING is fixed. + +A potential future improvement would be to support nested remote coverage +collection sections, but this patch doesn't address that. + +Reviewed-by: Dmitry Vyukov +Acked-by: Marco Elver +Signed-off-by: Andrey Konovalov +Link: https://lore.kernel.org/r/f3a7a153f0719cb53ec385b16e912798bd3e4cf9.1602856358.git.andreyknvl@google.com +Signed-off-by: Greg Kroah-Hartman +--- + drivers/usb/core/hcd.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/drivers/usb/core/hcd.c ++++ b/drivers/usb/core/hcd.c +@@ -1646,9 +1646,16 @@ static void __usb_hcd_giveback_urb(struc + + /* pass ownership to the completion handler */ + urb->status = status; +- kcov_remote_start_usb((u64)urb->dev->bus->busnum); ++ /* ++ * This function can be called in task context inside another remote ++ * coverage collection section, but KCOV doesn't support that kind of ++ * recursion yet. Only collect coverage in softirq context for now. ++ */ ++ if (in_serving_softirq()) ++ kcov_remote_start_usb((u64)urb->dev->bus->busnum); + urb->complete(urb); +- kcov_remote_stop(); ++ if (in_serving_softirq()) ++ kcov_remote_stop(); + + usb_anchor_resume_wakeups(anchor); + atomic_dec(&urb->use_count); diff --git a/queue-5.10/net-dsa-call-teardown-method-on-probe-failure.patch b/queue-5.10/net-dsa-call-teardown-method-on-probe-failure.patch new file mode 100644 index 00000000000..073a70813a0 --- /dev/null +++ b/queue-5.10/net-dsa-call-teardown-method-on-probe-failure.patch @@ -0,0 +1,52 @@ +From 8fd54a73b7cda11548154451bdb4bde6d8ff74c7 Mon Sep 17 00:00:00 2001 +From: Vladimir Oltean +Date: Thu, 4 Feb 2021 18:33:51 +0200 +Subject: net: dsa: call teardown method on probe failure + +From: Vladimir Oltean + +commit 8fd54a73b7cda11548154451bdb4bde6d8ff74c7 upstream. 
+ +Since teardown is supposed to undo the effects of the setup method, it +should be called in the error path for dsa_switch_setup, not just in +dsa_switch_teardown. + +Fixes: 5e3f847a02aa ("net: dsa: Add teardown callback for drivers") +Signed-off-by: Vladimir Oltean +Reviewed-by: Andrew Lunn +Reviewed-by: Florian Fainelli +Link: https://lore.kernel.org/r/20210204163351.2929670-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -462,20 +462,23 @@ static int dsa_switch_setup(struct dsa_s + ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + if (!ds->slave_mii_bus) { + err = -ENOMEM; +- goto unregister_notifier; ++ goto teardown; + } + + dsa_slave_mii_bus_init(ds); + + err = mdiobus_register(ds->slave_mii_bus); + if (err < 0) +- goto unregister_notifier; ++ goto teardown; + } + + ds->setup = true; + + return 0; + ++teardown: ++ if (ds->ops->teardown) ++ ds->ops->teardown(ds); + unregister_notifier: + dsa_switch_unregister_notifier(ds); + unregister_devlink_ports: diff --git a/queue-5.10/net-fix-iteration-for-sctp-transport-seq_files.patch b/queue-5.10/net-fix-iteration-for-sctp-transport-seq_files.patch new file mode 100644 index 00000000000..9fe759259f4 --- /dev/null +++ b/queue-5.10/net-fix-iteration-for-sctp-transport-seq_files.patch @@ -0,0 +1,76 @@ +From af8085f3a4712c57d0dd415ad543bac85780375c Mon Sep 17 00:00:00 2001 +From: NeilBrown +Date: Fri, 5 Feb 2021 11:36:30 +1100 +Subject: net: fix iteration for sctp transport seq_files + +From: NeilBrown + +commit af8085f3a4712c57d0dd415ad543bac85780375c upstream. + +The sctp transport seq_file iterators take a reference to the transport +in the ->start and ->next functions and releases the reference in the +->show function. The preferred handling for such resources is to +release them in the subsequent ->next or ->stop function call. + +Since Commit 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration +code and interface") there is no guarantee that ->show will be called +after ->next, so this function can now leak references. + +So move the sctp_transport_put() call to ->next and ->stop. 
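For reference, a minimal sketch of the convention being adopted here (the
demo_* names are hypothetical placeholders, not the actual sctp functions;
the real change is in the diff below):

  #include <linux/seq_file.h>

  void demo_put(void *v);                           /* drops a reference */
  void *demo_get(struct seq_file *seq, loff_t pos); /* returns a held ref */

  /* Since commit 1f4aace60b0e, ->next can be followed directly by ->stop
   * with no intervening ->show, so the reference pinning the current
   * element must be dropped in ->next/->stop, never in ->show.
   */
  static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
          if (v && v != SEQ_START_TOKEN)
                  demo_put(v);           /* release the element we move past */
          ++*pos;
          return demo_get(seq, *pos);
  }

  static void demo_seq_stop(struct seq_file *seq, void *v)
  {
          if (v && v != SEQ_START_TOKEN)
                  demo_put(v);           /* ->next never ran for this element */
  }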
+ +Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface") +Reported-by: Xin Long +Signed-off-by: NeilBrown +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/proc.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/net/sctp/proc.c ++++ b/net/sctp/proc.c +@@ -215,6 +215,12 @@ static void sctp_transport_seq_stop(stru + { + struct sctp_ht_iter *iter = seq->private; + ++ if (v && v != SEQ_START_TOKEN) { ++ struct sctp_transport *transport = v; ++ ++ sctp_transport_put(transport); ++ } ++ + sctp_transport_walk_stop(&iter->hti); + } + +@@ -222,6 +228,12 @@ static void *sctp_transport_seq_next(str + { + struct sctp_ht_iter *iter = seq->private; + ++ if (v && v != SEQ_START_TOKEN) { ++ struct sctp_transport *transport = v; ++ ++ sctp_transport_put(transport); ++ } ++ + ++*pos; + + return sctp_transport_get_next(seq_file_net(seq), &iter->hti); +@@ -277,8 +289,6 @@ static int sctp_assocs_seq_show(struct s + sk->sk_rcvbuf); + seq_printf(seq, "\n"); + +- sctp_transport_put(transport); +- + return 0; + } + +@@ -354,8 +364,6 @@ static int sctp_remaddr_seq_show(struct + seq_printf(seq, "\n"); + } + +- sctp_transport_put(transport); +- + return 0; + } + diff --git a/queue-5.10/net-gro-do-not-keep-too-many-gro-packets-in-napi-rx_list.patch b/queue-5.10/net-gro-do-not-keep-too-many-gro-packets-in-napi-rx_list.patch new file mode 100644 index 00000000000..d0ce730530d --- /dev/null +++ b/queue-5.10/net-gro-do-not-keep-too-many-gro-packets-in-napi-rx_list.patch @@ -0,0 +1,80 @@ +From 8dc1c444df193701910f5e80b5d4caaf705a8fb0 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Thu, 4 Feb 2021 13:31:46 -0800 +Subject: net: gro: do not keep too many GRO packets in napi->rx_list + +From: Eric Dumazet + +commit 8dc1c444df193701910f5e80b5d4caaf705a8fb0 upstream. + +Commit c80794323e82 ("net: Fix packet reordering caused by GRO and +listified RX cooperation") had the unfortunate effect of adding +latencies in common workloads. + +Before the patch, GRO packets were immediately passed to +upper stacks. + +After the patch, we can accumulate quite a lot of GRO +packets (depdending on NAPI budget). + +My fix is counting in napi->rx_count number of segments +instead of number of logical packets. + +Fixes: c80794323e82 ("net: Fix packet reordering caused by GRO and listified RX cooperation") +Signed-off-by: Eric Dumazet +Bisected-by: John Sperbeck +Tested-by: Jian Yang +Cc: Maxim Mikityanskiy +Reviewed-by: Saeed Mahameed +Reviewed-by: Edward Cree +Reviewed-by: Alexander Lobakin +Link: https://lore.kernel.org/r/20210204213146.4192368-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5723,10 +5723,11 @@ static void gro_normal_list(struct napi_ + /* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. 
+ */ +-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) ++static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs) + { + list_add_tail(&skb->list, &napi->rx_list); +- if (++napi->rx_count >= gro_normal_batch) ++ napi->rx_count += segs; ++ if (napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); + } + +@@ -5765,7 +5766,7 @@ static int napi_gro_complete(struct napi + } + + out: +- gro_normal_one(napi, skb); ++ gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count); + return NET_RX_SUCCESS; + } + +@@ -6055,7 +6056,7 @@ static gro_result_t napi_skb_finish(stru + { + switch (ret) { + case GRO_NORMAL: +- gro_normal_one(napi, skb); ++ gro_normal_one(napi, skb, 1); + break; + + case GRO_DROP: +@@ -6143,7 +6144,7 @@ static gro_result_t napi_frags_finish(st + __skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + if (ret == GRO_NORMAL) +- gro_normal_one(napi, skb); ++ gro_normal_one(napi, skb, 1); + break; + + case GRO_DROP: diff --git a/queue-5.10/net-qrtr-restrict-user-controlled-length-in-qrtr_tun_write_iter.patch b/queue-5.10/net-qrtr-restrict-user-controlled-length-in-qrtr_tun_write_iter.patch new file mode 100644 index 00000000000..f8124afd327 --- /dev/null +++ b/queue-5.10/net-qrtr-restrict-user-controlled-length-in-qrtr_tun_write_iter.patch @@ -0,0 +1,51 @@ +From 2a80c15812372e554474b1dba0b1d8e467af295d Mon Sep 17 00:00:00 2001 +From: Sabyrzhan Tasbolatov +Date: Tue, 2 Feb 2021 15:20:59 +0600 +Subject: net/qrtr: restrict user-controlled length in qrtr_tun_write_iter() + +From: Sabyrzhan Tasbolatov + +commit 2a80c15812372e554474b1dba0b1d8e467af295d upstream. + +syzbot found WARNING in qrtr_tun_write_iter [1] when write_iter length +exceeds KMALLOC_MAX_SIZE causing order >= MAX_ORDER condition. + +Additionally, there is no check for 0 length write. + +[1] +WARNING: mm/page_alloc.c:5011 +[..] +Call Trace: + alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 + alloc_pages include/linux/gfp.h:547 [inline] + kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 + kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 + kmalloc include/linux/slab.h:557 [inline] + kzalloc include/linux/slab.h:682 [inline] + qrtr_tun_write_iter+0x8a/0x180 net/qrtr/tun.c:83 + call_write_iter include/linux/fs.h:1901 [inline] + +Reported-by: syzbot+c2a7e5c5211605a90865@syzkaller.appspotmail.com +Signed-off-by: Sabyrzhan Tasbolatov +Link: https://lore.kernel.org/r/20210202092059.1361381-1-snovitoll@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/qrtr/tun.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/qrtr/tun.c ++++ b/net/qrtr/tun.c +@@ -80,6 +80,12 @@ static ssize_t qrtr_tun_write_iter(struc + ssize_t ret; + void *kbuf; + ++ if (!len) ++ return -EINVAL; ++ ++ if (len > KMALLOC_MAX_SIZE) ++ return -ENOMEM; ++ + kbuf = kzalloc(len, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; diff --git a/queue-5.10/net-rds-restrict-iovecs-length-for-rds_cmsg_rdma_args.patch b/queue-5.10/net-rds-restrict-iovecs-length-for-rds_cmsg_rdma_args.patch new file mode 100644 index 00000000000..67b4d68cb5b --- /dev/null +++ b/queue-5.10/net-rds-restrict-iovecs-length-for-rds_cmsg_rdma_args.patch @@ -0,0 +1,53 @@ +From a11148e6fcce2ae53f47f0a442d098d860b4f7db Mon Sep 17 00:00:00 2001 +From: Sabyrzhan Tasbolatov +Date: Tue, 2 Feb 2021 02:32:33 +0600 +Subject: net/rds: restrict iovecs length for RDS_CMSG_RDMA_ARGS + +From: Sabyrzhan Tasbolatov + +commit a11148e6fcce2ae53f47f0a442d098d860b4f7db upstream. 
+ +syzbot found WARNING in rds_rdma_extra_size [1] when RDS_CMSG_RDMA_ARGS +control message is passed with user-controlled +0x40001 bytes of args->nr_local, causing order >= MAX_ORDER condition. + +The exact value 0x40001 can be checked with UIO_MAXIOV which is 0x400. +So for kcalloc() 0x400 iovecs with sizeof(struct rds_iovec) = 0x10 +is the closest limit, with 0x10 leftover. + +Same condition is currently done in rds_cmsg_rdma_args(). + +[1] WARNING: mm/page_alloc.c:5011 +[..] +Call Trace: + alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 + alloc_pages include/linux/gfp.h:547 [inline] + kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 + kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 + kmalloc_array include/linux/slab.h:592 [inline] + kcalloc include/linux/slab.h:621 [inline] + rds_rdma_extra_size+0xb2/0x3b0 net/rds/rdma.c:568 + rds_rm_size net/rds/send.c:928 [inline] + +Reported-by: syzbot+1bd2b07f93745fa38425@syzkaller.appspotmail.com +Signed-off-by: Sabyrzhan Tasbolatov +Acked-by: Santosh Shilimkar +Link: https://lore.kernel.org/r/20210201203233.1324704-1-snovitoll@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/rdma.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/rds/rdma.c ++++ b/net/rds/rdma.c +@@ -565,6 +565,9 @@ int rds_rdma_extra_size(struct rds_rdma_ + if (args->nr_local == 0) + return -EINVAL; + ++ if (args->nr_local > UIO_MAXIOV) ++ return -EMSGSIZE; ++ + iov->iov = kcalloc(args->nr_local, + sizeof(struct rds_iovec), + GFP_KERNEL); diff --git a/queue-5.10/net-vmw_vsock-fix-null-pointer-dereference.patch b/queue-5.10/net-vmw_vsock-fix-null-pointer-dereference.patch new file mode 100644 index 00000000000..ea9a6c9b89e --- /dev/null +++ b/queue-5.10/net-vmw_vsock-fix-null-pointer-dereference.patch @@ -0,0 +1,37 @@ +From 5d1cbcc990f18edaddddef26677073c4e6fad7b7 Mon Sep 17 00:00:00 2001 +From: Norbert Slusarek +Date: Fri, 5 Feb 2021 13:12:06 +0100 +Subject: net/vmw_vsock: fix NULL pointer dereference + +From: Norbert Slusarek + +commit 5d1cbcc990f18edaddddef26677073c4e6fad7b7 upstream. + +In vsock_stream_connect(), a thread will enter schedule_timeout(). +While being scheduled out, another thread can enter vsock_stream_connect() +as well and set vsk->transport to NULL. In case a signal was sent, the +first thread can leave schedule_timeout() and vsock_transport_cancel_pkt() +will be called right after. Inside vsock_transport_cancel_pkt(), a null +dereference will happen on transport->cancel_pkt. 
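The race can be sketched as the following interleaving (an illustrative
timeline condensed from the description above, not a verbatim trace):

  /*
   *  thread A                            thread B
   *  --------                            --------
   *  vsock_stream_connect()
   *    schedule_timeout()  [sleeps]
   *                                      vsock_stream_connect()
   *                                        vsk->transport = NULL;
   *  [woken early by a signal]
   *  vsock_transport_cancel_pkt(vsk)
   *    transport->cancel_pkt(...)  <-- transport is NULL here
   */

Checking the cached transport pointer before dereferencing it, as the
one-line hunk below does, closes that window.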
+ +Fixes: c0cfa2d8a788 ("vsock: add multi-transports support") +Signed-off-by: Norbert Slusarek +Reviewed-by: Stefano Garzarella +Link: https://lore.kernel.org/r/trinity-c2d6cede-bfb1-44e2-85af-1fbc7f541715-1612535117028@3c-app-gmx-bap12 +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/vmw_vsock/af_vsock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1216,7 +1216,7 @@ static int vsock_transport_cancel_pkt(st + { + const struct vsock_transport *transport = vsk->transport; + +- if (!transport->cancel_pkt) ++ if (!transport || !transport->cancel_pkt) + return -EOPNOTSUPP; + + return transport->cancel_pkt(vsk); diff --git a/queue-5.10/net-vmw_vsock-improve-locking-in-vsock_connect_timeout.patch b/queue-5.10/net-vmw_vsock-improve-locking-in-vsock_connect_timeout.patch new file mode 100644 index 00000000000..96b5e9c3d58 --- /dev/null +++ b/queue-5.10/net-vmw_vsock-improve-locking-in-vsock_connect_timeout.patch @@ -0,0 +1,49 @@ +From 3d0bc44d39bca615b72637e340317b7899b7f911 Mon Sep 17 00:00:00 2001 +From: Norbert Slusarek +Date: Fri, 5 Feb 2021 13:14:05 +0100 +Subject: net/vmw_vsock: improve locking in vsock_connect_timeout() + +From: Norbert Slusarek + +commit 3d0bc44d39bca615b72637e340317b7899b7f911 upstream. + +A possible locking issue in vsock_connect_timeout() was recognized by +Eric Dumazet which might cause a null pointer dereference in +vsock_transport_cancel_pkt(). This patch assures that +vsock_transport_cancel_pkt() will be called within the lock, so a race +condition won't occur which could result in vsk->transport to be set to NULL. + +Fixes: 380feae0def7 ("vsock: cancel packets when failing to connect") +Reported-by: Eric Dumazet +Signed-off-by: Norbert Slusarek +Reviewed-by: Stefano Garzarella +Link: https://lore.kernel.org/r/trinity-f8e0937a-cf0e-4d80-a76e-d9a958ba3ef1-1612535522360@3c-app-gmx-bap12 +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/vmw_vsock/af_vsock.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -1226,7 +1226,6 @@ static void vsock_connect_timeout(struct + { + struct sock *sk; + struct vsock_sock *vsk; +- int cancel = 0; + + vsk = container_of(work, struct vsock_sock, connect_work.work); + sk = sk_vsock(vsk); +@@ -1237,11 +1236,9 @@ static void vsock_connect_timeout(struct + sk->sk_state = TCP_CLOSE; + sk->sk_err = ETIMEDOUT; + sk->sk_error_report(sk); +- cancel = 1; ++ vsock_transport_cancel_pkt(vsk); + } + release_sock(sk); +- if (cancel) +- vsock_transport_cancel_pkt(vsk); + + sock_put(sk); + } diff --git a/queue-5.10/net-watchdog-hold-device-global-xmit-lock-during-tx-disable.patch b/queue-5.10/net-watchdog-hold-device-global-xmit-lock-during-tx-disable.patch new file mode 100644 index 00000000000..2869948376e --- /dev/null +++ b/queue-5.10/net-watchdog-hold-device-global-xmit-lock-during-tx-disable.patch @@ -0,0 +1,47 @@ +From 3aa6bce9af0e25b735c9c1263739a5639a336ae8 Mon Sep 17 00:00:00 2001 +From: Edwin Peer +Date: Fri, 5 Feb 2021 17:37:32 -0800 +Subject: net: watchdog: hold device global xmit lock during tx disable + +From: Edwin Peer + +commit 3aa6bce9af0e25b735c9c1263739a5639a336ae8 upstream. + +Prevent netif_tx_disable() running concurrently with dev_watchdog() by +taking the device global xmit lock. 
Otherwise, the recommended: + + netif_carrier_off(dev); + netif_tx_disable(dev); + +driver shutdown sequence can happen after the watchdog has already +checked carrier, resulting in possible false alarms. This is because +netif_tx_lock() only sets the frozen bit without maintaining the locks +on the individual queues. + +Fixes: c3f26a269c24 ("netdev: Fix lockdep warnings in multiqueue configurations.") +Signed-off-by: Edwin Peer +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -4313,6 +4313,7 @@ static inline void netif_tx_disable(stru + + local_bh_disable(); + cpu = smp_processor_id(); ++ spin_lock(&dev->tx_global_lock); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + +@@ -4320,6 +4321,7 @@ static inline void netif_tx_disable(stru + netif_tx_stop_queue(txq); + __netif_tx_unlock(txq); + } ++ spin_unlock(&dev->tx_global_lock); + local_bh_enable(); + } + diff --git a/queue-5.10/ovl-expand-warning-in-ovl_d_real.patch b/queue-5.10/ovl-expand-warning-in-ovl_d_real.patch new file mode 100644 index 00000000000..2eec825ec94 --- /dev/null +++ b/queue-5.10/ovl-expand-warning-in-ovl_d_real.patch @@ -0,0 +1,54 @@ +From cef4cbff06fbc3be54d6d79ee139edecc2ee8598 Mon Sep 17 00:00:00 2001 +From: Miklos Szeredi +Date: Thu, 12 Nov 2020 11:31:55 +0100 +Subject: ovl: expand warning in ovl_d_real() + +From: Miklos Szeredi + +commit cef4cbff06fbc3be54d6d79ee139edecc2ee8598 upstream. + +There was a syzbot report with this warning but insufficient information... + +Signed-off-by: Miklos Szeredi +Signed-off-by: Greg Kroah-Hartman +--- + fs/overlayfs/super.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/fs/overlayfs/super.c ++++ b/fs/overlayfs/super.c +@@ -79,7 +79,7 @@ static void ovl_dentry_release(struct de + static struct dentry *ovl_d_real(struct dentry *dentry, + const struct inode *inode) + { +- struct dentry *real; ++ struct dentry *real = NULL, *lower; + + /* It's an overlay file */ + if (inode && d_inode(dentry) == inode) +@@ -98,9 +98,10 @@ static struct dentry *ovl_d_real(struct + if (real && !inode && ovl_has_upperdata(d_inode(dentry))) + return real; + +- real = ovl_dentry_lowerdata(dentry); +- if (!real) ++ lower = ovl_dentry_lowerdata(dentry); ++ if (!lower) + goto bug; ++ real = lower; + + /* Handle recursion */ + real = d_real(real, inode); +@@ -108,8 +109,10 @@ static struct dentry *ovl_d_real(struct + if (!inode || inode == d_inode(real)) + return real; + bug: +- WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, +- inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); ++ WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n", ++ __func__, dentry, inode ? inode->i_sb->s_id : "NULL", ++ inode ? inode->i_ino : 0, real, ++ real && d_inode(real) ? 
d_inode(real)->i_ino : 0); + return dentry; + } + diff --git a/queue-5.10/rxrpc-fix-clearance-of-tx-rx-ring-when-releasing-a-call.patch b/queue-5.10/rxrpc-fix-clearance-of-tx-rx-ring-when-releasing-a-call.patch new file mode 100644 index 00000000000..19036642d26 --- /dev/null +++ b/queue-5.10/rxrpc-fix-clearance-of-tx-rx-ring-when-releasing-a-call.patch @@ -0,0 +1,85 @@ +From 7b5eab57cac45e270a0ad624ba157c5b30b3d44d Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Wed, 3 Feb 2021 08:47:56 +0000 +Subject: rxrpc: Fix clearance of Tx/Rx ring when releasing a call + +From: David Howells + +commit 7b5eab57cac45e270a0ad624ba157c5b30b3d44d upstream. + +At the end of rxrpc_release_call(), rxrpc_cleanup_ring() is called to clear +the Rx/Tx skbuff ring, but this doesn't lock the ring whilst it's accessing +it. Unfortunately, rxrpc_resend() might be trying to retransmit a packet +concurrently with this - and whilst it does lock the ring, this isn't +protection against rxrpc_cleanup_call(). + +Fix this by removing the call to rxrpc_cleanup_ring() from +rxrpc_release_call(). rxrpc_cleanup_ring() will be called again anyway +from rxrpc_cleanup_call(). The earlier call is just an optimisation to +recycle skbuffs more quickly. + +Alternative solutions include rxrpc_release_call() could try to cancel the +work item or wait for it to complete or rxrpc_cleanup_ring() could lock +when accessing the ring (which would require a bh lock). + +This can produce a report like the following: + + BUG: KASAN: use-after-free in rxrpc_send_data_packet+0x19b4/0x1e70 net/rxrpc/output.c:372 + Read of size 4 at addr ffff888011606e04 by task kworker/0:0/5 + ... + Workqueue: krxrpcd rxrpc_process_call + Call Trace: + ... + kasan_report.cold+0x79/0xd5 mm/kasan/report.c:413 + rxrpc_send_data_packet+0x19b4/0x1e70 net/rxrpc/output.c:372 + rxrpc_resend net/rxrpc/call_event.c:266 [inline] + rxrpc_process_call+0x1634/0x1f60 net/rxrpc/call_event.c:412 + process_one_work+0x98d/0x15f0 kernel/workqueue.c:2275 + ... + + Allocated by task 2318: + ... + sock_alloc_send_pskb+0x793/0x920 net/core/sock.c:2348 + rxrpc_send_data+0xb51/0x2bf0 net/rxrpc/sendmsg.c:358 + rxrpc_do_sendmsg+0xc03/0x1350 net/rxrpc/sendmsg.c:744 + rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:560 + ... + + Freed by task 2318: + ... + kfree_skb+0x140/0x3f0 net/core/skbuff.c:704 + rxrpc_free_skb+0x11d/0x150 net/rxrpc/skbuff.c:78 + rxrpc_cleanup_ring net/rxrpc/call_object.c:485 [inline] + rxrpc_release_call+0x5dd/0x860 net/rxrpc/call_object.c:552 + rxrpc_release_calls_on_socket+0x21c/0x300 net/rxrpc/call_object.c:579 + rxrpc_release_sock net/rxrpc/af_rxrpc.c:885 [inline] + rxrpc_release+0x263/0x5a0 net/rxrpc/af_rxrpc.c:916 + __sock_release+0xcd/0x280 net/socket.c:597 + ... 
+ + The buggy address belongs to the object at ffff888011606dc0 + which belongs to the cache skbuff_head_cache of size 232 + +Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") +Reported-by: syzbot+174de899852504e4a74a@syzkaller.appspotmail.com +Reported-by: syzbot+3d1c772efafd3c38d007@syzkaller.appspotmail.com +Signed-off-by: David Howells +cc: Hillf Danton +Link: https://lore.kernel.org/r/161234207610.653119.5287360098400436976.stgit@warthog.procyon.org.uk +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/call_object.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -548,8 +548,6 @@ void rxrpc_release_call(struct rxrpc_soc + rxrpc_disconnect_call(call); + if (call->security) + call->security->free_call_crypto(call); +- +- rxrpc_cleanup_ring(call); + _leave(""); + } + diff --git a/queue-5.10/series b/queue-5.10/series index df10a43b201..64aa1927ead 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -84,3 +84,21 @@ h8300-fix-preemption-build-ti_pre_count-undefined.patch scripts-set-proper-openssl-include-dir-also-for-sign-file.patch x86-pci-create-pci-msi-irqdomain-after-x86_init.pci.arch_init.patch arm64-mte-allow-ptrace_peekmtetags-access-to-the-zero-page.patch +rxrpc-fix-clearance-of-tx-rx-ring-when-releasing-a-call.patch +udp-fix-skb_copy_and_csum_datagram-with-odd-segment-sizes.patch +net-dsa-call-teardown-method-on-probe-failure.patch +cpufreq-acpi-extend-frequency-tables-to-cover-boost-frequencies.patch +cpufreq-acpi-update-arch-scale-invariance-max-perf-ratio-if-cppc-is-not-there.patch +net-gro-do-not-keep-too-many-gro-packets-in-napi-rx_list.patch +net-fix-iteration-for-sctp-transport-seq_files.patch +net-vmw_vsock-fix-null-pointer-dereference.patch +net-vmw_vsock-improve-locking-in-vsock_connect_timeout.patch +net-watchdog-hold-device-global-xmit-lock-during-tx-disable.patch +bridge-mrp-fix-the-usage-of-br_mrp_port_switchdev_set_state.patch +switchdev-mrp-remove-switchdev_attr_id_mrp_port_stat.patch +vsock-virtio-update-credit-only-if-socket-is-not-closed.patch +vsock-fix-locking-in-vsock_shutdown.patch +net-rds-restrict-iovecs-length-for-rds_cmsg_rdma_args.patch +net-qrtr-restrict-user-controlled-length-in-qrtr_tun_write_iter.patch +ovl-expand-warning-in-ovl_d_real.patch +kcov-usb-only-collect-coverage-from-__usb_hcd_giveback_urb-in-softirq.patch diff --git a/queue-5.10/switchdev-mrp-remove-switchdev_attr_id_mrp_port_stat.patch b/queue-5.10/switchdev-mrp-remove-switchdev_attr_id_mrp_port_stat.patch new file mode 100644 index 00000000000..0a3a818f099 --- /dev/null +++ b/queue-5.10/switchdev-mrp-remove-switchdev_attr_id_mrp_port_stat.patch @@ -0,0 +1,39 @@ +From 059d2a1004981dce19f0127dabc1b4ec927d202a Mon Sep 17 00:00:00 2001 +From: Horatiu Vultur +Date: Sat, 6 Feb 2021 22:47:34 +0100 +Subject: switchdev: mrp: Remove SWITCHDEV_ATTR_ID_MRP_PORT_STAT + +From: Horatiu Vultur + +commit 059d2a1004981dce19f0127dabc1b4ec927d202a upstream. + +Now that MRP started to use also SWITCHDEV_ATTR_ID_PORT_STP_STATE to +notify HW, then SWITCHDEV_ATTR_ID_MRP_PORT_STAT is not used anywhere +else, therefore we can remove it. + +Fixes: c284b545900830 ("switchdev: mrp: Extend switchdev API to offload MRP") +Signed-off-by: Horatiu Vultur +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/switchdev.h | 2 -- + 1 file changed, 2 deletions(-) + +--- a/include/net/switchdev.h ++++ b/include/net/switchdev.h +@@ -41,7 +41,6 @@ enum switchdev_attr_id { + SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED, + SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + #if IS_ENABLED(CONFIG_BRIDGE_MRP) +- SWITCHDEV_ATTR_ID_MRP_PORT_STATE, + SWITCHDEV_ATTR_ID_MRP_PORT_ROLE, + #endif + }; +@@ -60,7 +59,6 @@ struct switchdev_attr { + bool vlan_filtering; /* BRIDGE_VLAN_FILTERING */ + bool mc_disabled; /* MC_DISABLED */ + #if IS_ENABLED(CONFIG_BRIDGE_MRP) +- u8 mrp_port_state; /* MRP_PORT_STATE */ + u8 mrp_port_role; /* MRP_PORT_ROLE */ + #endif + } u; diff --git a/queue-5.10/udp-fix-skb_copy_and_csum_datagram-with-odd-segment-sizes.patch b/queue-5.10/udp-fix-skb_copy_and_csum_datagram-with-odd-segment-sizes.patch new file mode 100644 index 00000000000..2d9825b7673 --- /dev/null +++ b/queue-5.10/udp-fix-skb_copy_and_csum_datagram-with-odd-segment-sizes.patch @@ -0,0 +1,140 @@ +From 52cbd23a119c6ebf40a527e53f3402d2ea38eccb Mon Sep 17 00:00:00 2001 +From: Willem de Bruijn +Date: Wed, 3 Feb 2021 14:29:52 -0500 +Subject: udp: fix skb_copy_and_csum_datagram with odd segment sizes + +From: Willem de Bruijn + +commit 52cbd23a119c6ebf40a527e53f3402d2ea38eccb upstream. + +When iteratively computing a checksum with csum_block_add, track the +offset "pos" to correctly rotate in csum_block_add when offset is odd. + +The open coded implementation of skb_copy_and_csum_datagram did this. +With the switch to __skb_datagram_iter calling csum_and_copy_to_iter, +pos was reinitialized to 0 on each call. + +Bring back the pos by passing it along with the csum to the callback. + +Changes v1->v2 + - pass csum value, instead of csump pointer (Alexander Duyck) + +Link: https://lore.kernel.org/netdev/20210128152353.GB27281@optiplex/ +Fixes: 950fcaecd5cc ("datagram: consolidate datagram copy to iter helpers") +Reported-by: Oliver Graute +Signed-off-by: Willem de Bruijn +Reviewed-by: Alexander Duyck +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20210203192952.1849843-1-willemdebruijn.kernel@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/uio.h | 8 +++++++- + lib/iov_iter.c | 24 ++++++++++++++---------- + net/core/datagram.c | 12 ++++++++++-- + 3 files changed, 31 insertions(+), 13 deletions(-) + +--- a/include/linux/uio.h ++++ b/include/linux/uio.h +@@ -260,7 +260,13 @@ static inline void iov_iter_reexpand(str + { + i->count = count; + } +-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); ++ ++struct csum_state { ++ __wsum csum; ++ size_t off; ++}; ++ ++size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i); + size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); + bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); + size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, +--- a/lib/iov_iter.c ++++ b/lib/iov_iter.c +@@ -592,14 +592,15 @@ static __wsum csum_and_memcpy(void *to, + } + + static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, +- __wsum *csum, struct iov_iter *i) ++ struct csum_state *csstate, ++ struct iov_iter *i) + { + struct pipe_inode_info *pipe = i->pipe; + unsigned int p_mask = pipe->ring_size - 1; ++ __wsum sum = csstate->csum; ++ size_t off = csstate->off; + unsigned int i_head; + size_t n, r; +- size_t off = 
0; +- __wsum sum = *csum; + + if (!sanity(i)) + return 0; +@@ -621,7 +622,8 @@ static size_t csum_and_copy_to_pipe_iter + i_head++; + } while (n); + i->count -= bytes; +- *csum = sum; ++ csstate->csum = sum; ++ csstate->off = off; + return bytes; + } + +@@ -1522,18 +1524,19 @@ bool csum_and_copy_from_iter_full(void * + } + EXPORT_SYMBOL(csum_and_copy_from_iter_full); + +-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, ++size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, + struct iov_iter *i) + { ++ struct csum_state *csstate = _csstate; + const char *from = addr; +- __wsum *csum = csump; + __wsum sum, next; +- size_t off = 0; ++ size_t off; + + if (unlikely(iov_iter_is_pipe(i))) +- return csum_and_copy_to_pipe_iter(addr, bytes, csum, i); ++ return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i); + +- sum = *csum; ++ sum = csstate->csum; ++ off = csstate->off; + if (unlikely(iov_iter_is_discard(i))) { + WARN_ON(1); /* for now */ + return 0; +@@ -1561,7 +1564,8 @@ size_t csum_and_copy_to_iter(const void + off += v.iov_len; + }) + ) +- *csum = sum; ++ csstate->csum = sum; ++ csstate->off = off; + return bytes; + } + EXPORT_SYMBOL(csum_and_copy_to_iter); +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -721,8 +721,16 @@ static int skb_copy_and_csum_datagram(co + struct iov_iter *to, int len, + __wsum *csump) + { +- return __skb_datagram_iter(skb, offset, to, len, true, +- csum_and_copy_to_iter, csump); ++ struct csum_state csdata = { .csum = *csump }; ++ int ret; ++ ++ ret = __skb_datagram_iter(skb, offset, to, len, true, ++ csum_and_copy_to_iter, &csdata); ++ if (ret) ++ return ret; ++ ++ *csump = csdata.csum; ++ return 0; + } + + /** diff --git a/queue-5.10/vsock-fix-locking-in-vsock_shutdown.patch b/queue-5.10/vsock-fix-locking-in-vsock_shutdown.patch new file mode 100644 index 00000000000..4acc668865e --- /dev/null +++ b/queue-5.10/vsock-fix-locking-in-vsock_shutdown.patch @@ -0,0 +1,86 @@ +From 1c5fae9c9a092574398a17facc31c533791ef232 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Tue, 9 Feb 2021 09:52:19 +0100 +Subject: vsock: fix locking in vsock_shutdown() + +From: Stefano Garzarella + +commit 1c5fae9c9a092574398a17facc31c533791ef232 upstream. + +In vsock_shutdown() we touched some socket fields without holding the +socket lock, such as 'state' and 'sk_flags'. + +Also, after the introduction of multi-transport, we are accessing +'vsk->transport' in vsock_send_shutdown() without holding the lock +and this call can be made while the connection is in progress, so +the transport can change in the meantime. + +To avoid issues, we hold the socket lock when we enter in +vsock_shutdown() and release it when we leave. + +Among the transports that implement the 'shutdown' callback, only +hyperv_transport acquired the lock. Since the caller now holds it, +we no longer take it. + +Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") +Signed-off-by: Stefano Garzarella +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/vmw_vsock/af_vsock.c | 8 +++++--- + net/vmw_vsock/hyperv_transport.c | 4 ---- + 2 files changed, 5 insertions(+), 7 deletions(-) + +--- a/net/vmw_vsock/af_vsock.c ++++ b/net/vmw_vsock/af_vsock.c +@@ -926,10 +926,12 @@ static int vsock_shutdown(struct socket + */ + + sk = sock->sk; ++ ++ lock_sock(sk); + if (sock->state == SS_UNCONNECTED) { + err = -ENOTCONN; + if (sk->sk_type == SOCK_STREAM) +- return err; ++ goto out; + } else { + sock->state = SS_DISCONNECTING; + err = 0; +@@ -938,10 +940,8 @@ static int vsock_shutdown(struct socket + /* Receive and send shutdowns are treated alike. */ + mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN); + if (mode) { +- lock_sock(sk); + sk->sk_shutdown |= mode; + sk->sk_state_change(sk); +- release_sock(sk); + + if (sk->sk_type == SOCK_STREAM) { + sock_reset_flag(sk, SOCK_DONE); +@@ -949,6 +949,8 @@ static int vsock_shutdown(struct socket + } + } + ++out: ++ release_sock(sk); + return err; + } + +--- a/net/vmw_vsock/hyperv_transport.c ++++ b/net/vmw_vsock/hyperv_transport.c +@@ -474,14 +474,10 @@ static void hvs_shutdown_lock_held(struc + + static int hvs_shutdown(struct vsock_sock *vsk, int mode) + { +- struct sock *sk = sk_vsock(vsk); +- + if (!(mode & SEND_SHUTDOWN)) + return 0; + +- lock_sock(sk); + hvs_shutdown_lock_held(vsk->trans, mode); +- release_sock(sk); + return 0; + } + diff --git a/queue-5.10/vsock-virtio-update-credit-only-if-socket-is-not-closed.patch b/queue-5.10/vsock-virtio-update-credit-only-if-socket-is-not-closed.patch new file mode 100644 index 00000000000..95a47d171f4 --- /dev/null +++ b/queue-5.10/vsock-virtio-update-credit-only-if-socket-is-not-closed.patch @@ -0,0 +1,45 @@ +From ce7536bc7398e2ae552d2fabb7e0e371a9f1fe46 Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Mon, 8 Feb 2021 15:44:54 +0100 +Subject: vsock/virtio: update credit only if socket is not closed + +From: Stefano Garzarella + +commit ce7536bc7398e2ae552d2fabb7e0e371a9f1fe46 upstream. + +If the socket is closed or is being released, some resources used by +virtio_transport_space_update() such as 'vsk->trans' may be released. + +To avoid a use after free bug we should only update the available credit +when we are sure the socket is still open and we have the lock held. + +Fixes: 06a8fc78367d ("VSOCK: Introduce virtio_vsock_common.ko") +Signed-off-by: Stefano Garzarella +Acked-by: Michael S. Tsirkin +Link: https://lore.kernel.org/r/20210208144454.84438-1-sgarzare@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/vmw_vsock/virtio_transport_common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/vmw_vsock/virtio_transport_common.c ++++ b/net/vmw_vsock/virtio_transport_common.c +@@ -1130,8 +1130,6 @@ void virtio_transport_recv_pkt(struct vi + + vsk = vsock_sk(sk); + +- space_available = virtio_transport_space_update(sk, pkt); +- + lock_sock(sk); + + /* Check if sk has been closed before lock_sock */ +@@ -1142,6 +1140,8 @@ void virtio_transport_recv_pkt(struct vi + goto free_pkt; + } + ++ space_available = virtio_transport_space_update(sk, pkt); ++ + /* Update CID in case it has changed after a transport reset event */ + vsk->local_addr.svm_cid = dst.svm_cid; +