From: Sasha Levin Date: Wed, 31 May 2023 17:11:06 +0000 (-0400) Subject: Fixes for 5.15 X-Git-Tag: v5.4.245~30 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3fe6a64f1a0e3b7f7ea9bef92b7f1bb5fb95c49b;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.15 Signed-off-by: Sasha Levin --- diff --git a/queue-5.15/binder-fix-uaf-caused-by-faulty-buffer-cleanup.patch b/queue-5.15/binder-fix-uaf-caused-by-faulty-buffer-cleanup.patch new file mode 100644 index 00000000000..ed909957882 --- /dev/null +++ b/queue-5.15/binder-fix-uaf-caused-by-faulty-buffer-cleanup.patch @@ -0,0 +1,155 @@ +From f3cf74f395f7f0a8c546ff490af5644803731263 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 5 May 2023 20:30:20 +0000 +Subject: binder: fix UAF caused by faulty buffer cleanup + +From: Carlos Llamas + +[ Upstream commit bdc1c5fac982845a58d28690cdb56db8c88a530d ] + +In binder_transaction_buffer_release() the 'failed_at' offset indicates +the number of objects to clean up. However, this function was changed by +commit 44d8047f1d87 ("binder: use standard functions to allocate fds"), +to release all the objects in the buffer when 'failed_at' is zero. + +This introduced an issue when a transaction buffer is released without +any objects having been processed so far. In this case, 'failed_at' is +indeed zero yet it is misinterpreted as releasing the entire buffer. + +This leads to use-after-free errors where nodes are incorrectly freed +and subsequently accessed. Such is the case in the following KASAN +report: + + ================================================================== + BUG: KASAN: slab-use-after-free in binder_thread_read+0xc40/0x1f30 + Read of size 8 at addr ffff4faf037cfc58 by task poc/474 + + CPU: 6 PID: 474 Comm: poc Not tainted 6.3.0-12570-g7df047b3f0aa #5 + Hardware name: linux,dummy-virt (DT) + Call trace: + dump_backtrace+0x94/0xec + show_stack+0x18/0x24 + dump_stack_lvl+0x48/0x60 + print_report+0xf8/0x5b8 + kasan_report+0xb8/0xfc + __asan_load8+0x9c/0xb8 + binder_thread_read+0xc40/0x1f30 + binder_ioctl+0xd9c/0x1768 + __arm64_sys_ioctl+0xd4/0x118 + invoke_syscall+0x60/0x188 + [...] + + Allocated by task 474: + kasan_save_stack+0x3c/0x64 + kasan_set_track+0x2c/0x40 + kasan_save_alloc_info+0x24/0x34 + __kasan_kmalloc+0xb8/0xbc + kmalloc_trace+0x48/0x5c + binder_new_node+0x3c/0x3a4 + binder_transaction+0x2b58/0x36f0 + binder_thread_write+0x8e0/0x1b78 + binder_ioctl+0x14a0/0x1768 + __arm64_sys_ioctl+0xd4/0x118 + invoke_syscall+0x60/0x188 + [...] + + Freed by task 475: + kasan_save_stack+0x3c/0x64 + kasan_set_track+0x2c/0x40 + kasan_save_free_info+0x38/0x5c + __kasan_slab_free+0xe8/0x154 + __kmem_cache_free+0x128/0x2bc + kfree+0x58/0x70 + binder_dec_node_tmpref+0x178/0x1fc + binder_transaction_buffer_release+0x430/0x628 + binder_transaction+0x1954/0x36f0 + binder_thread_write+0x8e0/0x1b78 + binder_ioctl+0x14a0/0x1768 + __arm64_sys_ioctl+0xd4/0x118 + invoke_syscall+0x60/0x188 + [...] + ================================================================== + +In order to avoid these issues, let's always calculate the intended +'failed_at' offset beforehand. This is renamed and wrapped in a helper +function to make it clear and convenient. 
+ +Fixes: 32e9f56a96d8 ("binder: don't detect sender/target during buffer cleanup") +Reported-by: Zi Fan Tan +Cc: stable@vger.kernel.org +Signed-off-by: Carlos Llamas +Acked-by: Todd Kjos +Link: https://lore.kernel.org/r/20230505203020.4101154-1-cmllamas@google.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/android/binder.c | 26 ++++++++++++++++++++------ + 1 file changed, 20 insertions(+), 6 deletions(-) + +diff --git a/drivers/android/binder.c b/drivers/android/binder.c +index c8d33c5dbe295..a4749b6c3d730 100644 +--- a/drivers/android/binder.c ++++ b/drivers/android/binder.c +@@ -1903,24 +1903,23 @@ static void binder_deferred_fd_close(int fd) + static void binder_transaction_buffer_release(struct binder_proc *proc, + struct binder_thread *thread, + struct binder_buffer *buffer, +- binder_size_t failed_at, ++ binder_size_t off_end_offset, + bool is_failure) + { + int debug_id = buffer->debug_id; +- binder_size_t off_start_offset, buffer_offset, off_end_offset; ++ binder_size_t off_start_offset, buffer_offset; + + binder_debug(BINDER_DEBUG_TRANSACTION, + "%d buffer release %d, size %zd-%zd, failed at %llx\n", + proc->pid, buffer->debug_id, + buffer->data_size, buffer->offsets_size, +- (unsigned long long)failed_at); ++ (unsigned long long)off_end_offset); + + if (buffer->target_node) + binder_dec_node(buffer->target_node, 1, 0); + + off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); +- off_end_offset = is_failure && failed_at ? failed_at : +- off_start_offset + buffer->offsets_size; ++ + for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; + buffer_offset += sizeof(binder_size_t)) { + struct binder_object_header *hdr; +@@ -2080,6 +2079,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, + } + } + ++/* Clean up all the objects in the buffer */ ++static inline void binder_release_entire_buffer(struct binder_proc *proc, ++ struct binder_thread *thread, ++ struct binder_buffer *buffer, ++ bool is_failure) ++{ ++ binder_size_t off_end_offset; ++ ++ off_end_offset = ALIGN(buffer->data_size, sizeof(void *)); ++ off_end_offset += buffer->offsets_size; ++ ++ binder_transaction_buffer_release(proc, thread, buffer, ++ off_end_offset, is_failure); ++} ++ + static int binder_translate_binder(struct flat_binder_object *fp, + struct binder_transaction *t, + struct binder_thread *thread) +@@ -3578,7 +3592,7 @@ binder_free_buf(struct binder_proc *proc, + binder_node_inner_unlock(buf_node); + } + trace_binder_transaction_buffer_release(buffer); +- binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure); ++ binder_release_entire_buffer(proc, thread, buffer, is_failure); + binder_alloc_free_buf(&proc->alloc, buffer); + } + +-- +2.39.2 + diff --git a/queue-5.15/bonding-add-arp_missed_max-option.patch b/queue-5.15/bonding-add-arp_missed_max-option.patch new file mode 100644 index 00000000000..ea053d476e0 --- /dev/null +++ b/queue-5.15/bonding-add-arp_missed_max-option.patch @@ -0,0 +1,314 @@ +From 400f2759f9a97f84f45ed103be5f52cafecdb39e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Nov 2021 12:29:47 +0800 +Subject: Bonding: add arp_missed_max option + +From: Hangbin Liu + +[ Upstream commit 5944b5abd8646e8c6ac6af2b55f87dede1dae898 ] + +Currently, we use hard code number to verify if we are in the +arp_interval timeslice. But some user may want to reduce/extend +the verify timeslice. With the similar team option 'missed_max' +the uers could change that number based on their own environment. 
+ +Acked-by: Jay Vosburgh +Signed-off-by: Hangbin Liu +Signed-off-by: David S. Miller +Stable-dep-of: 9949e2efb54e ("bonding: fix send_peer_notif overflow") +Signed-off-by: Sasha Levin +--- + Documentation/networking/bonding.rst | 11 +++++++++++ + drivers/net/bonding/bond_main.c | 17 +++++++++-------- + drivers/net/bonding/bond_netlink.c | 15 +++++++++++++++ + drivers/net/bonding/bond_options.c | 28 ++++++++++++++++++++++++++++ + drivers/net/bonding/bond_procfs.c | 2 ++ + drivers/net/bonding/bond_sysfs.c | 13 +++++++++++++ + include/net/bond_options.h | 1 + + include/net/bonding.h | 1 + + include/uapi/linux/if_link.h | 1 + + tools/include/uapi/linux/if_link.h | 1 + + 10 files changed, 82 insertions(+), 8 deletions(-) + +diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst +index c0a789b008063..ab98373535ea6 100644 +--- a/Documentation/networking/bonding.rst ++++ b/Documentation/networking/bonding.rst +@@ -422,6 +422,17 @@ arp_all_targets + consider the slave up only when all of the arp_ip_targets + are reachable + ++arp_missed_max ++ ++ Specifies the number of arp_interval monitor checks that must ++ fail in order for an interface to be marked down by the ARP monitor. ++ ++ In order to provide orderly failover semantics, backup interfaces ++ are permitted an extra monitor check (i.e., they must fail ++ arp_missed_max + 1 times before being marked down). ++ ++ The default value is 2, and the allowable range is 1 - 255. ++ + downdelay + + Specifies the time, in milliseconds, to wait before disabling +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index a2ce9f0fb43c5..b4d613bdbc060 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -3145,8 +3145,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) + * when the source ip is 0, so don't take the link down + * if we don't know our ip yet + */ +- if (!bond_time_in_interval(bond, trans_start, 2) || +- !bond_time_in_interval(bond, slave->last_rx, 2)) { ++ if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || ++ !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) { + + bond_propose_link_state(slave, BOND_LINK_DOWN); + slave_state_changed = 1; +@@ -3240,7 +3240,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) + + /* Backup slave is down if: + * - No current_arp_slave AND +- * - more than 3*delta since last receive AND ++ * - more than (missed_max+1)*delta since last receive AND + * - the bond has an IP address + * + * Note: a non-null current_arp_slave indicates +@@ -3252,20 +3252,20 @@ static int bond_ab_arp_inspect(struct bonding *bond) + */ + if (!bond_is_active_slave(slave) && + !rcu_access_pointer(bond->current_arp_slave) && +- !bond_time_in_interval(bond, last_rx, 3)) { ++ !bond_time_in_interval(bond, last_rx, bond->params.missed_max + 1)) { + bond_propose_link_state(slave, BOND_LINK_DOWN); + commit++; + } + + /* Active slave is down if: +- * - more than 2*delta since transmitting OR +- * - (more than 2*delta since receive AND ++ * - more than missed_max*delta since transmitting OR ++ * - (more than missed_max*delta since receive AND + * the bond has an IP address) + */ + trans_start = dev_trans_start(slave->dev); + if (bond_is_active_slave(slave) && +- (!bond_time_in_interval(bond, trans_start, 2) || +- !bond_time_in_interval(bond, last_rx, 2))) { ++ (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || ++ !bond_time_in_interval(bond, last_rx, 
bond->params.missed_max))) { + bond_propose_link_state(slave, BOND_LINK_DOWN); + commit++; + } +@@ -5886,6 +5886,7 @@ static int bond_check_params(struct bond_params *params) + params->arp_interval = arp_interval; + params->arp_validate = arp_validate_value; + params->arp_all_targets = arp_all_targets_value; ++ params->missed_max = 2; + params->updelay = updelay; + params->downdelay = downdelay; + params->peer_notif_delay = 0; +diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c +index 5d54e11d18fa5..1007bf6d385d4 100644 +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -110,6 +110,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { + .len = ETH_ALEN }, + [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, + [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 }, ++ [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, + }; + + static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { +@@ -453,6 +454,15 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], + return err; + } + ++ if (data[IFLA_BOND_MISSED_MAX]) { ++ int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]); ++ ++ bond_opt_initval(&newval, missed_max); ++ err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval); ++ if (err) ++ return err; ++ } ++ + return 0; + } + +@@ -515,6 +525,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) + nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */ + nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */ ++ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */ + 0; + } + +@@ -650,6 +661,10 @@ static int bond_fill_info(struct sk_buff *skb, + bond->params.tlb_dynamic_lb)) + goto nla_put_failure; + ++ if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX, ++ bond->params.missed_max)) ++ goto nla_put_failure; ++ + if (BOND_MODE(bond) == BOND_MODE_8023AD) { + struct ad_info info; + +diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c +index b93337b5a7211..2e8484a91a0e7 100644 +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -78,6 +78,8 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, + const struct bond_opt_value *newval); + static int bond_option_ad_user_port_key_set(struct bonding *bond, + const struct bond_opt_value *newval); ++static int bond_option_missed_max_set(struct bonding *bond, ++ const struct bond_opt_value *newval); + + + static const struct bond_opt_value bond_mode_tbl[] = { +@@ -213,6 +215,13 @@ static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { + { NULL, -1, 0}, + }; + ++static const struct bond_opt_value bond_missed_max_tbl[] = { ++ { "minval", 1, BOND_VALFLAG_MIN}, ++ { "maxval", 255, BOND_VALFLAG_MAX}, ++ { "default", 2, BOND_VALFLAG_DEFAULT}, ++ { NULL, -1, 0}, ++}; ++ + static const struct bond_option bond_opts[BOND_OPT_LAST] = { + [BOND_OPT_MODE] = { + .id = BOND_OPT_MODE, +@@ -270,6 +279,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { + .values = bond_intmax_tbl, + .set = bond_option_arp_interval_set + }, ++ [BOND_OPT_MISSED_MAX] = { ++ .id = BOND_OPT_MISSED_MAX, ++ .name = "arp_missed_max", ++ .desc = "Maximum number of missed ARP interval", ++ .unsuppmodes = BIT(BOND_MODE_8023AD) | BIT(BOND_MODE_TLB) | ++ BIT(BOND_MODE_ALB), ++ .values = bond_missed_max_tbl, ++ .set = bond_option_missed_max_set ++ }, + [BOND_OPT_ARP_TARGETS] = { + .id = 
BOND_OPT_ARP_TARGETS, + .name = "arp_ip_target", +@@ -1186,6 +1204,16 @@ static int bond_option_arp_all_targets_set(struct bonding *bond, + return 0; + } + ++static int bond_option_missed_max_set(struct bonding *bond, ++ const struct bond_opt_value *newval) ++{ ++ netdev_dbg(bond->dev, "Setting missed max to %s (%llu)\n", ++ newval->string, newval->value); ++ bond->params.missed_max = newval->value; ++ ++ return 0; ++} ++ + static int bond_option_primary_set(struct bonding *bond, + const struct bond_opt_value *newval) + { +diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c +index f3e3bfd72556c..2ec11af5f0cce 100644 +--- a/drivers/net/bonding/bond_procfs.c ++++ b/drivers/net/bonding/bond_procfs.c +@@ -115,6 +115,8 @@ static void bond_info_show_master(struct seq_file *seq) + + seq_printf(seq, "ARP Polling Interval (ms): %d\n", + bond->params.arp_interval); ++ seq_printf(seq, "ARP Missed Max: %u\n", ++ bond->params.missed_max); + + seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); + +diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c +index b9e9842fed94e..22aa22f4e0882 100644 +--- a/drivers/net/bonding/bond_sysfs.c ++++ b/drivers/net/bonding/bond_sysfs.c +@@ -303,6 +303,18 @@ static ssize_t bonding_show_arp_targets(struct device *d, + static DEVICE_ATTR(arp_ip_target, 0644, + bonding_show_arp_targets, bonding_sysfs_store_option); + ++/* Show the arp missed max. */ ++static ssize_t bonding_show_missed_max(struct device *d, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct bonding *bond = to_bond(d); ++ ++ return sprintf(buf, "%u\n", bond->params.missed_max); ++} ++static DEVICE_ATTR(arp_missed_max, 0644, ++ bonding_show_missed_max, bonding_sysfs_store_option); ++ + /* Show the up and down delays. 
*/ + static ssize_t bonding_show_downdelay(struct device *d, + struct device_attribute *attr, +@@ -779,6 +791,7 @@ static struct attribute *per_bond_attrs[] = { + &dev_attr_ad_actor_sys_prio.attr, + &dev_attr_ad_actor_system.attr, + &dev_attr_ad_user_port_key.attr, ++ &dev_attr_arp_missed_max.attr, + NULL, + }; + +diff --git a/include/net/bond_options.h b/include/net/bond_options.h +index e64833a674eb8..dd75c071f67e2 100644 +--- a/include/net/bond_options.h ++++ b/include/net/bond_options.h +@@ -65,6 +65,7 @@ enum { + BOND_OPT_NUM_PEER_NOTIF_ALIAS, + BOND_OPT_PEER_NOTIF_DELAY, + BOND_OPT_LACP_ACTIVE, ++ BOND_OPT_MISSED_MAX, + BOND_OPT_LAST + }; + +diff --git a/include/net/bonding.h b/include/net/bonding.h +index de0bdcc7dc7f9..0db3c5f36868b 100644 +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -121,6 +121,7 @@ struct bond_params { + int xmit_policy; + int miimon; + u8 num_peer_notif; ++ u8 missed_max; + int arp_interval; + int arp_validate; + int arp_all_targets; +diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h +index eebd3894fe89a..4ac53b30b6dc9 100644 +--- a/include/uapi/linux/if_link.h ++++ b/include/uapi/linux/if_link.h +@@ -858,6 +858,7 @@ enum { + IFLA_BOND_TLB_DYNAMIC_LB, + IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, ++ IFLA_BOND_MISSED_MAX, + __IFLA_BOND_MAX, + }; + +diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h +index b3610fdd1feec..4772a115231ae 100644 +--- a/tools/include/uapi/linux/if_link.h ++++ b/tools/include/uapi/linux/if_link.h +@@ -655,6 +655,7 @@ enum { + IFLA_BOND_TLB_DYNAMIC_LB, + IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, ++ IFLA_BOND_MISSED_MAX, + __IFLA_BOND_MAX, + }; + +-- +2.39.2 + diff --git a/queue-5.15/bonding-fix-send_peer_notif-overflow.patch b/queue-5.15/bonding-fix-send_peer_notif-overflow.patch new file mode 100644 index 00000000000..a6fb6acee5b --- /dev/null +++ b/queue-5.15/bonding-fix-send_peer_notif-overflow.patch @@ -0,0 +1,99 @@ +From 8cb75e55f5f4ca16f879c792a73d89576761ff4b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 May 2023 11:11:57 +0800 +Subject: bonding: fix send_peer_notif overflow + +From: Hangbin Liu + +[ Upstream commit 9949e2efb54eb3001cb2f6512ff3166dddbfb75d ] + +Bonding send_peer_notif was defined as u8. Since commit 07a4ddec3ce9 +("bonding: add an option to specify a delay between peer notifications"). +the bond->send_peer_notif will be num_peer_notif multiplied by +peer_notif_delay, which is u8 * u32. This would cause the send_peer_notif +overflow easily. e.g. + + ip link add bond0 type bond mode 1 miimon 100 num_grat_arp 30 peer_notify_delay 1000 + +To fix the overflow, let's set the send_peer_notif to u32 and limit +peer_notif_delay to 300s. + +Reported-by: Liang Li +Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2090053 +Fixes: 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") +Signed-off-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_netlink.c | 7 ++++++- + drivers/net/bonding/bond_options.c | 8 +++++++- + include/net/bonding.h | 2 +- + 3 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c +index 1007bf6d385d4..7398accd46805 100644 +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -79,6 +79,11 @@ static int bond_fill_slave_info(struct sk_buff *skb, + return -EMSGSIZE; + } + ++/* Limit the max delay range to 300s */ ++static struct netlink_range_validation delay_range = { ++ .max = 300000, ++}; ++ + static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { + [IFLA_BOND_MODE] = { .type = NLA_U8 }, + [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 }, +@@ -109,7 +114,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { + [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, + .len = ETH_ALEN }, + [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, +- [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 }, ++ [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), + [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, + }; + +diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c +index 2e8484a91a0e7..5f883a18bbabd 100644 +--- a/drivers/net/bonding/bond_options.c ++++ b/drivers/net/bonding/bond_options.c +@@ -165,6 +165,12 @@ static const struct bond_opt_value bond_num_peer_notif_tbl[] = { + { NULL, -1, 0} + }; + ++static const struct bond_opt_value bond_peer_notif_delay_tbl[] = { ++ { "off", 0, 0}, ++ { "maxval", 300000, BOND_VALFLAG_MAX}, ++ { NULL, -1, 0} ++}; ++ + static const struct bond_opt_value bond_primary_reselect_tbl[] = { + { "always", BOND_PRI_RESELECT_ALWAYS, BOND_VALFLAG_DEFAULT}, + { "better", BOND_PRI_RESELECT_BETTER, 0}, +@@ -467,7 +473,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { + .id = BOND_OPT_PEER_NOTIF_DELAY, + .name = "peer_notif_delay", + .desc = "Delay between each peer notification on failover event, in milliseconds", +- .values = bond_intmax_tbl, ++ .values = bond_peer_notif_delay_tbl, + .set = bond_option_peer_notif_delay_set + } + }; +diff --git a/include/net/bonding.h b/include/net/bonding.h +index 0db3c5f36868b..e4453cf4f0171 100644 +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -228,7 +228,7 @@ struct bonding { + */ + spinlock_t mode_lock; + spinlock_t stats_lock; +- u8 send_peer_notif; ++ u32 send_peer_notif; + u8 igmp_retrans; + #ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_entry; +-- +2.39.2 + diff --git a/queue-5.15/dmaengine-at_xdmac-disable-enable-clock-directly-on-.patch b/queue-5.15/dmaengine-at_xdmac-disable-enable-clock-directly-on-.patch new file mode 100644 index 00000000000..58df9c5b22d --- /dev/null +++ b/queue-5.15/dmaengine-at_xdmac-disable-enable-clock-directly-on-.patch @@ -0,0 +1,56 @@ +From 024bbf116ed34702a8bfa8f983a0b6fed9194a38 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 17:18:21 +0200 +Subject: dmaengine: at_xdmac: disable/enable clock directly on suspend/resume + +From: Claudiu Beznea + +[ Upstream commit 2de5ddb5e68c94b781b3789bca1ce52000d7d0e0 ] + +Runtime PM APIs for at_xdmac just plays with clk_enable()/clk_disable() +letting aside the clk_prepare()/clk_unprepare() that needs to be +executed as the clock is also prepared on probe. 
Thus instead of using +runtime PM force suspend/resume APIs use +clk_disable_unprepare() + pm_runtime_put_noidle() on suspend and +clk_prepare_enable() + pm_runtime_get_noresume() on resume. This +approach as been chosen instead of using runtime PM force suspend/resume +with clk_unprepare()/clk_prepare() as it looks simpler and the final +code is better. + +While at it added the missing pm_runtime_mark_last_busy() on suspend before +decrementing the reference counter. + +Fixes: 650b0e990cbd ("dmaengine: at_xdmac: add runtime pm support") +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20230214151827.1050280-2-claudiu.beznea@microchip.com +Signed-off-by: Vinod Koul +Stable-dep-of: 44fe8440bda5 ("dmaengine: at_xdmac: do not resume channels paused by consumers") +Signed-off-by: Sasha Levin +--- + drivers/dma/at_xdmac.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c +index f9aa5396c0f8e..af52429af9172 100644 +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -1992,6 +1992,7 @@ static int atmel_xdmac_suspend(struct device *dev) + + at_xdmac_off(atxdmac); + clk_disable_unprepare(atxdmac->clk); ++ + return 0; + } + +@@ -2008,6 +2009,8 @@ static int atmel_xdmac_resume(struct device *dev) + if (ret) + return ret; + ++ pm_runtime_get_noresume(atxdmac->dev); ++ + at_xdmac_axi_config(pdev); + + /* Clear pending interrupts. */ +-- +2.39.2 + diff --git a/queue-5.15/dmaengine-at_xdmac-do-not-resume-channels-paused-by-.patch b/queue-5.15/dmaengine-at_xdmac-do-not-resume-channels-paused-by-.patch new file mode 100644 index 00000000000..160bd33d927 --- /dev/null +++ b/queue-5.15/dmaengine-at_xdmac-do-not-resume-channels-paused-by-.patch @@ -0,0 +1,134 @@ +From 15b8174c6745a9adccbc11ac92f38dcd867f0b3b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 17:18:23 +0200 +Subject: dmaengine: at_xdmac: do not resume channels paused by consumers + +From: Claudiu Beznea + +[ Upstream commit 44fe8440bda545b5d167329df88c47609a645168 ] + +In case there are DMA channels not paused by consumers in suspend +process (valid on AT91 SoCs for serial driver when no_console_suspend) the +driver pauses them (using at_xdmac_device_pause() which is also the same +function called by dmaengine_pause()) and then in the resume process the +driver resumes them calling at_xdmac_device_resume() which is the same +function called by dmaengine_resume()). This is good for DMA channels +not paused by consumers but for drivers that calls +dmaengine_pause()/dmaegine_resume() on suspend/resume path this may lead to +DMA channel being enabled before the IP is enabled. For IPs that needs +strict ordering with regards to DMA channel enablement this will lead to +wrong behavior. To fix this add a new set of functions +at_xdmac_device_pause_internal()/at_xdmac_device_resume_internal() to be +called only on suspend/resume. 
+ +Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20230214151827.1050280-4-claudiu.beznea@microchip.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/at_xdmac.c | 48 ++++++++++++++++++++++++++++++++++++------ + 1 file changed, 42 insertions(+), 6 deletions(-) + +diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c +index af52429af9172..4965961f55aa2 100644 +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -186,6 +186,7 @@ + enum atc_status { + AT_XDMAC_CHAN_IS_CYCLIC = 0, + AT_XDMAC_CHAN_IS_PAUSED, ++ AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, + }; + + struct at_xdmac_layout { +@@ -346,6 +347,11 @@ static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan) + return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); + } + ++static inline int at_xdmac_chan_is_paused_internal(struct at_xdmac_chan *atchan) ++{ ++ return test_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); ++} ++ + static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg) + { + return cfg & AT_XDMAC_CC_TYPE_PER_TRAN; +@@ -1801,6 +1807,26 @@ static int at_xdmac_device_config(struct dma_chan *chan, + return ret; + } + ++static void at_xdmac_device_pause_set(struct at_xdmac *atxdmac, ++ struct at_xdmac_chan *atchan) ++{ ++ at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask); ++ while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) & ++ (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP)) ++ cpu_relax(); ++} ++ ++static void at_xdmac_device_pause_internal(struct at_xdmac_chan *atchan) ++{ ++ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&atchan->lock, flags); ++ set_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); ++ at_xdmac_device_pause_set(atxdmac, atchan); ++ spin_unlock_irqrestore(&atchan->lock, flags); ++} ++ + static int at_xdmac_device_pause(struct dma_chan *chan) + { + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); +@@ -1813,15 +1839,25 @@ static int at_xdmac_device_pause(struct dma_chan *chan) + return 0; + + spin_lock_irqsave(&atchan->lock, flags); +- at_xdmac_write(atxdmac, atxdmac->layout->grws, atchan->mask); +- while (at_xdmac_chan_read(atchan, AT_XDMAC_CC) +- & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP)) +- cpu_relax(); ++ ++ at_xdmac_device_pause_set(atxdmac, atchan); ++ /* Decrement runtime PM ref counter for each active descriptor. 
*/ + spin_unlock_irqrestore(&atchan->lock, flags); + + return 0; + } + ++static void at_xdmac_device_resume_internal(struct at_xdmac_chan *atchan) ++{ ++ struct at_xdmac *atxdmac = to_at_xdmac(atchan->chan.device); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&atchan->lock, flags); ++ at_xdmac_write(atxdmac, atxdmac->layout->grwr, atchan->mask); ++ clear_bit(AT_XDMAC_CHAN_IS_PAUSED_INTERNAL, &atchan->status); ++ spin_unlock_irqrestore(&atchan->lock, flags); ++} ++ + static int at_xdmac_device_resume(struct dma_chan *chan) + { + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); +@@ -1981,7 +2017,7 @@ static int atmel_xdmac_suspend(struct device *dev) + atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC); + if (at_xdmac_chan_is_cyclic(atchan)) { + if (!at_xdmac_chan_is_paused(atchan)) +- at_xdmac_device_pause(chan); ++ at_xdmac_device_pause_internal(atchan); + atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM); + atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA); + atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC); +@@ -2026,7 +2062,7 @@ static int atmel_xdmac_resume(struct device *dev) + at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); + if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) +- at_xdmac_device_resume(chan); ++ at_xdmac_device_resume_internal(atchan); + at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); + at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); + at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); +-- +2.39.2 + diff --git a/queue-5.15/dmaengine-at_xdmac-move-the-free-desc-to-the-tail-of.patch b/queue-5.15/dmaengine-at_xdmac-move-the-free-desc-to-the-tail-of.patch new file mode 100644 index 00000000000..c3528edf808 --- /dev/null +++ b/queue-5.15/dmaengine-at_xdmac-move-the-free-desc-to-the-tail-of.patch @@ -0,0 +1,103 @@ +From c4756066793ffa30d7265aef3b158e6076186a7e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Dec 2021 13:01:09 +0200 +Subject: dmaengine: at_xdmac: Move the free desc to the tail of the desc list + +From: Tudor Ambarus + +[ Upstream commit 801db90bf294f647b967e8d99b9ae121bea63d0d ] + +Move the free desc to the tail of the list, so that the sequence of +descriptors is more track-able in case of debug. One would know which +descriptor should come next and could easier catch concurrency over +descriptors for example. virt-dma uses list_splice_tail_init() as well, +follow the core driver. 
+ +Signed-off-by: Tudor Ambarus +Link: https://lore.kernel.org/r/20211215110115.191749-7-tudor.ambarus@microchip.com +Signed-off-by: Vinod Koul +Stable-dep-of: 44fe8440bda5 ("dmaengine: at_xdmac: do not resume channels paused by consumers") +Signed-off-by: Sasha Levin +--- + drivers/dma/at_xdmac.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c +index 80c609aa2a91c..b45437aab1434 100644 +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -732,7 +732,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + if (first) +- list_splice_init(&first->descs_list, &atchan->free_descs_list); ++ list_splice_tail_init(&first->descs_list, ++ &atchan->free_descs_list); + goto spin_unlock; + } + +@@ -820,7 +821,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + if (first) +- list_splice_init(&first->descs_list, &atchan->free_descs_list); ++ list_splice_tail_init(&first->descs_list, ++ &atchan->free_descs_list); + spin_unlock_irqrestore(&atchan->lock, irqflags); + return NULL; + } +@@ -1054,8 +1056,8 @@ at_xdmac_prep_interleaved(struct dma_chan *chan, + src_addr, dst_addr, + xt, chunk); + if (!desc) { +- list_splice_init(&first->descs_list, +- &atchan->free_descs_list); ++ list_splice_tail_init(&first->descs_list, ++ &atchan->free_descs_list); + return NULL; + } + +@@ -1135,7 +1137,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + if (first) +- list_splice_init(&first->descs_list, &atchan->free_descs_list); ++ list_splice_tail_init(&first->descs_list, ++ &atchan->free_descs_list); + return NULL; + } + +@@ -1311,8 +1314,8 @@ at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, + sg_dma_len(sg), + value); + if (!desc && first) +- list_splice_init(&first->descs_list, +- &atchan->free_descs_list); ++ list_splice_tail_init(&first->descs_list, ++ &atchan->free_descs_list); + + if (!first) + first = desc; +@@ -1709,7 +1712,8 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) + + spin_lock_irq(&atchan->lock); + /* Move the xfer descriptors into the free descriptors list. */ +- list_splice_init(&desc->descs_list, &atchan->free_descs_list); ++ list_splice_tail_init(&desc->descs_list, ++ &atchan->free_descs_list); + at_xdmac_advance_work(atchan); + spin_unlock_irq(&atchan->lock); + } +@@ -1858,7 +1862,8 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) + /* Cancel all pending transfers. 
*/ + list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) { + list_del(&desc->xfer_node); +- list_splice_init(&desc->descs_list, &atchan->free_descs_list); ++ list_splice_tail_init(&desc->descs_list, ++ &atchan->free_descs_list); + } + + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); +-- +2.39.2 + diff --git a/queue-5.15/dmaengine-at_xdmac-remove-a-level-of-indentation-in-.patch b/queue-5.15/dmaengine-at_xdmac-remove-a-level-of-indentation-in-.patch new file mode 100644 index 00000000000..a74240666b2 --- /dev/null +++ b/queue-5.15/dmaengine-at_xdmac-remove-a-level-of-indentation-in-.patch @@ -0,0 +1,118 @@ +From 15801e22ff52f67f250cef04d8fdd46de1799b55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Dec 2021 13:01:14 +0200 +Subject: dmaengine: at_xdmac: Remove a level of indentation in + at_xdmac_tasklet() + +From: Tudor Ambarus + +[ Upstream commit a61210cae80cac0701d5aca9551466a389717fd2 ] + +Apart of making the code easier to read, this patch is a prerequisite for +a functional change: tasklets run with interrupts enabled, so we need to +protect atchan->irq_status with spin_lock_irq() otherwise the tasklet can +be interrupted by the IRQ that modifies irq_status. atchan->irq_status +will be protected in a further patch. + +Signed-off-by: Tudor Ambarus +Link: https://lore.kernel.org/r/20211215110115.191749-12-tudor.ambarus@microchip.com +Signed-off-by: Vinod Koul +Stable-dep-of: 44fe8440bda5 ("dmaengine: at_xdmac: do not resume channels paused by consumers") +Signed-off-by: Sasha Levin +--- + drivers/dma/at_xdmac.c | 66 ++++++++++++++++++++---------------------- + 1 file changed, 32 insertions(+), 34 deletions(-) + +diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c +index b45437aab1434..f9aa5396c0f8e 100644 +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -1670,53 +1670,51 @@ static void at_xdmac_tasklet(struct tasklet_struct *t) + { + struct at_xdmac_chan *atchan = from_tasklet(atchan, t, tasklet); + struct at_xdmac_desc *desc; ++ struct dma_async_tx_descriptor *txd; + u32 error_mask; + + dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08x\n", + __func__, atchan->irq_status); + +- error_mask = AT_XDMAC_CIS_RBEIS +- | AT_XDMAC_CIS_WBEIS +- | AT_XDMAC_CIS_ROIS; ++ if (at_xdmac_chan_is_cyclic(atchan)) ++ return at_xdmac_handle_cyclic(atchan); + +- if (at_xdmac_chan_is_cyclic(atchan)) { +- at_xdmac_handle_cyclic(atchan); +- } else if ((atchan->irq_status & AT_XDMAC_CIS_LIS) +- || (atchan->irq_status & error_mask)) { +- struct dma_async_tx_descriptor *txd; ++ error_mask = AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS | ++ AT_XDMAC_CIS_ROIS; + +- if (atchan->irq_status & error_mask) +- at_xdmac_handle_error(atchan); ++ if (!(atchan->irq_status & AT_XDMAC_CIS_LIS) && ++ !(atchan->irq_status & error_mask)) ++ return; + +- spin_lock_irq(&atchan->lock); +- desc = list_first_entry(&atchan->xfers_list, +- struct at_xdmac_desc, +- xfer_node); +- dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); +- if (!desc->active_xfer) { +- dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting"); +- spin_unlock_irq(&atchan->lock); +- return; +- } ++ if (atchan->irq_status & error_mask) ++ at_xdmac_handle_error(atchan); + +- txd = &desc->tx_dma_desc; +- dma_cookie_complete(txd); +- /* Remove the transfer from the transfer list. 
*/ +- list_del(&desc->xfer_node); ++ spin_lock_irq(&atchan->lock); ++ desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, ++ xfer_node); ++ dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc); ++ if (!desc->active_xfer) { ++ dev_err(chan2dev(&atchan->chan), "Xfer not active: exiting"); + spin_unlock_irq(&atchan->lock); ++ return; ++ } + +- if (txd->flags & DMA_PREP_INTERRUPT) +- dmaengine_desc_get_callback_invoke(txd, NULL); ++ txd = &desc->tx_dma_desc; ++ dma_cookie_complete(txd); ++ /* Remove the transfer from the transfer list. */ ++ list_del(&desc->xfer_node); ++ spin_unlock_irq(&atchan->lock); + +- dma_run_dependencies(txd); ++ if (txd->flags & DMA_PREP_INTERRUPT) ++ dmaengine_desc_get_callback_invoke(txd, NULL); + +- spin_lock_irq(&atchan->lock); +- /* Move the xfer descriptors into the free descriptors list. */ +- list_splice_tail_init(&desc->descs_list, +- &atchan->free_descs_list); +- at_xdmac_advance_work(atchan); +- spin_unlock_irq(&atchan->lock); +- } ++ dma_run_dependencies(txd); ++ ++ spin_lock_irq(&atchan->lock); ++ /* Move the xfer descriptors into the free descriptors list. */ ++ list_splice_tail_init(&desc->descs_list, &atchan->free_descs_list); ++ at_xdmac_advance_work(atchan); ++ spin_unlock_irq(&atchan->lock); + } + + static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id) +-- +2.39.2 + diff --git a/queue-5.15/dmaengine-at_xdmac-restore-the-content-of-grws-regis.patch b/queue-5.15/dmaengine-at_xdmac-restore-the-content-of-grws-regis.patch new file mode 100644 index 00000000000..dd7428d8eb1 --- /dev/null +++ b/queue-5.15/dmaengine-at_xdmac-restore-the-content-of-grws-regis.patch @@ -0,0 +1,47 @@ +From 8aea70b579cb012b7be3da86bef668398d4a3ddf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 17:18:24 +0200 +Subject: dmaengine: at_xdmac: restore the content of grws register + +From: Claudiu Beznea + +[ Upstream commit 7c5eb63d16b01c202aaa95f374ae15a807745a73 ] + +In case the system suspends to a deep sleep state where power to DMA +controller is cut-off we need to restore the content of GRWS register. +This is a write only register and writing bit X tells the controller +to suspend read and write requests for channel X. Thus set GRWS before +restoring the content of GE (Global Enable) regiter. + +Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20230214151827.1050280-5-claudiu.beznea@microchip.com +Signed-off-by: Vinod Koul +Signed-off-by: Sasha Levin +--- + drivers/dma/at_xdmac.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c +index 4965961f55aa2..66bf570a8bd98 100644 +--- a/drivers/dma/at_xdmac.c ++++ b/drivers/dma/at_xdmac.c +@@ -2063,6 +2063,15 @@ static int atmel_xdmac_resume(struct device *dev) + if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_resume_internal(atchan); ++ ++ /* ++ * We may resume from a deep sleep state where power ++ * to DMA controller is cut-off. Thus, restore the ++ * suspend state of channels set though dmaengine API. 
++ */ ++ else if (at_xdmac_chan_is_paused(atchan)) ++ at_xdmac_device_pause_set(atxdmac, atchan); ++ + at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); + at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); + at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); +-- +2.39.2 + diff --git a/queue-5.15/irqchip-mips-gic-don-t-touch-vl_map-if-a-local-inter.patch b/queue-5.15/irqchip-mips-gic-don-t-touch-vl_map-if-a-local-inter.patch new file mode 100644 index 00000000000..24cf322328f --- /dev/null +++ b/queue-5.15/irqchip-mips-gic-don-t-touch-vl_map-if-a-local-inter.patch @@ -0,0 +1,51 @@ +From 994edc5d6690bdf2ade16fd8d7622c9796e4127d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Apr 2023 11:31:55 +0100 +Subject: irqchip/mips-gic: Don't touch vl_map if a local interrupt is not + routable + +From: Jiaxun Yang + +[ Upstream commit 2c6c9c049510163090b979ea5f92a68ae8d93c45 ] + +When a GIC local interrupt is not routable, it's vl_map will be used +to control some internal states for core (providing IPTI, IPPCI, IPFDC +input signal for core). Overriding it will interfere core's intetrupt +controller. + +Do not touch vl_map if a local interrupt is not routable, we are not +going to remap it. + +Before dd098a0e0319 (" irqchip/mips-gic: Get rid of the reliance on +irq_cpu_online()"), if a local interrupt is not routable, then it won't +be requested from GIC Local domain, and thus gic_all_vpes_irq_cpu_online +won't be called for that particular interrupt. + +Fixes: dd098a0e0319 (" irqchip/mips-gic: Get rid of the reliance on irq_cpu_online()") +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Reviewed-by: Serge Semin +Tested-by: Serge Semin +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230424103156.66753-2-jiaxun.yang@flygoat.com +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-mips-gic.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c +index 0d4515257c59c..c654fe22fcf33 100644 +--- a/drivers/irqchip/irq-mips-gic.c ++++ b/drivers/irqchip/irq-mips-gic.c +@@ -399,6 +399,8 @@ static void gic_all_vpes_irq_cpu_online(void) + unsigned int intr = local_intrs[i]; + struct gic_all_vpes_chip_data *cd; + ++ if (!gic_local_irq_is_routable(intr)) ++ continue; + cd = &gic_all_vpes_chip_data[intr]; + write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); + if (cd->mask) +-- +2.39.2 + diff --git a/queue-5.15/irqchip-mips-gic-get-rid-of-the-reliance-on-irq_cpu_.patch b/queue-5.15/irqchip-mips-gic-get-rid-of-the-reliance-on-irq_cpu_.patch new file mode 100644 index 00000000000..a38ee8796c7 --- /dev/null +++ b/queue-5.15/irqchip-mips-gic-get-rid-of-the-reliance-on-irq_cpu_.patch @@ -0,0 +1,102 @@ +From ba4f2809852fc034b35df5106eca4974cce892df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Oct 2021 18:04:13 +0100 +Subject: irqchip/mips-gic: Get rid of the reliance on irq_cpu_online() + +From: Marc Zyngier + +[ Upstream commit dd098a0e031928cf88c89f7577d31821e1f0e6de ] + +The MIPS GIC driver uses irq_cpu_online() to go and program the +per-CPU interrupts. However, this method iterates over all IRQs +in the system, despite only 3 per-CPU interrupts being of interest. + +Let's be terribly bold and do the iteration ourselves. To ensure +mutual exclusion, hold the gic_lock spinlock that is otherwise +taken while dealing with these interrupts. 
+ +Signed-off-by: Marc Zyngier +Reviewed-by: Serge Semin +Reviewed-by: Florian Fainelli +Tested-by: Serge Semin +Link: https://lore.kernel.org/r/20211021170414.3341522-3-maz@kernel.org +Stable-dep-of: 3d6a0e4197c0 ("irqchip/mips-gic: Use raw spinlock for gic_lock") +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-mips-gic.c | 37 ++++++++++++++++++++++++---------- + 1 file changed, 26 insertions(+), 11 deletions(-) + +diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c +index d815285f1efe3..0f14b2d7b19cb 100644 +--- a/drivers/irqchip/irq-mips-gic.c ++++ b/drivers/irqchip/irq-mips-gic.c +@@ -383,24 +383,35 @@ static void gic_unmask_local_irq_all_vpes(struct irq_data *d) + spin_unlock_irqrestore(&gic_lock, flags); + } + +-static void gic_all_vpes_irq_cpu_online(struct irq_data *d) ++static void gic_all_vpes_irq_cpu_online(void) + { +- struct gic_all_vpes_chip_data *cd; +- unsigned int intr; ++ static const unsigned int local_intrs[] = { ++ GIC_LOCAL_INT_TIMER, ++ GIC_LOCAL_INT_PERFCTR, ++ GIC_LOCAL_INT_FDC, ++ }; ++ unsigned long flags; ++ int i; + +- intr = GIC_HWIRQ_TO_LOCAL(d->hwirq); +- cd = irq_data_get_irq_chip_data(d); ++ spin_lock_irqsave(&gic_lock, flags); + +- write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); +- if (cd->mask) +- write_gic_vl_smask(BIT(intr)); ++ for (i = 0; i < ARRAY_SIZE(local_intrs); i++) { ++ unsigned int intr = local_intrs[i]; ++ struct gic_all_vpes_chip_data *cd; ++ ++ cd = &gic_all_vpes_chip_data[intr]; ++ write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map); ++ if (cd->mask) ++ write_gic_vl_smask(BIT(intr)); ++ } ++ ++ spin_unlock_irqrestore(&gic_lock, flags); + } + + static struct irq_chip gic_all_vpes_local_irq_controller = { + .name = "MIPS GIC Local", + .irq_mask = gic_mask_local_irq_all_vpes, + .irq_unmask = gic_unmask_local_irq_all_vpes, +- .irq_cpu_online = gic_all_vpes_irq_cpu_online, + }; + + static void __gic_irq_dispatch(void) +@@ -481,6 +492,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, + intr = GIC_HWIRQ_TO_LOCAL(hwirq); + map = GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin; + ++ /* ++ * If adding support for more per-cpu interrupts, keep the the ++ * array in gic_all_vpes_irq_cpu_online() in sync. ++ */ + switch (intr) { + case GIC_LOCAL_INT_TIMER: + /* CONFIG_MIPS_CMP workaround (see __gic_init) */ +@@ -711,8 +726,8 @@ static int gic_cpu_startup(unsigned int cpu) + /* Clear all local IRQ masks (ie. disable all local interrupts) */ + write_gic_vl_rmask(~0); + +- /* Invoke irq_cpu_online callbacks to enable desired interrupts */ +- irq_cpu_online(); ++ /* Enable desired interrupts */ ++ gic_all_vpes_irq_cpu_online(); + + return 0; + } +-- +2.39.2 + diff --git a/queue-5.15/irqchip-mips-gic-use-raw-spinlock-for-gic_lock.patch b/queue-5.15/irqchip-mips-gic-use-raw-spinlock-for-gic_lock.patch new file mode 100644 index 00000000000..c4415e5fe95 --- /dev/null +++ b/queue-5.15/irqchip-mips-gic-use-raw-spinlock-for-gic_lock.patch @@ -0,0 +1,163 @@ +From 3781c10e598bbbfa2b41e96e22bc15f241357b1b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Apr 2023 11:31:56 +0100 +Subject: irqchip/mips-gic: Use raw spinlock for gic_lock + +From: Jiaxun Yang + +[ Upstream commit 3d6a0e4197c04599d75d85a608c8bb16a630a38c ] + +Since we may hold gic_lock in hardirq context, use raw spinlock +makes more sense given that it is for low-level interrupt handling +routine and the critical section is small. 
+ +Fixes BUG: + +[ 0.426106] ============================= +[ 0.426257] [ BUG: Invalid wait context ] +[ 0.426422] 6.3.0-rc7-next-20230421-dirty #54 Not tainted +[ 0.426638] ----------------------------- +[ 0.426766] swapper/0/1 is trying to lock: +[ 0.426954] ffffffff8104e7b8 (gic_lock){....}-{3:3}, at: gic_set_type+0x30/08 + +Fixes: 95150ae8b330 ("irqchip: mips-gic: Implement irq_set_type callback") +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Reviewed-by: Serge Semin +Tested-by: Serge Semin +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230424103156.66753-3-jiaxun.yang@flygoat.com +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-mips-gic.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c +index 0f14b2d7b19cb..0d4515257c59c 100644 +--- a/drivers/irqchip/irq-mips-gic.c ++++ b/drivers/irqchip/irq-mips-gic.c +@@ -49,7 +49,7 @@ void __iomem *mips_gic_base; + + static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[GIC_MAX_LONGS], pcpu_masks); + +-static DEFINE_SPINLOCK(gic_lock); ++static DEFINE_RAW_SPINLOCK(gic_lock); + static struct irq_domain *gic_irq_domain; + static int gic_shared_intrs; + static unsigned int gic_cpu_pin; +@@ -210,7 +210,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) + + irq = GIC_HWIRQ_TO_SHARED(d->hwirq); + +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + switch (type & IRQ_TYPE_SENSE_MASK) { + case IRQ_TYPE_EDGE_FALLING: + pol = GIC_POL_FALLING_EDGE; +@@ -250,7 +250,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) + else + irq_set_chip_handler_name_locked(d, &gic_level_irq_controller, + handle_level_irq, NULL); +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + + return 0; + } +@@ -268,7 +268,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, + return -EINVAL; + + /* Assumption : cpumask refers to a single CPU */ +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + + /* Re-route this IRQ */ + write_gic_map_vp(irq, BIT(mips_cm_vp_id(cpu))); +@@ -279,7 +279,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, + set_bit(irq, per_cpu_ptr(pcpu_masks, cpu)); + + irq_data_update_effective_affinity(d, cpumask_of(cpu)); +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + + return IRQ_SET_MASK_OK; + } +@@ -357,12 +357,12 @@ static void gic_mask_local_irq_all_vpes(struct irq_data *d) + cd = irq_data_get_irq_chip_data(d); + cd->mask = false; + +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + for_each_online_cpu(cpu) { + write_gic_vl_other(mips_cm_vp_id(cpu)); + write_gic_vo_rmask(BIT(intr)); + } +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + } + + static void gic_unmask_local_irq_all_vpes(struct irq_data *d) +@@ -375,12 +375,12 @@ static void gic_unmask_local_irq_all_vpes(struct irq_data *d) + cd = irq_data_get_irq_chip_data(d); + cd->mask = true; + +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + for_each_online_cpu(cpu) { + write_gic_vl_other(mips_cm_vp_id(cpu)); + write_gic_vo_smask(BIT(intr)); + } +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + } + + static void gic_all_vpes_irq_cpu_online(void) +@@ 
-393,7 +393,7 @@ static void gic_all_vpes_irq_cpu_online(void) + unsigned long flags; + int i; + +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + + for (i = 0; i < ARRAY_SIZE(local_intrs); i++) { + unsigned int intr = local_intrs[i]; +@@ -405,7 +405,7 @@ static void gic_all_vpes_irq_cpu_online(void) + write_gic_vl_smask(BIT(intr)); + } + +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + } + + static struct irq_chip gic_all_vpes_local_irq_controller = { +@@ -435,11 +435,11 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, + + data = irq_get_irq_data(virq); + +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); + write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu))); + irq_data_update_effective_affinity(data, cpumask_of(cpu)); +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + + return 0; + } +@@ -534,12 +534,12 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, + if (!gic_local_irq_is_routable(intr)) + return -EPERM; + +- spin_lock_irqsave(&gic_lock, flags); ++ raw_spin_lock_irqsave(&gic_lock, flags); + for_each_online_cpu(cpu) { + write_gic_vl_other(mips_cm_vp_id(cpu)); + write_gic_vo_map(mips_gic_vx_map_reg(intr), map); + } +- spin_unlock_irqrestore(&gic_lock, flags); ++ raw_spin_unlock_irqrestore(&gic_lock, flags); + + return 0; + } +-- +2.39.2 + diff --git a/queue-5.15/kvm-s390-fix-race-in-gmap_make_secure.patch b/queue-5.15/kvm-s390-fix-race-in-gmap_make_secure.patch new file mode 100644 index 00000000000..a5d9eb8d2ee --- /dev/null +++ b/queue-5.15/kvm-s390-fix-race-in-gmap_make_secure.patch @@ -0,0 +1,94 @@ +From f153850a9af201873191546dd40bc36088c06b66 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Apr 2023 11:27:53 +0200 +Subject: KVM: s390: fix race in gmap_make_secure() + +From: Claudio Imbrenda + +[ Upstream commit c148dc8e2fa403be501612ee409db866eeed35c0 ] + +Fix a potential race in gmap_make_secure() and remove the last user of +follow_page() without FOLL_GET. + +The old code is locking something it doesn't have a reference to, and +as explained by Jason and David in this discussion: +https://lore.kernel.org/linux-mm/Y9J4P%2FRNvY1Ztn0Q@nvidia.com/ +it can lead to all kind of bad things, including the page getting +unmapped (MADV_DONTNEED), freed, reallocated as a larger folio and the +unlock_page() would target the wrong bit. +There is also another race with the FOLL_WRITE, which could race +between the follow_page() and the get_locked_pte(). + +The main point is to remove the last use of follow_page() without +FOLL_GET or FOLL_PIN, removing the races can be considered a nice +bonus. 
+ +Link: https://lore.kernel.org/linux-mm/Y9J4P%2FRNvY1Ztn0Q@nvidia.com/ +Suggested-by: Jason Gunthorpe +Fixes: 214d9bbcd3a6 ("s390/mm: provide memory management functions for protected KVM guests") +Reviewed-by: Jason Gunthorpe +Signed-off-by: Claudio Imbrenda +Message-Id: <20230428092753.27913-2-imbrenda@linux.ibm.com> +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/uv.c | 32 +++++++++++--------------------- + 1 file changed, 11 insertions(+), 21 deletions(-) + +diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c +index 7d7961c7b1281..66d1248c8c923 100644 +--- a/arch/s390/kernel/uv.c ++++ b/arch/s390/kernel/uv.c +@@ -160,21 +160,10 @@ static int expected_page_refs(struct page *page) + return res; + } + +-static int make_secure_pte(pte_t *ptep, unsigned long addr, +- struct page *exp_page, struct uv_cb_header *uvcb) ++static int make_page_secure(struct page *page, struct uv_cb_header *uvcb) + { +- pte_t entry = READ_ONCE(*ptep); +- struct page *page; + int expected, rc = 0; + +- if (!pte_present(entry)) +- return -ENXIO; +- if (pte_val(entry) & _PAGE_INVALID) +- return -ENXIO; +- +- page = pte_page(entry); +- if (page != exp_page) +- return -ENXIO; + if (PageWriteback(page)) + return -EAGAIN; + expected = expected_page_refs(page); +@@ -252,17 +241,18 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) + goto out; + + rc = -ENXIO; +- page = follow_page(vma, uaddr, FOLL_WRITE); +- if (IS_ERR_OR_NULL(page)) +- goto out; +- +- lock_page(page); + ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); +- if (should_export_before_import(uvcb, gmap->mm)) +- uv_convert_from_secure(page_to_phys(page)); +- rc = make_secure_pte(ptep, uaddr, page, uvcb); ++ if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { ++ page = pte_page(*ptep); ++ rc = -EAGAIN; ++ if (trylock_page(page)) { ++ if (should_export_before_import(uvcb, gmap->mm)) ++ uv_convert_from_secure(page_to_phys(page)); ++ rc = make_page_secure(page, uvcb); ++ unlock_page(page); ++ } ++ } + pte_unmap_unlock(ptep, ptelock); +- unlock_page(page); + out: + mmap_read_unlock(gmap->mm); + +-- +2.39.2 + diff --git a/queue-5.15/kvm-s390-pv-add-export-before-import.patch b/queue-5.15/kvm-s390-pv-add-export-before-import.patch new file mode 100644 index 00000000000..bf29f843077 --- /dev/null +++ b/queue-5.15/kvm-s390-pv-add-export-before-import.patch @@ -0,0 +1,81 @@ +From 3c20c539670f2e36a9a52b8db8aef56467e20f4b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Jun 2022 15:56:07 +0200 +Subject: KVM: s390: pv: add export before import + +From: Claudio Imbrenda + +[ Upstream commit 72b1daff2671cef2c8cccc6c4e52f8d5ce4ebe58 ] + +Due to upcoming changes, it will be possible to temporarily have +multiple protected VMs in the same address space, although only one +will be actually active. + +In that scenario, it is necessary to perform an export of every page +that is to be imported, since the hardware does not allow a page +belonging to a protected guest to be imported into a different +protected guest. + +This also applies to pages that are shared, and thus accessible by the +host. 
+ +Signed-off-by: Claudio Imbrenda +Reviewed-by: Janosch Frank +Link: https://lore.kernel.org/r/20220628135619.32410-7-imbrenda@linux.ibm.com +Message-Id: <20220628135619.32410-7-imbrenda@linux.ibm.com> +Signed-off-by: Janosch Frank +Stable-dep-of: c148dc8e2fa4 ("KVM: s390: fix race in gmap_make_secure()") +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/uv.c | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c +index f95ccbd396925..7d7961c7b1281 100644 +--- a/arch/s390/kernel/uv.c ++++ b/arch/s390/kernel/uv.c +@@ -189,6 +189,32 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, + return rc; + } + ++/** ++ * should_export_before_import - Determine whether an export is needed ++ * before an import-like operation ++ * @uvcb: the Ultravisor control block of the UVC to be performed ++ * @mm: the mm of the process ++ * ++ * Returns whether an export is needed before every import-like operation. ++ * This is needed for shared pages, which don't trigger a secure storage ++ * exception when accessed from a different guest. ++ * ++ * Although considered as one, the Unpin Page UVC is not an actual import, ++ * so it is not affected. ++ * ++ * No export is needed also when there is only one protected VM, because the ++ * page cannot belong to the wrong VM in that case (there is no "other VM" ++ * it can belong to). ++ * ++ * Return: true if an export is needed before every import, otherwise false. ++ */ ++static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) ++{ ++ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) ++ return false; ++ return atomic_read(&mm->context.protected_count) > 1; ++} ++ + /* + * Requests the Ultravisor to make a page accessible to a guest. + * If it's brought in the first time, it will be cleared. If +@@ -232,6 +258,8 @@ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) + + lock_page(page); + ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); ++ if (should_export_before_import(uvcb, gmap->mm)) ++ uv_convert_from_secure(page_to_phys(page)); + rc = make_secure_pte(ptep, uaddr, page, uvcb); + pte_unmap_unlock(ptep, ptelock); + unlock_page(page); +-- +2.39.2 + diff --git a/queue-5.15/net-dsa-introduce-helpers-for-iterating-through-port.patch b/queue-5.15/net-dsa-introduce-helpers-for-iterating-through-port.patch new file mode 100644 index 00000000000..59b804e2334 --- /dev/null +++ b/queue-5.15/net-dsa-introduce-helpers-for-iterating-through-port.patch @@ -0,0 +1,78 @@ +From 6c8dc66118143fb7f454122b6ca7eced432567fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Oct 2021 20:49:49 +0300 +Subject: net: dsa: introduce helpers for iterating through ports using dp + +From: Vladimir Oltean + +[ Upstream commit 82b318983c515f29b8b3a0dad9f6a5fe8a68a7f4 ] + +Since the DSA conversion from the ds->ports array into the dst->ports +list, the DSA API has encouraged driver writers, as well as the core +itself, to write inefficient code. + +Currently, code that wants to filter by a specific type of port when +iterating, like {!unused, user, cpu, dsa}, uses the dsa_is_*_port helper. +Under the hood, this uses dsa_to_port which iterates again through +dst->ports. But the driver iterates through the port list already, so +the complexity is quadratic for the typical case of a single-switch +tree. 
+ +This patch introduces some iteration helpers where the iterator is +already a struct dsa_port *dp, so that the other variant of the +filtering functions, dsa_port_is_{unused,user,cpu_dsa}, can be used +directly on the iterator. This eliminates the second lookup. + +These functions can be used both by the core and by drivers. + +Signed-off-by: Vladimir Oltean +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Stable-dep-of: 120a56b01bee ("net: dsa: mt7530: fix network connectivity with multiple CPU ports") +Signed-off-by: Sasha Levin +--- + include/net/dsa.h | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/include/net/dsa.h b/include/net/dsa.h +index d784e76113b8d..bec439c4a0859 100644 +--- a/include/net/dsa.h ++++ b/include/net/dsa.h +@@ -472,6 +472,34 @@ static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) + return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; + } + ++#define dsa_tree_for_each_user_port(_dp, _dst) \ ++ list_for_each_entry((_dp), &(_dst)->ports, list) \ ++ if (dsa_port_is_user((_dp))) ++ ++#define dsa_switch_for_each_port(_dp, _ds) \ ++ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ ++ if ((_dp)->ds == (_ds)) ++ ++#define dsa_switch_for_each_port_safe(_dp, _next, _ds) \ ++ list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \ ++ if ((_dp)->ds == (_ds)) ++ ++#define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \ ++ list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \ ++ if ((_dp)->ds == (_ds)) ++ ++#define dsa_switch_for_each_available_port(_dp, _ds) \ ++ dsa_switch_for_each_port((_dp), (_ds)) \ ++ if (!dsa_port_is_unused((_dp))) ++ ++#define dsa_switch_for_each_user_port(_dp, _ds) \ ++ dsa_switch_for_each_port((_dp), (_ds)) \ ++ if (dsa_port_is_user((_dp))) ++ ++#define dsa_switch_for_each_cpu_port(_dp, _ds) \ ++ dsa_switch_for_each_port((_dp), (_ds)) \ ++ if (dsa_port_is_cpu((_dp))) ++ + static inline u32 dsa_user_ports(struct dsa_switch *ds) + { + u32 mask = 0; +-- +2.39.2 + diff --git a/queue-5.15/net-dsa-mt7530-fix-network-connectivity-with-multipl.patch b/queue-5.15/net-dsa-mt7530-fix-network-connectivity-with-multipl.patch new file mode 100644 index 00000000000..9797ab5c739 --- /dev/null +++ b/queue-5.15/net-dsa-mt7530-fix-network-connectivity-with-multipl.patch @@ -0,0 +1,78 @@ +From aa17947fb45c977368cab9e0e6b2e1c953ee4dbf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 May 2023 00:09:47 +0300 +Subject: net: dsa: mt7530: fix network connectivity with multiple CPU ports +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Arınç ÜNAL + +[ Upstream commit 120a56b01beed51ab5956a734adcfd2760307107 ] + +On mt753x_cpu_port_enable() there's code that enables flooding for the CPU +port only. Since mt753x_cpu_port_enable() runs twice when both CPU ports +are enabled, port 6 becomes the only port to forward the frames to. But +port 5 is the active port, so no frames received from the user ports will +be forwarded to port 5 which breaks network connectivity. + +Every bit of the BC_FFP, UNM_FFP, and UNU_FFP bits represents a port. Fix +this issue by setting the bit that corresponds to the CPU port without +overwriting the other bits. + +Clear the bits beforehand only for the MT7531 switch. According to the +documents MT7621 Giga Switch Programming Guide v0.3 and MT7531 Reference +Manual for Development Board v1.0, after reset, the BC_FFP, UNM_FFP, and +UNU_FFP bits are set to 1 for MT7531, 0 for MT7530. 
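+
+Concretely, the per-CPU-port enable becomes a plain set of that port's
+bits instead of a read-modify-write of the whole field, roughly (the
+full change is in the diff below):
+
+    /* enable flooding towards this CPU port, keep the other port's bits */
+    mt7530_set(priv, MT7530_MFC, BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) |
+               UNU_FFP(BIT(port)));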
+ +The commit 5e5502e012b8 ("net: dsa: mt7530: fix roaming from DSA user +ports") silently changed the method to set the bits on the MT7530_MFC. +Instead of clearing the relevant bits before mt7530_cpu_port_enable() +which runs under a for loop, the commit started doing it on +mt7530_cpu_port_enable(). + +Back then, this didn't really matter as only a single CPU port could be +used since the CPU port number was hardcoded. The driver was later changed +with commit 1f9a6abecf53 ("net: dsa: mt7530: get cpu-port via dp->cpu_dp +instead of constant") to retrieve the CPU port via dp->cpu_dp. With that, +this silent change became an issue for when using multiple CPU ports. + +Fixes: 5e5502e012b8 ("net: dsa: mt7530: fix roaming from DSA user ports") +Signed-off-by: Arınç ÜNAL +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mt7530.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index ae156b14b57d5..4ec598efc3332 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -1010,9 +1010,9 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) + mt7530_write(priv, MT7530_PVC_P(port), + PORT_SPEC_TAG); + +- /* Disable flooding by default */ +- mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK, +- BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port))); ++ /* Enable flooding on the CPU port */ ++ mt7530_set(priv, MT7530_MFC, BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | ++ UNU_FFP(BIT(port))); + + /* Set CPU port number */ + if (priv->id == ID_MT7621) +@@ -2306,6 +2306,10 @@ mt7531_setup_common(struct dsa_switch *ds) + /* Enable and reset MIB counters */ + mt7530_mib_reset(ds); + ++ /* Disable flooding on all ports */ ++ mt7530_clear(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | ++ UNU_FFP_MASK); ++ + for (i = 0; i < MT7530_NUM_PORTS; i++) { + /* Disable forwarding by default on all ports */ + mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, +-- +2.39.2 + diff --git a/queue-5.15/net-dsa-mt7530-rework-mt753-01-_setup.patch b/queue-5.15/net-dsa-mt7530-rework-mt753-01-_setup.patch new file mode 100644 index 00000000000..1130bd976ed --- /dev/null +++ b/queue-5.15/net-dsa-mt7530-rework-mt753-01-_setup.patch @@ -0,0 +1,86 @@ +From 7eaabe80c6c9cdf25d6d00a1c45f30dd0fe048b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Jun 2022 19:05:38 +0200 +Subject: net: dsa: mt7530: rework mt753[01]_setup + +From: Frank Wunderlich + +[ Upstream commit 6e19bc26cccdd34739b8c42aba2758777d18b211 ] + +Enumerate available cpu-ports instead of using hardcoded constant. 
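+
+For instance, looking up the parent OF node of the DSA master no longer
+uses MT7530_CPU_PORT but iterates the CPU ports, in simplified form
+(taken from the change below):
+
+    dsa_switch_for_each_cpu_port(cpu_dp, ds) {
+        dn = cpu_dp->master->dev.of_node->parent;
+        /* any CPU port will do, the masters share the same parent node */
+        break;
+    }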
+ +Suggested-by: Vladimir Oltean +Signed-off-by: Frank Wunderlich +Reviewed-by: Vladimir Oltean +Reviewed-by: Florian Fainelli +Signed-off-by: Jakub Kicinski +Stable-dep-of: 120a56b01bee ("net: dsa: mt7530: fix network connectivity with multiple CPU ports") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mt7530.c | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index e7a551570cf3c..0e64873fbc37b 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -2094,11 +2094,12 @@ static int + mt7530_setup(struct dsa_switch *ds) + { + struct mt7530_priv *priv = ds->priv; ++ struct device_node *dn = NULL; + struct device_node *phy_node; + struct device_node *mac_np; + struct mt7530_dummy_poll p; + phy_interface_t interface; +- struct device_node *dn; ++ struct dsa_port *cpu_dp; + u32 id, val; + int ret, i; + +@@ -2106,7 +2107,19 @@ mt7530_setup(struct dsa_switch *ds) + * controller also is the container for two GMACs nodes representing + * as two netdev instances. + */ +- dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; ++ dsa_switch_for_each_cpu_port(cpu_dp, ds) { ++ dn = cpu_dp->master->dev.of_node->parent; ++ /* It doesn't matter which CPU port is found first, ++ * their masters should share the same parent OF node ++ */ ++ break; ++ } ++ ++ if (!dn) { ++ dev_err(ds->dev, "parent OF node of DSA master not found"); ++ return -EINVAL; ++ } ++ + ds->assisted_learning_on_cpu_port = true; + ds->mtu_enforcement_ingress = true; + +@@ -2279,6 +2292,7 @@ mt7531_setup(struct dsa_switch *ds) + { + struct mt7530_priv *priv = ds->priv; + struct mt7530_dummy_poll p; ++ struct dsa_port *cpu_dp; + u32 val, id; + int ret, i; + +@@ -2353,8 +2367,11 @@ mt7531_setup(struct dsa_switch *ds) + CORE_PLL_GROUP4, val); + + /* BPDU to CPU port */ +- mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, +- BIT(MT7530_CPU_PORT)); ++ dsa_switch_for_each_cpu_port(cpu_dp, ds) { ++ mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, ++ BIT(cpu_dp->index)); ++ break; ++ } + mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, + MT753X_BPDU_CPU_ONLY); + +-- +2.39.2 + diff --git a/queue-5.15/net-dsa-mt7530-split-off-common-parts-from-mt7531_se.patch b/queue-5.15/net-dsa-mt7530-split-off-common-parts-from-mt7531_se.patch new file mode 100644 index 00000000000..6302309e28e --- /dev/null +++ b/queue-5.15/net-dsa-mt7530-split-off-common-parts-from-mt7531_se.patch @@ -0,0 +1,157 @@ +From c11b8ebb6364896b0801c36138848b9071b55eb5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Apr 2023 02:19:02 +0100 +Subject: net: dsa: mt7530: split-off common parts from mt7531_setup + +From: Daniel Golle + +[ Upstream commit 7f54cc9772ced2d76ac11832f0ada43798443ac9 ] + +MT7988 shares a significant part of the setup function with MT7531. +Split-off those parts into a shared function which is going to be used +also by mt7988_setup. + +Signed-off-by: Daniel Golle +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Stable-dep-of: 120a56b01bee ("net: dsa: mt7530: fix network connectivity with multiple CPU ports") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mt7530.c | 101 ++++++++++++++++++++++----------------- + 1 file changed, 56 insertions(+), 45 deletions(-) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index 0e64873fbc37b..ae156b14b57d5 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -2287,14 +2287,67 @@ mt7530_setup(struct dsa_switch *ds) + return 0; + } + ++static int ++mt7531_setup_common(struct dsa_switch *ds) ++{ ++ struct mt7530_priv *priv = ds->priv; ++ struct dsa_port *cpu_dp; ++ int ret, i; ++ ++ /* BPDU to CPU port */ ++ dsa_switch_for_each_cpu_port(cpu_dp, ds) { ++ mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, ++ BIT(cpu_dp->index)); ++ break; ++ } ++ mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, ++ MT753X_BPDU_CPU_ONLY); ++ ++ /* Enable and reset MIB counters */ ++ mt7530_mib_reset(ds); ++ ++ for (i = 0; i < MT7530_NUM_PORTS; i++) { ++ /* Disable forwarding by default on all ports */ ++ mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, ++ PCR_MATRIX_CLR); ++ ++ /* Disable learning by default on all ports */ ++ mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); ++ ++ mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); ++ ++ if (dsa_is_cpu_port(ds, i)) { ++ ret = mt753x_cpu_port_enable(ds, i); ++ if (ret) ++ return ret; ++ } else { ++ mt7530_port_disable(ds, i); ++ ++ /* Set default PVID to 0 on all user ports */ ++ mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, ++ G0_PORT_VID_DEF); ++ } ++ ++ /* Enable consistent egress tag */ ++ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, ++ PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); ++ } ++ ++ /* Flush the FDB table */ ++ ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); ++ if (ret < 0) ++ return ret; ++ ++ return 0; ++} ++ + static int + mt7531_setup(struct dsa_switch *ds) + { + struct mt7530_priv *priv = ds->priv; + struct mt7530_dummy_poll p; +- struct dsa_port *cpu_dp; + u32 val, id; +- int ret, i; ++ int ret; + + /* Reset whole chip through gpio pin or memory-mapped registers for + * different type of hardware +@@ -2366,44 +2419,7 @@ mt7531_setup(struct dsa_switch *ds) + mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2, + CORE_PLL_GROUP4, val); + +- /* BPDU to CPU port */ +- dsa_switch_for_each_cpu_port(cpu_dp, ds) { +- mt7530_rmw(priv, MT7531_CFC, MT7531_CPU_PMAP_MASK, +- BIT(cpu_dp->index)); +- break; +- } +- mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, +- MT753X_BPDU_CPU_ONLY); +- +- /* Enable and reset MIB counters */ +- mt7530_mib_reset(ds); +- +- for (i = 0; i < MT7530_NUM_PORTS; i++) { +- /* Disable forwarding by default on all ports */ +- mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, +- PCR_MATRIX_CLR); +- +- /* Disable learning by default on all ports */ +- mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); +- +- mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); +- +- if (dsa_is_cpu_port(ds, i)) { +- ret = mt753x_cpu_port_enable(ds, i); +- if (ret) +- return ret; +- } else { +- mt7530_port_disable(ds, i); +- +- /* Set default PVID to 0 on all user ports */ +- mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, +- G0_PORT_VID_DEF); +- } +- +- /* Enable consistent egress tag */ +- mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, +- PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); +- } ++ mt7531_setup_common(ds); + + /* Setup VLAN ID 0 for VLAN-unaware bridges */ + ret = mt7530_setup_vlan0(priv); +@@ -2413,11 +2429,6 @@ 
mt7531_setup(struct dsa_switch *ds) + ds->assisted_learning_on_cpu_port = true; + ds->mtu_enforcement_ingress = true; + +- /* Flush the FDB table */ +- ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL); +- if (ret < 0) +- return ret; +- + return 0; + } + +-- +2.39.2 + diff --git a/queue-5.15/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch b/queue-5.15/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch new file mode 100644 index 00000000000..768998d3ccc --- /dev/null +++ b/queue-5.15/net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch @@ -0,0 +1,90 @@ +From bb5bd0966de5549d74aa2eefb61c3b50df6e9999 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Feb 2023 16:18:48 -0800 +Subject: net/mlx5e: Fix SQ wake logic in ptp napi_poll context + +From: Rahul Rameshbabu + +[ Upstream commit 7aa50380191635e5897a773f272829cc961a2be5 ] + +Check in the mlx5e_ptp_poll_ts_cq context if the ptp tx sq should be woken +up. Before change, the ptp tx sq may never wake up if the ptp tx ts skb +fifo is full when mlx5e_poll_tx_cq checks if the queue should be woken up. + +Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support") +Signed-off-by: Rahul Rameshbabu +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 ++ + .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 ++ + .../net/ethernet/mellanox/mlx5/core/en_tx.c | 19 ++++++++++++------- + 3 files changed, 16 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +index 3a86f66d12955..ee95cc3a03786 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +@@ -126,6 +126,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) + /* ensure cq space is freed before enabling more cqes */ + wmb(); + ++ mlx5e_txqsq_wake(&ptpsq->txqsq); ++ + return work_done == budget; + } + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +index f5c872043bcbd..cf62d1f6d7f20 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +@@ -172,6 +172,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) + return pi; + } + ++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq); ++ + struct mlx5e_icosq_wqe_info { + u8 wqe_type; + u8 num_wqebbs; +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +index e18fa5ae0fd84..6813279b57f89 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -810,6 +810,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t + } + } + ++void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) ++{ ++ if (netif_tx_queue_stopped(sq->txq) && ++ mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && ++ mlx5e_ptpsq_fifo_has_room(sq) && ++ !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { ++ netif_tx_wake_queue(sq->txq); ++ sq->stats->wake++; ++ } ++} ++ + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) + { + struct mlx5e_sq_stats *stats; +@@ -909,13 +920,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) + + netdev_tx_completed_queue(sq->txq, npkts, nbytes); + +- if (netif_tx_queue_stopped(sq->txq) && +- mlx5e_wqc_has_room_for(&sq->wq, sq->cc, 
sq->pc, sq->stop_room) && +- mlx5e_ptpsq_fifo_has_room(sq) && +- !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { +- netif_tx_wake_queue(sq->txq); +- stats->wake++; +- } ++ mlx5e_txqsq_wake(sq); + + return (i == MLX5E_TX_CQ_POLL_BUDGET); + } +-- +2.39.2 + diff --git a/queue-5.15/net-page_pool-use-in_softirq-instead.patch b/queue-5.15/net-page_pool-use-in_softirq-instead.patch new file mode 100644 index 00000000000..57f4140e8c0 --- /dev/null +++ b/queue-5.15/net-page_pool-use-in_softirq-instead.patch @@ -0,0 +1,75 @@ +From f7d31332f760cbbbccb887894f6a0e7fd7001de0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Feb 2023 09:16:11 +0800 +Subject: net: page_pool: use in_softirq() instead + +From: Qingfang DENG + +[ Upstream commit 542bcea4be866b14b3a5c8e90773329066656c43 ] + +We use BH context only for synchronization, so we don't care if it's +actually serving softirq or not. + +As a side node, in case of threaded NAPI, in_serving_softirq() will +return false because it's in process context with BH off, making +page_pool_recycle_in_cache() unreachable. + +Signed-off-by: Qingfang DENG +Tested-by: Felix Fietkau +Signed-off-by: David S. Miller +Stable-dep-of: 368d3cb406cd ("page_pool: fix inconsistency for page_pool_ring_[un]lock()") +Signed-off-by: Sasha Levin +--- + include/net/page_pool.h | 4 ++-- + net/core/page_pool.c | 6 +++--- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/include/net/page_pool.h b/include/net/page_pool.h +index a4082406a0039..80d987419436e 100644 +--- a/include/net/page_pool.h ++++ b/include/net/page_pool.h +@@ -285,7 +285,7 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) + static inline void page_pool_ring_lock(struct page_pool *pool) + __acquires(&pool->ring.producer_lock) + { +- if (in_serving_softirq()) ++ if (in_softirq()) + spin_lock(&pool->ring.producer_lock); + else + spin_lock_bh(&pool->ring.producer_lock); +@@ -294,7 +294,7 @@ static inline void page_pool_ring_lock(struct page_pool *pool) + static inline void page_pool_ring_unlock(struct page_pool *pool) + __releases(&pool->ring.producer_lock) + { +- if (in_serving_softirq()) ++ if (in_softirq()) + spin_unlock(&pool->ring.producer_lock); + else + spin_unlock_bh(&pool->ring.producer_lock); +diff --git a/net/core/page_pool.c b/net/core/page_pool.c +index 1a6978427d6c8..1d520fa1b98a8 100644 +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -390,8 +390,8 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page) + static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page) + { + int ret; +- /* BH protection not needed if current is serving softirq */ +- if (in_serving_softirq()) ++ /* BH protection not needed if current is softirq */ ++ if (in_softirq()) + ret = ptr_ring_produce(&pool->ring, page); + else + ret = ptr_ring_produce_bh(&pool->ring, page); +@@ -446,7 +446,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, + page_pool_dma_sync_for_device(pool, page, + dma_sync_size); + +- if (allow_direct && in_serving_softirq() && ++ if (allow_direct && in_softirq() && + page_pool_recycle_in_cache(page, pool)) + return NULL; + +-- +2.39.2 + diff --git a/queue-5.15/page_pool-fix-inconsistency-for-page_pool_ring_-un-l.patch b/queue-5.15/page_pool-fix-inconsistency-for-page_pool_ring_-un-l.patch new file mode 100644 index 00000000000..dcbc096f03d --- /dev/null +++ b/queue-5.15/page_pool-fix-inconsistency-for-page_pool_ring_-un-l.patch @@ -0,0 +1,126 @@ +From 
90cd21baba9a4336765bccf5f9049d7bf45de7df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 May 2023 11:17:14 +0800 +Subject: page_pool: fix inconsistency for page_pool_ring_[un]lock() + +From: Yunsheng Lin + +[ Upstream commit 368d3cb406cdd074d1df2ad9ec06d1bfcb664882 ] + +page_pool_ring_[un]lock() use in_softirq() to decide which +spin lock variant to use, and when they are called in the +context with in_softirq() being false, spin_lock_bh() is +called in page_pool_ring_lock() while spin_unlock() is +called in page_pool_ring_unlock(), because spin_lock_bh() +has disabled the softirq in page_pool_ring_lock(), which +causes inconsistency for spin lock pair calling. + +This patch fixes it by returning in_softirq state from +page_pool_producer_lock(), and use it to decide which +spin lock variant to use in page_pool_producer_unlock(). + +As pool->ring has both producer and consumer lock, so +rename it to page_pool_producer_[un]lock() to reflect +the actual usage. Also move them to page_pool.c as they +are only used there, and remove the 'inline' as the +compiler may have better idea to do inlining or not. + +Fixes: 7886244736a4 ("net: page_pool: Add bulk support for ptr_ring") +Signed-off-by: Yunsheng Lin +Acked-by: Jesper Dangaard Brouer +Acked-by: Ilias Apalodimas +Link: https://lore.kernel.org/r/20230522031714.5089-1-linyunsheng@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/page_pool.h | 18 ------------------ + net/core/page_pool.c | 28 ++++++++++++++++++++++++++-- + 2 files changed, 26 insertions(+), 20 deletions(-) + +diff --git a/include/net/page_pool.h b/include/net/page_pool.h +index 80d987419436e..edcc22605842e 100644 +--- a/include/net/page_pool.h ++++ b/include/net/page_pool.h +@@ -282,22 +282,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) + page_pool_update_nid(pool, new_nid); + } + +-static inline void page_pool_ring_lock(struct page_pool *pool) +- __acquires(&pool->ring.producer_lock) +-{ +- if (in_softirq()) +- spin_lock(&pool->ring.producer_lock); +- else +- spin_lock_bh(&pool->ring.producer_lock); +-} +- +-static inline void page_pool_ring_unlock(struct page_pool *pool) +- __releases(&pool->ring.producer_lock) +-{ +- if (in_softirq()) +- spin_unlock(&pool->ring.producer_lock); +- else +- spin_unlock_bh(&pool->ring.producer_lock); +-} +- + #endif /* _NET_PAGE_POOL_H */ +diff --git a/net/core/page_pool.c b/net/core/page_pool.c +index 1d520fa1b98a8..069d6ba0e33fb 100644 +--- a/net/core/page_pool.c ++++ b/net/core/page_pool.c +@@ -26,6 +26,29 @@ + + #define BIAS_MAX LONG_MAX + ++static bool page_pool_producer_lock(struct page_pool *pool) ++ __acquires(&pool->ring.producer_lock) ++{ ++ bool in_softirq = in_softirq(); ++ ++ if (in_softirq) ++ spin_lock(&pool->ring.producer_lock); ++ else ++ spin_lock_bh(&pool->ring.producer_lock); ++ ++ return in_softirq; ++} ++ ++static void page_pool_producer_unlock(struct page_pool *pool, ++ bool in_softirq) ++ __releases(&pool->ring.producer_lock) ++{ ++ if (in_softirq) ++ spin_unlock(&pool->ring.producer_lock); ++ else ++ spin_unlock_bh(&pool->ring.producer_lock); ++} ++ + static int page_pool_init(struct page_pool *pool, + const struct page_pool_params *params) + { +@@ -489,6 +512,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) + { + int i, bulk_len = 0; ++ bool in_softirq; + + for (i = 0; i < count; i++) { + struct page *page = virt_to_head_page(data[i]); +@@ -503,12 +527,12 @@ void page_pool_put_page_bulk(struct 
page_pool *pool, void **data, + return; + + /* Bulk producer into ptr_ring page_pool cache */ +- page_pool_ring_lock(pool); ++ in_softirq = page_pool_producer_lock(pool); + for (i = 0; i < bulk_len; i++) { + if (__ptr_ring_produce(&pool->ring, data[i])) + break; /* ring full */ + } +- page_pool_ring_unlock(pool); ++ page_pool_producer_unlock(pool, in_softirq); + + /* Hopefully all pages was return into ptr_ring */ + if (likely(i == bulk_len)) +-- +2.39.2 + diff --git a/queue-5.15/series b/queue-5.15/series index 4459f8042f1..011c12ec102 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -11,3 +11,25 @@ net-mlx5-devcom-serialize-devcom-registration.patch platform-x86-isst-punit-device-mapping-with-sub-numa.patch platform-x86-isst-remove-8-socket-limit.patch net-phy-mscc-enable-vsc8501-2-rgmii-rx-clock.patch +dmaengine-at_xdmac-move-the-free-desc-to-the-tail-of.patch +dmaengine-at_xdmac-remove-a-level-of-indentation-in-.patch +dmaengine-at_xdmac-disable-enable-clock-directly-on-.patch +dmaengine-at_xdmac-do-not-resume-channels-paused-by-.patch +dmaengine-at_xdmac-restore-the-content-of-grws-regis.patch +kvm-s390-pv-add-export-before-import.patch +kvm-s390-fix-race-in-gmap_make_secure.patch +net-dsa-introduce-helpers-for-iterating-through-port.patch +net-dsa-mt7530-rework-mt753-01-_setup.patch +net-dsa-mt7530-split-off-common-parts-from-mt7531_se.patch +net-dsa-mt7530-fix-network-connectivity-with-multipl.patch +bonding-add-arp_missed_max-option.patch +bonding-fix-send_peer_notif-overflow.patch +binder-fix-uaf-caused-by-faulty-buffer-cleanup.patch +irqchip-mips-gic-get-rid-of-the-reliance-on-irq_cpu_.patch +irqchip-mips-gic-use-raw-spinlock-for-gic_lock.patch +net-mlx5e-fix-sq-wake-logic-in-ptp-napi_poll-context.patch +xdp-allow-registering-memory-model-without-rxq-refer.patch +net-page_pool-use-in_softirq-instead.patch +page_pool-fix-inconsistency-for-page_pool_ring_-un-l.patch +irqchip-mips-gic-don-t-touch-vl_map-if-a-local-inter.patch +xdp-xdp_mem_allocator-can-be-null-in-trace_mem_conne.patch diff --git a/queue-5.15/xdp-allow-registering-memory-model-without-rxq-refer.patch b/queue-5.15/xdp-allow-registering-memory-model-without-rxq-refer.patch new file mode 100644 index 00000000000..ee87dbf0efc --- /dev/null +++ b/queue-5.15/xdp-allow-registering-memory-model-without-rxq-refer.patch @@ -0,0 +1,214 @@ +From 9c05056ec28e7a8350265d383203f2fc5366d05d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Jan 2022 16:08:06 +0100 +Subject: xdp: Allow registering memory model without rxq reference +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Toke Høiland-Jørgensen + +[ Upstream commit 4a48ef70b93b8c7ed5190adfca18849e76387b80 ] + +The functions that register an XDP memory model take a struct xdp_rxq as +parameter, but the RXQ is not actually used for anything other than pulling +out the struct xdp_mem_info that it embeds. So refactor the register +functions and export variants that just take a pointer to the xdp_mem_info. + +This is in preparation for enabling XDP_REDIRECT in bpf_prog_run(), using a +page_pool instance that is not connected to any network device. 
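+
+With the new variants, a caller that has no rxq can register and later
+unregister a memory model along these lines (sketch only; "pool" stands
+for some already created page_pool instance):
+
+    struct xdp_mem_info mem = {};
+    int err;
+
+    err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool);
+    if (err)
+        return err;
+    /* ... run XDP with this memory model ... */
+    xdp_unreg_mem_model(&mem);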
+ +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: Alexei Starovoitov +Link: https://lore.kernel.org/bpf/20220103150812.87914-2-toke@redhat.com +Stable-dep-of: 368d3cb406cd ("page_pool: fix inconsistency for page_pool_ring_[un]lock()") +Signed-off-by: Sasha Levin +--- + include/net/xdp.h | 3 ++ + net/core/xdp.c | 92 +++++++++++++++++++++++++++++++---------------- + 2 files changed, 65 insertions(+), 30 deletions(-) + +diff --git a/include/net/xdp.h b/include/net/xdp.h +index ad5b02dcb6f4c..b2ac69cb30b3d 100644 +--- a/include/net/xdp.h ++++ b/include/net/xdp.h +@@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); + int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator); + void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); ++int xdp_reg_mem_model(struct xdp_mem_info *mem, ++ enum xdp_mem_type type, void *allocator); ++void xdp_unreg_mem_model(struct xdp_mem_info *mem); + + /* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. +diff --git a/net/core/xdp.c b/net/core/xdp.c +index cc92ccb384325..5ed2e9d5a3191 100644 +--- a/net/core/xdp.c ++++ b/net/core/xdp.c +@@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator) + mutex_unlock(&mem_id_lock); + } + +-void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) ++void xdp_unreg_mem_model(struct xdp_mem_info *mem) + { + struct xdp_mem_allocator *xa; +- int type = xdp_rxq->mem.type; +- int id = xdp_rxq->mem.id; ++ int type = mem->type; ++ int id = mem->id; + + /* Reset mem info to defaults */ +- xdp_rxq->mem.id = 0; +- xdp_rxq->mem.type = 0; +- +- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { +- WARN(1, "Missing register, driver bug"); +- return; +- } ++ mem->id = 0; ++ mem->type = 0; + + if (id == 0) + return; +@@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) + rcu_read_unlock(); + } + } ++EXPORT_SYMBOL_GPL(xdp_unreg_mem_model); ++ ++void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) ++{ ++ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { ++ WARN(1, "Missing register, driver bug"); ++ return; ++ } ++ ++ xdp_unreg_mem_model(&xdp_rxq->mem); ++} + EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); + + void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) +@@ -261,28 +267,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type) + return true; + } + +-int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, +- enum xdp_mem_type type, void *allocator) ++static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, ++ enum xdp_mem_type type, ++ void *allocator) + { + struct xdp_mem_allocator *xdp_alloc; + gfp_t gfp = GFP_KERNEL; + int id, errno, ret; + void *ptr; + +- if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { +- WARN(1, "Missing register, driver bug"); +- return -EFAULT; +- } +- + if (!__is_supported_mem_type(type)) +- return -EOPNOTSUPP; ++ return ERR_PTR(-EOPNOTSUPP); + +- xdp_rxq->mem.type = type; ++ mem->type = type; + + if (!allocator) { + if (type == MEM_TYPE_PAGE_POOL) +- return -EINVAL; /* Setup time check page_pool req */ +- return 0; ++ return ERR_PTR(-EINVAL); /* Setup time check page_pool req */ ++ return NULL; + } + + /* Delay init of rhashtable to save memory if feature isn't used */ +@@ -292,13 +294,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + mutex_unlock(&mem_id_lock); + if (ret < 0) { + WARN_ON(1); +- return ret; ++ return ERR_PTR(ret); + } 
+ } + + xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); + if (!xdp_alloc) +- return -ENOMEM; ++ return ERR_PTR(-ENOMEM); + + mutex_lock(&mem_id_lock); + id = __mem_id_cyclic_get(gfp); +@@ -306,15 +308,15 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + errno = id; + goto err; + } +- xdp_rxq->mem.id = id; +- xdp_alloc->mem = xdp_rxq->mem; ++ mem->id = id; ++ xdp_alloc->mem = *mem; + xdp_alloc->allocator = allocator; + + /* Insert allocator into ID lookup table */ + ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); + if (IS_ERR(ptr)) { +- ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); +- xdp_rxq->mem.id = 0; ++ ida_simple_remove(&mem_id_pool, mem->id); ++ mem->id = 0; + errno = PTR_ERR(ptr); + goto err; + } +@@ -324,13 +326,43 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + + mutex_unlock(&mem_id_lock); + +- trace_mem_connect(xdp_alloc, xdp_rxq); +- return 0; ++ return xdp_alloc; + err: + mutex_unlock(&mem_id_lock); + kfree(xdp_alloc); +- return errno; ++ return ERR_PTR(errno); ++} ++ ++int xdp_reg_mem_model(struct xdp_mem_info *mem, ++ enum xdp_mem_type type, void *allocator) ++{ ++ struct xdp_mem_allocator *xdp_alloc; ++ ++ xdp_alloc = __xdp_reg_mem_model(mem, type, allocator); ++ if (IS_ERR(xdp_alloc)) ++ return PTR_ERR(xdp_alloc); ++ return 0; ++} ++EXPORT_SYMBOL_GPL(xdp_reg_mem_model); ++ ++int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, ++ enum xdp_mem_type type, void *allocator) ++{ ++ struct xdp_mem_allocator *xdp_alloc; ++ ++ if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { ++ WARN(1, "Missing register, driver bug"); ++ return -EFAULT; ++ } ++ ++ xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator); ++ if (IS_ERR(xdp_alloc)) ++ return PTR_ERR(xdp_alloc); ++ ++ trace_mem_connect(xdp_alloc, xdp_rxq); ++ return 0; + } ++ + EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); + + /* XDP RX runs under NAPI protection, and in different delivery error +-- +2.39.2 + diff --git a/queue-5.15/xdp-xdp_mem_allocator-can-be-null-in-trace_mem_conne.patch b/queue-5.15/xdp-xdp_mem_allocator-can-be-null-in-trace_mem_conne.patch new file mode 100644 index 00000000000..74c08d3660c --- /dev/null +++ b/queue-5.15/xdp-xdp_mem_allocator-can-be-null-in-trace_mem_conne.patch @@ -0,0 +1,53 @@ +From da98b03bf70e200143f86b1f18a48fd68368eccc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Mar 2022 23:13:45 +0100 +Subject: xdp: xdp_mem_allocator can be NULL in trace_mem_connect(). +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Sebastian Andrzej Siewior + +[ Upstream commit e0ae713023a9d09d6e1b454bdc8e8c1dd32c586e ] + +Since the commit mentioned below __xdp_reg_mem_model() can return a NULL +pointer. This pointer is dereferenced in trace_mem_connect() which leads +to segfault. + +The trace points (mem_connect + mem_disconnect) were put in place to +pair connect/disconnect using the IDs. The ID is only assigned if +__xdp_reg_mem_model() does not return NULL. That connect trace point is +of no use if there is no ID. + +Skip that connect trace point if xdp_alloc is NULL. 
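+
+In essence, the call site is guarded as follows (the exact hunk is below):
+
+    if (trace_mem_connect_enabled() && xdp_alloc)
+        trace_mem_connect(xdp_alloc, xdp_rxq);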
+ +[ Toke Høiland-Jørgensen delivered the reasoning for skipping the trace + point ] + +Fixes: 4a48ef70b93b8 ("xdp: Allow registering memory model without rxq reference") +Signed-off-by: Sebastian Andrzej Siewior +Acked-by: Toke Høiland-Jørgensen +Link: https://lore.kernel.org/r/YikmmXsffE+QajTB@linutronix.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/xdp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/core/xdp.c b/net/core/xdp.c +index 5ed2e9d5a3191..a3e3d2538a3a8 100644 +--- a/net/core/xdp.c ++++ b/net/core/xdp.c +@@ -359,7 +359,8 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + if (IS_ERR(xdp_alloc)) + return PTR_ERR(xdp_alloc); + +- trace_mem_connect(xdp_alloc, xdp_rxq); ++ if (trace_mem_connect_enabled() && xdp_alloc) ++ trace_mem_connect(xdp_alloc, xdp_rxq); + return 0; + } + +-- +2.39.2 +