From: Sasha Levin Date: Sat, 26 Aug 2023 13:50:13 +0000 (-0400) Subject: Fixes for 6.4 X-Git-Tag: v6.1.49~76 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=cbdd3809b497dc2ffe3a9a83c2f17ee23ca6f066;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.4 Signed-off-by: Sasha Levin --- diff --git a/queue-6.4/bonding-fix-macvlan-over-alb-bond-support.patch b/queue-6.4/bonding-fix-macvlan-over-alb-bond-support.patch new file mode 100644 index 00000000000..657ae0e7574 --- /dev/null +++ b/queue-6.4/bonding-fix-macvlan-over-alb-bond-support.patch @@ -0,0 +1,90 @@ +From 4134048f4cf458baf72a737205b9ee327476c780 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Aug 2023 15:19:04 +0800 +Subject: bonding: fix macvlan over alb bond support + +From: Hangbin Liu + +[ Upstream commit e74216b8def3803e98ae536de78733e9d7f3b109 ] + +The commit 14af9963ba1e ("bonding: Support macvlans on top of tlb/rlb mode +bonds") aims to enable the use of macvlans on top of rlb bond mode. However, +the current rlb bond mode only handles ARP packets to update remote neighbor +entries. This causes an issue when a macvlan is on top of the bond, and +remote devices send packets to the macvlan using the bond's MAC address +as the destination. After delivering the packets to the macvlan, the macvlan +will reject them as the MAC address is incorrect. Consequently, this commit +makes macvlan over bond non-functional. + +To address this problem, one potential solution is to check for the presence +of a macvlan port on the bond device using netif_is_macvlan_port(bond->dev) +and return NULL in the rlb_arp_xmit() function. However, this approach +doesn't fully resolve the situation when a VLAN exists between the bond and +macvlan. + +So let's just do a partial revert for commit 14af9963ba1e in rlb_arp_xmit(). +As the comment said, Don't modify or load balance ARPs that do not originate +locally. 
+ +Fixes: 14af9963ba1e ("bonding: Support macvlans on top of tlb/rlb mode bonds") +Reported-by: susan.zheng@veritas.com +Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2117816 +Signed-off-by: Hangbin Liu +Acked-by: Jay Vosburgh +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_alb.c | 6 +++--- + include/net/bonding.h | 11 +---------- + 2 files changed, 4 insertions(+), 13 deletions(-) + +diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c +index b9dbad3a8af82..fc5da5d7744da 100644 +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -660,10 +660,10 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); + +- /* Don't modify or load balance ARPs that do not originate locally +- * (e.g.,arrive via a bridge). ++ /* Don't modify or load balance ARPs that do not originate ++ * from the bond itself or a VLAN directly above the bond. 
+ */ +- if (!bond_slave_has_mac_rx(bond, arp->mac_src)) ++ if (!bond_slave_has_mac_rcu(bond, arp->mac_src)) + return NULL; + + dev = ip_dev_find(dev_net(bond->dev), arp->ip_src); +diff --git a/include/net/bonding.h b/include/net/bonding.h +index 59955ac331578..6e4e406d8cd20 100644 +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -724,23 +724,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond, + } + + /* Caller must hold rcu_read_lock() for read */ +-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac) ++static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac) + { + struct list_head *iter; + struct slave *tmp; +- struct netdev_hw_addr *ha; + + bond_for_each_slave_rcu(bond, tmp, iter) + if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) + return true; +- +- if (netdev_uc_empty(bond->dev)) +- return false; +- +- netdev_for_each_uc_addr(ha, bond->dev) +- if (ether_addr_equal_64bits(mac, ha->addr)) +- return true; +- + return false; + } + +-- +2.40.1 + diff --git a/queue-6.4/can-isotp-fix-support-for-transmission-of-sf-without.patch b/queue-6.4/can-isotp-fix-support-for-transmission-of-sf-without.patch new file mode 100644 index 00000000000..e7072adfa71 --- /dev/null +++ b/queue-6.4/can-isotp-fix-support-for-transmission-of-sf-without.patch @@ -0,0 +1,87 @@ +From 1c28b760514dbf8dd906454740f54a316f3b1aab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Aug 2023 16:45:46 +0200 +Subject: can: isotp: fix support for transmission of SF without flow control + +From: Oliver Hartkopp + +[ Upstream commit 0bfe71159230bab79ee230225ae12ffecbb69f3e ] + +The original implementation had a very simple handling for single frame +transmissions as it just sent the single frame without a timeout handling. + +With the new echo frame handling the echo frame was also introduced for +single frames but the former exception ('simple without timers') has been +maintained by accident. 
This leads to a 1 second timeout when closing the +socket and to an -ECOMM error when CAN_ISOTP_WAIT_TX_DONE is selected. + +As the echo handling is always active (also for single frames) remove the +wrong extra condition for single frames. + +Fixes: 9f39d36530e5 ("can: isotp: add support for transmission without flow control") +Signed-off-by: Oliver Hartkopp +Link: https://lore.kernel.org/r/20230821144547.6658-2-socketcan@hartkopp.net +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/can/isotp.c | 22 +++++++--------------- + 1 file changed, 7 insertions(+), 15 deletions(-) + +diff --git a/net/can/isotp.c b/net/can/isotp.c +index ca9d728d6d727..9d498a886a586 100644 +--- a/net/can/isotp.c ++++ b/net/can/isotp.c +@@ -188,12 +188,6 @@ static bool isotp_register_rxid(struct isotp_sock *so) + return (isotp_bc_flags(so) == 0); + } + +-static bool isotp_register_txecho(struct isotp_sock *so) +-{ +- /* all modes but SF_BROADCAST register for tx echo skbs */ +- return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST); +-} +- + static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer) + { + struct isotp_sock *so = container_of(hrtimer, struct isotp_sock, +@@ -1209,7 +1203,7 @@ static int isotp_release(struct socket *sock) + lock_sock(sk); + + /* remove current filters & unregister */ +- if (so->bound && isotp_register_txecho(so)) { ++ if (so->bound) { + if (so->ifindex) { + struct net_device *dev; + +@@ -1332,14 +1326,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) + can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id), + isotp_rcv, sk, "isotp", sk); + +- if (isotp_register_txecho(so)) { +- /* no consecutive frame echo skb in flight */ +- so->cfecho = 0; ++ /* no consecutive frame echo skb in flight */ ++ so->cfecho = 0; + +- /* register for echo skb's */ +- can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id), +- isotp_rcv_echo, sk, "isotpe", sk); +- } ++ /* register for echo skb's */ ++ can_rx_register(net, 
dev, tx_id, SINGLE_MASK(tx_id), ++ isotp_rcv_echo, sk, "isotpe", sk); + + dev_put(dev); + +@@ -1560,7 +1552,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg, + case NETDEV_UNREGISTER: + lock_sock(sk); + /* remove current filters & unregister */ +- if (so->bound && isotp_register_txecho(so)) { ++ if (so->bound) { + if (isotp_register_rxid(so)) + can_rx_unregister(dev_net(dev), dev, so->rxid, + SINGLE_MASK(so->rxid), +-- +2.40.1 + diff --git a/queue-6.4/can-raw-fix-lockdep-issue-in-raw_release.patch b/queue-6.4/can-raw-fix-lockdep-issue-in-raw_release.patch new file mode 100644 index 00000000000..6f953f6b839 --- /dev/null +++ b/queue-6.4/can-raw-fix-lockdep-issue-in-raw_release.patch @@ -0,0 +1,159 @@ +From c38281d865eab2948e2fa494f49aa64676c99c00 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 20 Jul 2023 11:44:38 +0000 +Subject: can: raw: fix lockdep issue in raw_release() + +From: Eric Dumazet + +[ Upstream commit 11c9027c983e9e4b408ee5613b6504d24ebd85be ] + +syzbot complained about a lockdep issue [1] + +Since raw_bind() and raw_setsockopt() first get RTNL +before locking the socket, we must adopt the same order in raw_release() + +[1] +WARNING: possible circular locking dependency detected +6.5.0-rc1-syzkaller-00192-g78adb4bcf99e #0 Not tainted +------------------------------------------------------ +syz-executor.0/14110 is trying to acquire lock: +ffff88804e4b6130 (sk_lock-AF_CAN){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1708 [inline] +ffff88804e4b6130 (sk_lock-AF_CAN){+.+.}-{0:0}, at: raw_bind+0xb1/0xab0 net/can/raw.c:435 + +but task is already holding lock: +ffffffff8e3df368 (rtnl_mutex){+.+.}-{3:3}, at: raw_bind+0xa7/0xab0 net/can/raw.c:434 + +which lock already depends on the new lock. 
+ +the existing dependency chain (in reverse order) is: + +-> #1 (rtnl_mutex){+.+.}-{3:3}: +__mutex_lock_common kernel/locking/mutex.c:603 [inline] +__mutex_lock+0x181/0x1340 kernel/locking/mutex.c:747 +raw_release+0x1c6/0x9b0 net/can/raw.c:391 +__sock_release+0xcd/0x290 net/socket.c:654 +sock_close+0x1c/0x20 net/socket.c:1386 +__fput+0x3fd/0xac0 fs/file_table.c:384 +task_work_run+0x14d/0x240 kernel/task_work.c:179 +resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] +exit_to_user_mode_loop kernel/entry/common.c:171 [inline] +exit_to_user_mode_prepare+0x210/0x240 kernel/entry/common.c:204 +__syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline] +syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:297 +do_syscall_64+0x44/0xb0 arch/x86/entry/common.c:86 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +-> #0 (sk_lock-AF_CAN){+.+.}-{0:0}: +check_prev_add kernel/locking/lockdep.c:3142 [inline] +check_prevs_add kernel/locking/lockdep.c:3261 [inline] +validate_chain kernel/locking/lockdep.c:3876 [inline] +__lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144 +lock_acquire kernel/locking/lockdep.c:5761 [inline] +lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726 +lock_sock_nested+0x3a/0xf0 net/core/sock.c:3492 +lock_sock include/net/sock.h:1708 [inline] +raw_bind+0xb1/0xab0 net/can/raw.c:435 +__sys_bind+0x1ec/0x220 net/socket.c:1792 +__do_sys_bind net/socket.c:1803 [inline] +__se_sys_bind net/socket.c:1801 [inline] +__x64_sys_bind+0x72/0xb0 net/socket.c:1801 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +other info that might help us debug this: + +Possible unsafe locking scenario: + +CPU0 CPU1 +---- ---- +lock(rtnl_mutex); + lock(sk_lock-AF_CAN); + lock(rtnl_mutex); +lock(sk_lock-AF_CAN); + +*** DEADLOCK *** + +1 lock held by syz-executor.0/14110: + +stack backtrace: +CPU: 0 PID: 14110 Comm: syz-executor.0 Not tainted 
6.5.0-rc1-syzkaller-00192-g78adb4bcf99e #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023 +Call Trace: + +__dump_stack lib/dump_stack.c:88 [inline] +dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 +check_noncircular+0x311/0x3f0 kernel/locking/lockdep.c:2195 +check_prev_add kernel/locking/lockdep.c:3142 [inline] +check_prevs_add kernel/locking/lockdep.c:3261 [inline] +validate_chain kernel/locking/lockdep.c:3876 [inline] +__lock_acquire+0x2e3d/0x5de0 kernel/locking/lockdep.c:5144 +lock_acquire kernel/locking/lockdep.c:5761 [inline] +lock_acquire+0x1ae/0x510 kernel/locking/lockdep.c:5726 +lock_sock_nested+0x3a/0xf0 net/core/sock.c:3492 +lock_sock include/net/sock.h:1708 [inline] +raw_bind+0xb1/0xab0 net/can/raw.c:435 +__sys_bind+0x1ec/0x220 net/socket.c:1792 +__do_sys_bind net/socket.c:1803 [inline] +__se_sys_bind net/socket.c:1801 [inline] +__x64_sys_bind+0x72/0xb0 net/socket.c:1801 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd +RIP: 0033:0x7fd89007cb29 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007fd890d2a0c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000031 +RAX: ffffffffffffffda RBX: 00007fd89019bf80 RCX: 00007fd89007cb29 +RDX: 0000000000000010 RSI: 0000000020000040 RDI: 0000000000000003 +RBP: 00007fd8900c847a R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 000000000000000b R14: 00007fd89019bf80 R15: 00007ffebf8124f8 + + +Fixes: ee8b94c8510c ("can: raw: fix receiver memory leak") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Cc: Ziyang Xuan +Cc: Oliver Hartkopp +Cc: stable@vger.kernel.org +Cc: Marc Kleine-Budde +Link: https://lore.kernel.org/all/20230720114438.172434-1-edumazet@google.com 
+Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + net/can/raw.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/can/raw.c b/net/can/raw.c +index 9fdad12d16325..9fbbf6e00287f 100644 +--- a/net/can/raw.c ++++ b/net/can/raw.c +@@ -386,9 +386,9 @@ static int raw_release(struct socket *sock) + list_del(&ro->notifier); + spin_unlock(&raw_notifier_lock); + ++ rtnl_lock(); + lock_sock(sk); + +- rtnl_lock(); + /* remove current filters & unregister */ + if (ro->bound) { + if (ro->dev) +@@ -405,12 +405,13 @@ static int raw_release(struct socket *sock) + ro->dev = NULL; + ro->count = 0; + free_percpu(ro->uniq); +- rtnl_unlock(); + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); ++ rtnl_unlock(); ++ + sock_put(sk); + + return 0; +-- +2.40.1 + diff --git a/queue-6.4/can-raw-fix-receiver-memory-leak.patch b/queue-6.4/can-raw-fix-receiver-memory-leak.patch new file mode 100644 index 00000000000..5014394e17b --- /dev/null +++ b/queue-6.4/can-raw-fix-receiver-memory-leak.patch @@ -0,0 +1,238 @@ +From 1fe544bce4b5ccec0dfed362e629ab1dec72c0c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Jul 2023 09:17:37 +0800 +Subject: can: raw: fix receiver memory leak + +From: Ziyang Xuan + +[ Upstream commit ee8b94c8510ce64afe0b87ef548d23e00915fb10 ] + +Got kmemleak errors with the following ltp can_filter testcase: + +for ((i=1; i<=100; i++)) +do + ./can_filter & + sleep 0.1 +done + +============================================================== +[<00000000db4a4943>] can_rx_register+0x147/0x360 [can] +[<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] +[<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 +[<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 +[<00000000fd468496>] do_syscall_64+0x33/0x40 +[<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 + +It's a bug in the concurrent scenario of unregister_netdevice_many() +and raw_release() as following: + + cpu0 cpu1 +unregister_netdevice_many(can_dev) 
+ unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this + net_set_todo(can_dev) + raw_release(can_socket) + dev = dev_get_by_index(, ro->ifindex); // dev == NULL + if (dev) { // receivers in dev_rcv_lists not free because dev is NULL + raw_disable_allfilters(, dev, ); + dev_put(dev); + } + ... + ro->bound = 0; + ... + +call_netdevice_notifiers(NETDEV_UNREGISTER, ) + raw_notify(, NETDEV_UNREGISTER, ) + if (ro->bound) // invalid because ro->bound has been set 0 + raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed + +Add a net_device pointer member in struct raw_sock to record bound +can_dev, and use rtnl_lock to serialize raw_socket members between +raw_bind(), raw_release(), raw_setsockopt() and raw_notify(). Use +ro->dev to decide whether to free receivers in dev_rcv_lists. + +Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") +Reviewed-by: Oliver Hartkopp +Acked-by: Oliver Hartkopp +Signed-off-by: Ziyang Xuan +Link: https://lore.kernel.org/all/20230711011737.1969582-1-william.xuanziyang@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Sasha Levin +--- + net/can/raw.c | 57 ++++++++++++++++++++++----------------------------- + 1 file changed, 24 insertions(+), 33 deletions(-) + +diff --git a/net/can/raw.c b/net/can/raw.c +index f8e3866157a33..9fdad12d16325 100644 +--- a/net/can/raw.c ++++ b/net/can/raw.c +@@ -84,6 +84,7 @@ struct raw_sock { + struct sock sk; + int bound; + int ifindex; ++ struct net_device *dev; + struct list_head notifier; + int loopback; + int recv_own_msgs; +@@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, + if (!net_eq(dev_net(dev), sock_net(sk))) + return; + +- if (ro->ifindex != dev->ifindex) ++ if (ro->dev != dev) + return; + + switch (msg) { +@@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, + + ro->ifindex = 0; + ro->bound = 0; ++ ro->dev = NULL; + ro->count = 
0; + release_sock(sk); + +@@ -337,6 +339,7 @@ static int raw_init(struct sock *sk) + + ro->bound = 0; + ro->ifindex = 0; ++ ro->dev = NULL; + + /* set default filter to single entry dfilter */ + ro->dfilter.can_id = 0; +@@ -385,19 +388,13 @@ static int raw_release(struct socket *sock) + + lock_sock(sk); + ++ rtnl_lock(); + /* remove current filters & unregister */ + if (ro->bound) { +- if (ro->ifindex) { +- struct net_device *dev; +- +- dev = dev_get_by_index(sock_net(sk), ro->ifindex); +- if (dev) { +- raw_disable_allfilters(dev_net(dev), dev, sk); +- dev_put(dev); +- } +- } else { ++ if (ro->dev) ++ raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); ++ else + raw_disable_allfilters(sock_net(sk), NULL, sk); +- } + } + + if (ro->count > 1) +@@ -405,8 +402,10 @@ static int raw_release(struct socket *sock) + + ro->ifindex = 0; + ro->bound = 0; ++ ro->dev = NULL; + ro->count = 0; + free_percpu(ro->uniq); ++ rtnl_unlock(); + + sock_orphan(sk); + sock->sk = NULL; +@@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct sock *sk = sock->sk; + struct raw_sock *ro = raw_sk(sk); ++ struct net_device *dev = NULL; + int ifindex; + int err = 0; + int notify_enetdown = 0; +@@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) + if (addr->can_family != AF_CAN) + return -EINVAL; + ++ rtnl_lock(); + lock_sock(sk); + + if (ro->bound && addr->can_ifindex == ro->ifindex) + goto out; + + if (addr->can_ifindex) { +- struct net_device *dev; +- + dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); + if (!dev) { + err = -ENODEV; +@@ -467,26 +466,20 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) + if (!err) { + if (ro->bound) { + /* unregister old filters */ +- if (ro->ifindex) { +- struct net_device *dev; +- +- dev = dev_get_by_index(sock_net(sk), +- ro->ifindex); +- if (dev) { +- 
raw_disable_allfilters(dev_net(dev), +- dev, sk); +- dev_put(dev); +- } +- } else { ++ if (ro->dev) ++ raw_disable_allfilters(dev_net(ro->dev), ++ ro->dev, sk); ++ else + raw_disable_allfilters(sock_net(sk), NULL, sk); +- } + } + ro->ifindex = ifindex; + ro->bound = 1; ++ ro->dev = dev; + } + + out: + release_sock(sk); ++ rtnl_unlock(); + + if (notify_enetdown) { + sk->sk_err = ENETDOWN; +@@ -553,9 +546,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, + rtnl_lock(); + lock_sock(sk); + +- if (ro->bound && ro->ifindex) { +- dev = dev_get_by_index(sock_net(sk), ro->ifindex); +- if (!dev) { ++ dev = ro->dev; ++ if (ro->bound && dev) { ++ if (dev->reg_state != NETREG_REGISTERED) { + if (count > 1) + kfree(filter); + err = -ENODEV; +@@ -596,7 +589,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, + ro->count = count; + + out_fil: +- dev_put(dev); + release_sock(sk); + rtnl_unlock(); + +@@ -614,9 +606,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, + rtnl_lock(); + lock_sock(sk); + +- if (ro->bound && ro->ifindex) { +- dev = dev_get_by_index(sock_net(sk), ro->ifindex); +- if (!dev) { ++ dev = ro->dev; ++ if (ro->bound && dev) { ++ if (dev->reg_state != NETREG_REGISTERED) { + err = -ENODEV; + goto out_err; + } +@@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, + ro->err_mask = err_mask; + + out_err: +- dev_put(dev); + release_sock(sk); + rtnl_unlock(); + +-- +2.40.1 + diff --git a/queue-6.4/dccp-annotate-data-races-in-dccp_poll.patch b/queue-6.4/dccp-annotate-data-races-in-dccp_poll.patch new file mode 100644 index 00000000000..c2cd2d073a0 --- /dev/null +++ b/queue-6.4/dccp-annotate-data-races-in-dccp_poll.patch @@ -0,0 +1,82 @@ +From 508b85fc89563c7933e1f60e1bfeb76add98c68b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 01:58:20 +0000 +Subject: dccp: annotate data-races in dccp_poll() + +From: Eric Dumazet + +[ Upstream commit 
cba3f1786916063261e3e5ccbb803abc325b24ef ] + +We changed tcp_poll() over time, but never updated dccp. + +Note that we also could remove dccp instead of maintaining it. + +Fixes: 7c657876b63c ("[DCCP]: Initial implementation") +Signed-off-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230818015820.2701595-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/dccp/proto.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/net/dccp/proto.c b/net/dccp/proto.c +index 18873f2308ec8..f3494cb5fab04 100644 +--- a/net/dccp/proto.c ++++ b/net/dccp/proto.c +@@ -315,11 +315,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect); + __poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) + { +- __poll_t mask; + struct sock *sk = sock->sk; ++ __poll_t mask; ++ u8 shutdown; ++ int state; + + sock_poll_wait(file, sock, wait); +- if (sk->sk_state == DCCP_LISTEN) ++ ++ state = inet_sk_state_load(sk); ++ if (state == DCCP_LISTEN) + return inet_csk_listen_poll(sk); + + /* Socket is not locked. We are protected from async events +@@ -328,20 +332,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, + */ + + mask = 0; +- if (sk->sk_err) ++ if (READ_ONCE(sk->sk_err)) + mask = EPOLLERR; ++ shutdown = READ_ONCE(sk->sk_shutdown); + +- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) ++ if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED) + mask |= EPOLLHUP; +- if (sk->sk_shutdown & RCV_SHUTDOWN) ++ if (shutdown & RCV_SHUTDOWN) + mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; + + /* Connected? 
*/ +- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { ++ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { + if (atomic_read(&sk->sk_rmem_alloc) > 0) + mask |= EPOLLIN | EPOLLRDNORM; + +- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { ++ if (!(shutdown & SEND_SHUTDOWN)) { + if (sk_stream_is_writeable(sk)) { + mask |= EPOLLOUT | EPOLLWRNORM; + } else { /* send SIGIO later */ +@@ -359,7 +364,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock, + } + return mask; + } +- + EXPORT_SYMBOL_GPL(dccp_poll); + + int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) +-- +2.40.1 + diff --git a/queue-6.4/devlink-add-missing-unregister-linecard-notification.patch b/queue-6.4/devlink-add-missing-unregister-linecard-notification.patch new file mode 100644 index 00000000000..55eae4fc705 --- /dev/null +++ b/queue-6.4/devlink-add-missing-unregister-linecard-notification.patch @@ -0,0 +1,46 @@ +From f0f3a27144a2bbef33c56e5b17b2f6e52fa13b92 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 14:52:40 +0200 +Subject: devlink: add missing unregister linecard notification + +From: Jiri Pirko + +[ Upstream commit 2ebbc9752d06bb1d01201fe632cb6da033b0248d ] + +Cited fixes commit introduced linecard notifications for register, +however it didn't add them for unregister. Fix that by adding them. 
+ +Fixes: c246f9b5fd61 ("devlink: add support to create line card and expose to user") +Signed-off-by: Jiri Pirko +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230817125240.2144794-1-jiri@resnulli.us +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/devlink/leftover.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index 790e61b2a9404..6ef6090eeffe5 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -6739,6 +6739,7 @@ void devlink_notify_unregister(struct devlink *devlink) + struct devlink_param_item *param_item; + struct devlink_trap_item *trap_item; + struct devlink_port *devlink_port; ++ struct devlink_linecard *linecard; + struct devlink_rate *rate_node; + struct devlink_region *region; + unsigned long port_index; +@@ -6767,6 +6768,8 @@ void devlink_notify_unregister(struct devlink *devlink) + + xa_for_each(&devlink->ports, port_index, devlink_port) + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); ++ list_for_each_entry_reverse(linecard, &devlink->linecard_list, list) ++ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL); + devlink_notify(devlink, DEVLINK_CMD_DEL); + } + +-- +2.40.1 + diff --git a/queue-6.4/i40e-fix-potential-null-pointer-dereferencing-of-pf-.patch b/queue-6.4/i40e-fix-potential-null-pointer-dereferencing-of-pf-.patch new file mode 100644 index 00000000000..559c6a1c762 --- /dev/null +++ b/queue-6.4/i40e-fix-potential-null-pointer-dereferencing-of-pf-.patch @@ -0,0 +1,52 @@ +From c226a24a48c7fc35faa9f331f02245c79f651c45 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Aug 2023 15:16:53 -0700 +Subject: i40e: fix potential NULL pointer dereferencing of pf->vf + i40e_sync_vsi_filters() + +From: Andrii Staikov + +[ Upstream commit 9525a3c38accd2e186f52443e35e633e296cc7f5 ] + +Add check for pf->vf not being NULL before dereferencing +pf->vf[vsi->vf_id] in updating VSI filter sync. 
+Add a similar check before dereferencing !pf->vf[vsi->vf_id].trusted +in the condition for clearing promisc mode bit. + +Fixes: c87c938f62d8 ("i40e: Add VF VLAN pruning") +Signed-off-by: Andrii Staikov +Signed-off-by: Aleksandr Loktionov +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/i40e/i40e_main.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index b847bd105b16e..5d21cb4ef6301 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -2615,7 +2615,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) + retval = i40e_correct_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters); +- else ++ else if (pf->vf) + retval = i40e_correct_vf_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters, pf->vf[vsi->vf_id].trusted); +@@ -2788,7 +2788,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) + } + + /* if the VF is not trusted do not do promisc */ +- if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { ++ if (vsi->type == I40E_VSI_SRIOV && pf->vf && ++ !pf->vf[vsi->vf_id].trusted) { + clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + goto out; + } +-- +2.40.1 + diff --git a/queue-6.4/ice-fix-null-pointer-deref-during-vf-reset.patch b/queue-6.4/ice-fix-null-pointer-deref-during-vf-reset.patch new file mode 100644 index 00000000000..288f01e4a1d --- /dev/null +++ b/queue-6.4/ice-fix-null-pointer-deref-during-vf-reset.patch @@ -0,0 +1,132 @@ +From 22289ea808424cf0513d03ccb9443819d9d270da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Aug 2023 10:07:02 +0200 +Subject: ice: Fix NULL pointer deref during VF reset + +From: Petr Oros + +[ Upstream commit 67f6317dfa609846a227a706532439a22828c24b ] + +During stress test with attaching and 
detaching VF from KVM and +simultaneously changing VFs spoofcheck and trust there was a +NULL pointer dereference in ice_reset_vf that VF's VSI is null. + +More than one instance of ice_reset_vf() can be running at a given +time. When we rebuild the VSI in ice_reset_vf, another reset can be +triaged from ice_service_task. In this case we can access the currently +uninitialized VSI and cause panic. The window for this racing condition +has been around for a long time but it's much worse after commit +227bf4500aaa ("ice: move VSI delete outside deconfig") because +the reset runs faster. ice_reset_vf() using vf->cfg_lock and when +we move this lock before accessing to the VF VSI, we can fix +BUG for all cases. + +Panic occurs sometimes in ice_vsi_is_rx_queue_active() and sometimes +in ice_vsi_stop_all_rx_rings() + +With our reproducer, we can hit BUG: +~8h before commit 227bf4500aaa ("ice: move VSI delete outside deconfig"). +~20m after commit 227bf4500aaa ("ice: move VSI delete outside deconfig"). +After this fix we are not able to reproduce it after ~48h + +There was commit cf90b74341ee ("ice: Fix call trace with null VSI during +VF reset") which also tried to fix this issue, but it was only +partially resolved and the bug still exists. + +[ 6420.658415] BUG: kernel NULL pointer dereference, address: 0000000000000000 +[ 6420.665382] #PF: supervisor read access in kernel mode +[ 6420.670521] #PF: error_code(0x0000) - not-present page +[ 6420.675659] PGD 0 +[ 6420.677679] Oops: 0000 [#1] PREEMPT SMP NOPTI +[ 6420.682038] CPU: 53 PID: 326472 Comm: kworker/53:0 Kdump: loaded Not tainted 5.14.0-317.el9.x86_64 #1 +[ 6420.691250] Hardware name: Dell Inc. 
PowerEdge R750/04V528, BIOS 1.6.5 04/15/2022 +[ 6420.698729] Workqueue: ice ice_service_task [ice] +[ 6420.703462] RIP: 0010:ice_vsi_is_rx_queue_active+0x2d/0x60 [ice] +[ 6420.705860] ice 0000:ca:00.0: VF 0 is now untrusted +[ 6420.709494] Code: 00 00 66 83 bf 76 04 00 00 00 48 8b 77 10 74 3e 31 c0 eb 0f 0f b7 97 76 04 00 00 48 83 c0 01 39 c2 7e 2b 48 8b 97 68 04 00 00 <0f> b7 0c 42 48 8b 96 20 13 00 00 48 8d 94 8a 00 00 12 00 8b 12 83 +[ 6420.714426] ice 0000:ca:00.0 ens7f0: Setting MAC 22:22:22:22:22:00 on VF 0. VF driver will be reinitialized +[ 6420.733120] RSP: 0018:ff778d2ff383fdd8 EFLAGS: 00010246 +[ 6420.733123] RAX: 0000000000000000 RBX: ff2acf1916294000 RCX: 0000000000000000 +[ 6420.733125] RDX: 0000000000000000 RSI: ff2acf1f2c6401a0 RDI: ff2acf1a27301828 +[ 6420.762346] RBP: ff2acf1a27301828 R08: 0000000000000010 R09: 0000000000001000 +[ 6420.769476] R10: ff2acf1916286000 R11: 00000000019eba3f R12: ff2acf19066460d0 +[ 6420.776611] R13: ff2acf1f2c6401a0 R14: ff2acf1f2c6401a0 R15: 00000000ffffffff +[ 6420.783742] FS: 0000000000000000(0000) GS:ff2acf28ffa80000(0000) knlGS:0000000000000000 +[ 6420.791829] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 6420.797575] CR2: 0000000000000000 CR3: 00000016ad410003 CR4: 0000000000773ee0 +[ 6420.804708] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 6420.811034] vfio-pci 0000:ca:01.0: enabling device (0000 -> 0002) +[ 6420.811840] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 6420.811841] PKRU: 55555554 +[ 6420.811842] Call Trace: +[ 6420.811843] +[ 6420.811844] ice_reset_vf+0x9a/0x450 [ice] +[ 6420.811876] ice_process_vflr_event+0x8f/0xc0 [ice] +[ 6420.841343] ice_service_task+0x23b/0x600 [ice] +[ 6420.845884] ? __schedule+0x212/0x550 +[ 6420.849550] process_one_work+0x1e2/0x3b0 +[ 6420.853563] ? rescuer_thread+0x390/0x390 +[ 6420.857577] worker_thread+0x50/0x3a0 +[ 6420.861242] ? rescuer_thread+0x390/0x390 +[ 6420.865253] kthread+0xdd/0x100 +[ 6420.868400] ? 
kthread_complete_and_exit+0x20/0x20 +[ 6420.873194] ret_from_fork+0x1f/0x30 +[ 6420.876774] +[ 6420.878967] Modules linked in: vfio_pci vfio_pci_core vfio_iommu_type1 vfio iavf vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter nf_tables bridge stp llc sctp ip6_udp_tunnel udp_tunnel nfp tls nfnetlink bluetooth mlx4_en mlx4_core rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs rfkill sunrpc intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp irdma kvm_intel i40e kvm iTCO_wdt dcdbas ib_uverbs irqbypass iTCO_vendor_support mgag200 mei_me ib_core dell_smbios isst_if_mmio isst_if_mbox_pci rapl i2c_algo_bit drm_shmem_helper intel_cstate drm_kms_helper syscopyarea sysfillrect isst_if_common sysimgblt intel_uncore fb_sys_fops dell_wmi_descriptor wmi_bmof intel_vsec mei i2c_i801 acpi_ipmi ipmi_si i2c_smbus ipmi_devintf intel_pch_thermal acpi_power_meter pcspk + r + +Fixes: efe41860008e ("ice: Fix memory corruption in VF driver") +Fixes: f23df5220d2b ("ice: Fix spurious interrupt during removal of trusted VF") +Signed-off-by: Petr Oros +Reviewed-by: Simon Horman +Reviewed-by: Przemek Kitszel +Reviewed-by: Jacob Keller +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_vf_lib.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c +index 89fd6982df093..14da7ebaaead7 100644 +--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c +@@ -612,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) + return 0; + } + ++ if (flags & ICE_VF_RESET_LOCK) ++ mutex_lock(&vf->cfg_lock); ++ else ++ 
lockdep_assert_held(&vf->cfg_lock); ++ + if (ice_is_vf_disabled(vf)) { + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + dev_dbg(dev, "VF is already removed\n"); +- return -EINVAL; ++ err = -EINVAL; ++ goto out_unlock; + } + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); + +@@ -625,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) + + dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", + vf->vf_id); +- return 0; ++ goto out_unlock; + } + +- if (flags & ICE_VF_RESET_LOCK) +- mutex_lock(&vf->cfg_lock); +- else +- lockdep_assert_held(&vf->cfg_lock); +- + /* Set VF disable bit state here, before triggering reset */ + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false); +-- +2.40.1 + diff --git a/queue-6.4/ice-fix-receive-buffer-size-miscalculation.patch b/queue-6.4/ice-fix-receive-buffer-size-miscalculation.patch new file mode 100644 index 00000000000..afa1235322d --- /dev/null +++ b/queue-6.4/ice-fix-receive-buffer-size-miscalculation.patch @@ -0,0 +1,50 @@ +From 9affd2d7345938475ca68db170e3599052faed11 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Aug 2023 16:51:10 -0700 +Subject: ice: fix receive buffer size miscalculation + +From: Jesse Brandeburg + +[ Upstream commit 10083aef784031fa9f06c19a1b182e6fad5338d9 ] + +The driver is misconfiguring the hardware for some values of MTU such that +it could use multiple descriptors to receive a packet when it could have +simply used one. + +Change the driver to use a round-up instead of the result of a shift, as +the shift can truncate the lower bits of the size, and result in the +problem noted above. It also aligns this driver with similar code in i40e. 
+ +The insidiousness of this problem is that everything works with the wrong +size, it's just not working as well as it could, as some MTU sizes end up +using two or more descriptors, and there is no way to tell that is +happening without looking at ice_trace or a bus analyzer. + +Fixes: efc2214b6047 ("ice: Add support for XDP") +Reviewed-by: Przemek Kitszel +Signed-off-by: Jesse Brandeburg +Reviewed-by: Leon Romanovsky +Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_base.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c +index 619cb07a40691..25e09ab708ca1 100644 +--- a/drivers/net/ethernet/intel/ice/ice_base.c ++++ b/drivers/net/ethernet/intel/ice/ice_base.c +@@ -393,7 +393,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) + /* Receive Packet Data Buffer Size. + * The Packet Data Buffer Size is defined in 128 byte units. + */ +- rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; ++ rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len, ++ BIT_ULL(ICE_RLAN_CTX_DBUF_S)); + + /* use 32 byte descriptors */ + rlan_ctx.dsize = 1; +-- +2.40.1 + diff --git a/queue-6.4/igb-avoid-starting-unnecessary-workqueues.patch b/queue-6.4/igb-avoid-starting-unnecessary-workqueues.patch new file mode 100644 index 00000000000..aa975d7a8a8 --- /dev/null +++ b/queue-6.4/igb-avoid-starting-unnecessary-workqueues.patch @@ -0,0 +1,91 @@ +From f04b3f2decf21383673ed398623c8006ced9ba2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Aug 2023 10:19:27 -0700 +Subject: igb: Avoid starting unnecessary workqueues + +From: Alessio Igor Bogani + +[ Upstream commit b888c510f7b3d64ca75fc0f43b4a4bd1a611312f ] + +If ptp_clock_register() fails or CONFIG_PTP isn't enabled, avoid starting +PTP related workqueues. 
+ +In this way we can fix this: + BUG: unable to handle page fault for address: ffffc9000440b6f8 + #PF: supervisor read access in kernel mode + #PF: error_code(0x0000) - not-present page + PGD 100000067 P4D 100000067 PUD 1001e0067 PMD 107dc5067 PTE 0 + Oops: 0000 [#1] PREEMPT SMP + [...] + Workqueue: events igb_ptp_overflow_check + RIP: 0010:igb_rd32+0x1f/0x60 + [...] + Call Trace: + igb_ptp_read_82580+0x20/0x50 + timecounter_read+0x15/0x60 + igb_ptp_overflow_check+0x1a/0x50 + process_one_work+0x1cb/0x3c0 + worker_thread+0x53/0x3f0 + ? rescuer_thread+0x370/0x370 + kthread+0x142/0x160 + ? kthread_associate_blkcg+0xc0/0xc0 + ret_from_fork+0x1f/0x30 + +Fixes: 1f6e8178d685 ("igb: Prevent dropped Tx timestamps via work items and interrupts.") +Fixes: d339b1331616 ("igb: add PTP Hardware Clock code") +Signed-off-by: Alessio Igor Bogani +Tested-by: Arpana Arland (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230821171927.2203644-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igb/igb_ptp.c | 24 ++++++++++++------------ + 1 file changed, 12 insertions(+), 12 deletions(-) + +diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c +index 405886ee52615..319c544b9f04c 100644 +--- a/drivers/net/ethernet/intel/igb/igb_ptp.c ++++ b/drivers/net/ethernet/intel/igb/igb_ptp.c +@@ -1385,18 +1385,6 @@ void igb_ptp_init(struct igb_adapter *adapter) + return; + } + +- spin_lock_init(&adapter->tmreg_lock); +- INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); +- +- if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) +- INIT_DELAYED_WORK(&adapter->ptp_overflow_work, +- igb_ptp_overflow_check); +- +- adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; +- adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; +- +- igb_ptp_reset(adapter); +- + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + 
&adapter->pdev->dev); + if (IS_ERR(adapter->ptp_clock)) { +@@ -1406,6 +1394,18 @@ void igb_ptp_init(struct igb_adapter *adapter) + dev_info(&adapter->pdev->dev, "added PHC on %s\n", + adapter->netdev->name); + adapter->ptp_flags |= IGB_PTP_ENABLED; ++ ++ spin_lock_init(&adapter->tmreg_lock); ++ INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); ++ ++ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) ++ INIT_DELAYED_WORK(&adapter->ptp_overflow_work, ++ igb_ptp_overflow_check); ++ ++ adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; ++ adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; ++ ++ igb_ptp_reset(adapter); + } + } + +-- +2.40.1 + diff --git a/queue-6.4/igc-fix-the-typo-in-the-ptm-control-macro.patch b/queue-6.4/igc-fix-the-typo-in-the-ptm-control-macro.patch new file mode 100644 index 00000000000..cd45f921d40 --- /dev/null +++ b/queue-6.4/igc-fix-the-typo-in-the-ptm-control-macro.patch @@ -0,0 +1,43 @@ +From d9d1fe8712e454507bd55a5d5b68c2552dcdea68 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Aug 2023 10:17:21 -0700 +Subject: igc: Fix the typo in the PTM Control macro + +From: Sasha Neftin + +[ Upstream commit de43975721b97283d5f17eea4228faddf08f2681 ] + +The IGC_PTM_CTRL_SHRT_CYC defines the time between two consecutive PTM +requests. The bit resolution of this field is six bits. That bit five was +missing in the mask. This patch comes to correct the typo in the +IGC_PTM_CTRL_SHRT_CYC macro. 
+ +Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") +Signed-off-by: Sasha Neftin +Tested-by: Naama Meir +Signed-off-by: Tony Nguyen +Reviewed-by: Simon Horman +Reviewed-by: Kalesh AP +Link: https://lore.kernel.org/r/20230821171721.2203572-1-anthony.l.nguyen@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/igc/igc_defines.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h +index 44a5070299465..2f780cc90883c 100644 +--- a/drivers/net/ethernet/intel/igc/igc_defines.h ++++ b/drivers/net/ethernet/intel/igc/igc_defines.h +@@ -546,7 +546,7 @@ + #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */ + #define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */ + #define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */ +-#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x2f) << 2) ++#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) + #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) + + #define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */ +-- +2.40.1 + diff --git a/queue-6.4/ipv4-fix-data-races-around-inet-inet_id.patch b/queue-6.4/ipv4-fix-data-races-around-inet-inet_id.patch new file mode 100644 index 00000000000..285bf7bf8e1 --- /dev/null +++ b/queue-6.4/ipv4-fix-data-races-around-inet-inet_id.patch @@ -0,0 +1,228 @@ +From 5fc1939a7228d769ce7570979667fb75d525d6d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 19 Aug 2023 03:17:07 +0000 +Subject: ipv4: fix data-races around inet->inet_id + +From: Eric Dumazet + +[ Upstream commit f866fbc842de5976e41ba874b76ce31710b634b5 ] + +UDP sendmsg() is lockless, so ip_select_ident_segs() +can very well be run from multiple cpus [1] + +Convert inet->inet_id to an atomic_t, but implement +a dedicated path for TCP, avoiding cost of a locked +instruction (atomic_add_return()) + +Note that this patch will 
cause a trivial merge conflict +because we added inet->flags in net-next tree. + +v2: added missing change in +drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +(David Ahern) + +[1] + +BUG: KCSAN: data-race in __ip_make_skb / __ip_make_skb + +read-write to 0xffff888145af952a of 2 bytes by task 7803 on cpu 1: +ip_select_ident_segs include/net/ip.h:542 [inline] +ip_select_ident include/net/ip.h:556 [inline] +__ip_make_skb+0x844/0xc70 net/ipv4/ip_output.c:1446 +ip_make_skb+0x233/0x2c0 net/ipv4/ip_output.c:1560 +udp_sendmsg+0x1199/0x1250 net/ipv4/udp.c:1260 +inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:830 +sock_sendmsg_nosec net/socket.c:725 [inline] +sock_sendmsg net/socket.c:748 [inline] +____sys_sendmsg+0x37c/0x4d0 net/socket.c:2494 +___sys_sendmsg net/socket.c:2548 [inline] +__sys_sendmmsg+0x269/0x500 net/socket.c:2634 +__do_sys_sendmmsg net/socket.c:2663 [inline] +__se_sys_sendmmsg net/socket.c:2660 [inline] +__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2660 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + +read to 0xffff888145af952a of 2 bytes by task 7804 on cpu 0: +ip_select_ident_segs include/net/ip.h:541 [inline] +ip_select_ident include/net/ip.h:556 [inline] +__ip_make_skb+0x817/0xc70 net/ipv4/ip_output.c:1446 +ip_make_skb+0x233/0x2c0 net/ipv4/ip_output.c:1560 +udp_sendmsg+0x1199/0x1250 net/ipv4/udp.c:1260 +inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:830 +sock_sendmsg_nosec net/socket.c:725 [inline] +sock_sendmsg net/socket.c:748 [inline] +____sys_sendmsg+0x37c/0x4d0 net/socket.c:2494 +___sys_sendmsg net/socket.c:2548 [inline] +__sys_sendmmsg+0x269/0x500 net/socket.c:2634 +__do_sys_sendmmsg net/socket.c:2663 [inline] +__se_sys_sendmmsg net/socket.c:2660 [inline] +__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2660 +do_syscall_x64 arch/x86/entry/common.c:50 [inline] +do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80 +entry_SYSCALL_64_after_hwframe+0x63/0xcd + 
+value changed: 0x184d -> 0x184e + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 7804 Comm: syz-executor.1 Not tainted 6.5.0-rc6-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023 +================================================================== + +Fixes: 23f57406b82d ("ipv4: avoid using shared IP generator for connected sockets") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../chelsio/inline_crypto/chtls/chtls_cm.c | 2 +- + include/net/inet_sock.h | 2 +- + include/net/ip.h | 15 +++++++++++++-- + net/dccp/ipv4.c | 4 ++-- + net/ipv4/af_inet.c | 2 +- + net/ipv4/datagram.c | 2 +- + net/ipv4/tcp_ipv4.c | 4 ++-- + net/sctp/socket.c | 2 +- + 8 files changed, 22 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +index c2e7037c7ba1c..7750702900fa6 100644 +--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c ++++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +@@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt) + tp->write_seq = snd_isn; + tp->snd_nxt = snd_isn; + tp->snd_una = snd_isn; +- inet_sk(sk)->inet_id = get_random_u16(); ++ atomic_set(&inet_sk(sk)->inet_id, get_random_u16()); + assign_rxopt(sk, opt); + + if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10)) +diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h +index 0bb32bfc61832..491ceb7ebe5d1 100644 +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -222,8 +222,8 @@ struct inet_sock { + __s16 uc_ttl; + __u16 cmsg_flags; + struct ip_options_rcu __rcu *inet_opt; ++ atomic_t inet_id; + __be16 inet_sport; +- __u16 inet_id; + + __u8 tos; + __u8 min_ttl; +diff --git a/include/net/ip.h b/include/net/ip.h +index 530e7257e4389..1872f570abeda 100644 +--- 
a/include/net/ip.h ++++ b/include/net/ip.h +@@ -532,8 +532,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { +- iph->id = htons(inet_sk(sk)->inet_id); +- inet_sk(sk)->inet_id += segs; ++ int val; ++ ++ /* avoid atomic operations for TCP, ++ * as we hold socket lock at this point. ++ */ ++ if (sk_is_tcp(sk)) { ++ sock_owned_by_me(sk); ++ val = atomic_read(&inet_sk(sk)->inet_id); ++ atomic_set(&inet_sk(sk)->inet_id, val + segs); ++ } else { ++ val = atomic_add_return(segs, &inet_sk(sk)->inet_id); ++ } ++ iph->id = htons(val); + return; + } + if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { +diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c +index 3ab68415d121c..e7b9703bd1a1a 100644 +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) + inet->inet_daddr, + inet->inet_sport, + inet->inet_dport); +- inet->inet_id = get_random_u16(); ++ atomic_set(&inet->inet_id, get_random_u16()); + + err = dccp_connect(sk); + rt = NULL; +@@ -432,7 +432,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, + RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; +- newinet->inet_id = get_random_u16(); ++ atomic_set(&newinet->inet_id, get_random_u16()); + + if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) + goto put_and_exit; +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 10ebe39dcc873..9dde8e842befe 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -340,7 +340,7 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, + else + inet->pmtudisc = IP_PMTUDISC_WANT; + +- inet->inet_id = 0; ++ atomic_set(&inet->inet_id, 0); + + sock_init_data(sock, sk); + +diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c 
+index 4d1af0cd7d99e..cb5dbee9e018f 100644 +--- a/net/ipv4/datagram.c ++++ b/net/ipv4/datagram.c +@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len + reuseport_has_conns_set(sk); + sk->sk_state = TCP_ESTABLISHED; + sk_set_txhash(sk); +- inet->inet_id = get_random_u16(); ++ atomic_set(&inet->inet_id, get_random_u16()); + + sk_dst_set(sk, &rt->dst); + err = 0; +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 498dd4acdeec8..caecb4d1e424a 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -312,7 +312,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) + inet->inet_daddr)); + } + +- inet->inet_id = get_random_u16(); ++ atomic_set(&inet->inet_id, get_random_u16()); + + if (tcp_fastopen_defer_connect(sk, &err)) + return err; +@@ -1596,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + inet_csk(newsk)->icsk_ext_hdr_len = 0; + if (inet_opt) + inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; +- newinet->inet_id = get_random_u16(); ++ atomic_set(&newinet->inet_id, get_random_u16()); + + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). 
+diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index de52045774303..d77561d97a1ed 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -9479,7 +9479,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, + newinet->inet_rcv_saddr = inet->inet_rcv_saddr; + newinet->inet_dport = htons(asoc->peer.port); + newinet->pmtudisc = inet->pmtudisc; +- newinet->inet_id = get_random_u16(); ++ atomic_set(&newinet->inet_id, get_random_u16()); + + newinet->uc_ttl = inet->uc_ttl; + newinet->mc_loop = 1; +-- +2.40.1 + diff --git a/queue-6.4/ipvlan-fix-a-reference-count-leak-warning-in-ipvlan_.patch b/queue-6.4/ipvlan-fix-a-reference-count-leak-warning-in-ipvlan_.patch new file mode 100644 index 00000000000..4fec5236f43 --- /dev/null +++ b/queue-6.4/ipvlan-fix-a-reference-count-leak-warning-in-ipvlan_.patch @@ -0,0 +1,90 @@ +From 3b5e4cdf5ec8dcd075940c706a313015d2bcd8e4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 22:54:49 +0800 +Subject: ipvlan: Fix a reference count leak warning in ipvlan_ns_exit() + +From: Lu Wei + +[ Upstream commit 043d5f68d0ccdda91029b4b6dce7eeffdcfad281 ] + +There are two network devices(veth1 and veth3) in ns1, and ipvlan1 with +L3S mode and ipvlan2 with L2 mode are created based on them as +figure (1). In this case, ipvlan_register_nf_hook() will be called to +register nf hook which is needed by ipvlans in L3S mode in ns1 and value +of ipvl_nf_hook_refcnt is set to 1. 
+ +(1) + ns1 ns2 + ------------ ------------ + + veth1--ipvlan1 (L3S) + + veth3--ipvlan2 (L2) + +(2) + ns1 ns2 + ------------ ------------ + + veth1--ipvlan1 (L3S) + + ipvlan2 (L2) veth3 + | | + |------->-------->--------->-------- + migrate + +When veth3 migrates from ns1 to ns2 as figure (2), veth3 will register in +ns2 and calls call_netdevice_notifiers with NETDEV_REGISTER event: + +dev_change_net_namespace + call_netdevice_notifiers + ipvlan_device_event + ipvlan_migrate_l3s_hook + ipvlan_register_nf_hook(newnet) (I) + ipvlan_unregister_nf_hook(oldnet) (II) + +In function ipvlan_migrate_l3s_hook(), ipvl_nf_hook_refcnt in ns1 is not 0 +since veth1 with ipvlan1 is still in ns1, (I) and (II) will be called to +register nf_hook in ns2 and unregister nf_hook in ns1. As a result, +ipvl_nf_hook_refcnt in ns1 is decreased incorrectly and this in ns2 +is increased incorrectly. When the second net namespace is removed, a +reference count leak warning in ipvlan_ns_exit() will be triggered. + +This patch adds a check before ipvlan_migrate_l3s_hook() is called. 
The +warning can be triggered as follows: + +$ ip netns add ns1 +$ ip netns add ns2 +$ ip netns exec ns1 ip link add veth1 type veth peer name veth2 +$ ip netns exec ns1 ip link add veth3 type veth peer name veth4 +$ ip netns exec ns1 ip link add ipv1 link veth1 type ipvlan mode l3s +$ ip netns exec ns1 ip link add ipv2 link veth3 type ipvlan mode l2 +$ ip netns exec ns1 ip link set veth3 netns ns2 +$ ip net del ns2 + +Fixes: 3133822f5ac1 ("ipvlan: use pernet operations and restrict l3s hooks to master netns") +Signed-off-by: Lu Wei +Reviewed-by: Florian Westphal +Link: https://lore.kernel.org/r/20230817145449.141827-1-luwei32@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ipvlan/ipvlan_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c +index b15dd9a3ad540..1b55928e89b8a 100644 +--- a/drivers/net/ipvlan/ipvlan_main.c ++++ b/drivers/net/ipvlan/ipvlan_main.c +@@ -748,7 +748,8 @@ static int ipvlan_device_event(struct notifier_block *unused, + + write_pnet(&port->pnet, newnet); + +- ipvlan_migrate_l3s_hook(oldnet, newnet); ++ if (port->mode == IPVLAN_MODE_L3S) ++ ipvlan_migrate_l3s_hook(oldnet, newnet); + break; + } + case NETDEV_UNREGISTER: +-- +2.40.1 + diff --git a/queue-6.4/jbd2-fix-a-race-when-checking-checkpoint-buffer-busy.patch b/queue-6.4/jbd2-fix-a-race-when-checking-checkpoint-buffer-busy.patch new file mode 100644 index 00000000000..adaf5558cf5 --- /dev/null +++ b/queue-6.4/jbd2-fix-a-race-when-checking-checkpoint-buffer-busy.patch @@ -0,0 +1,150 @@ +From 37e69eb558e6e706e84f5e6a1d4bdb4a354e0714 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 21:59:27 +0800 +Subject: jbd2: fix a race when checking checkpoint buffer busy + +From: Zhang Yi + +[ Upstream commit 46f881b5b1758dc4a35fba4a643c10717d0cf427 ] + +Before removing checkpoint buffer from the t_checkpoint_list, we have to +check both BH_Dirty and 
BH_Lock bits together to distinguish buffers +have not been or were being written back. But __cp_buffer_busy() checks +them separately, it first check lock state and then check dirty, the +window between these two checks could be raced by writing back +procedure, which locks buffer and clears buffer dirty before I/O +completes. So it cannot guarantee checkpointing buffers been written +back to disk if some error happens later. Finally, it may clean +checkpoint transactions and lead to inconsistent filesystem. + +jbd2_journal_forget() and __journal_try_to_free_buffer() also have the +same problem (journal_unmap_buffer() escape from this issue since it's +running under the buffer lock), so fix them through introducing a new +helper to try holding the buffer lock and remove really clean buffer. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217490 +Cc: stable@vger.kernel.org +Suggested-by: Jan Kara +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230606135928.434610-6-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/jbd2/checkpoint.c | 38 +++++++++++++++++++++++++++++++++++--- + fs/jbd2/transaction.c | 17 +++++------------ + include/linux/jbd2.h | 1 + + 3 files changed, 41 insertions(+), 15 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index 42b34cab64fbd..9ec91017a7f3c 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -376,11 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, + jh = next_jh; + next_jh = jh->b_cpnext; + +- if (!destroy && __cp_buffer_busy(jh)) +- continue; ++ if (destroy) { ++ ret = __jbd2_journal_remove_checkpoint(jh); ++ } else { ++ ret = jbd2_journal_try_remove_checkpoint(jh); ++ if (ret < 0) ++ continue; ++ } + + nr_freed++; +- ret = __jbd2_journal_remove_checkpoint(jh); + if (ret) { + *released = true; + break; +@@ -616,6 +620,34 @@ int __jbd2_journal_remove_checkpoint(struct 
journal_head *jh) + return 1; + } + ++/* ++ * Check the checkpoint buffer and try to remove it from the checkpoint ++ * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if ++ * it frees the transaction, 0 otherwise. ++ * ++ * This function is called with j_list_lock held. ++ */ ++int jbd2_journal_try_remove_checkpoint(struct journal_head *jh) ++{ ++ struct buffer_head *bh = jh2bh(jh); ++ ++ if (!trylock_buffer(bh)) ++ return -EBUSY; ++ if (buffer_dirty(bh)) { ++ unlock_buffer(bh); ++ return -EBUSY; ++ } ++ unlock_buffer(bh); ++ ++ /* ++ * Buffer is clean and the IO has finished (we held the buffer ++ * lock) so the checkpoint is done. We can safely remove the ++ * buffer from this transaction. ++ */ ++ JBUFFER_TRACE(jh, "remove from checkpoint list"); ++ return __jbd2_journal_remove_checkpoint(jh); ++} ++ + /* + * journal_insert_checkpoint: put a committed buffer onto a checkpoint + * list so that we know when it is safe to clean the transaction out of +diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c +index 18611241f4513..6ef5022949c46 100644 +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) + * Otherwise, if the buffer has been written to disk, + * it is safe to remove the checkpoint and drop it. 
+ */ +- if (!buffer_dirty(bh)) { +- __jbd2_journal_remove_checkpoint(jh); ++ if (jbd2_journal_try_remove_checkpoint(jh) >= 0) { + spin_unlock(&journal->j_list_lock); + goto drop; + } +@@ -2112,20 +2111,14 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) + + jh = bh2jh(bh); + +- if (buffer_locked(bh) || buffer_dirty(bh)) +- goto out; +- + if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) +- goto out; ++ return; + + spin_lock(&journal->j_list_lock); +- if (jh->b_cp_transaction != NULL) { +- /* written-back checkpointed metadata buffer */ +- JBUFFER_TRACE(jh, "remove from checkpoint list"); +- __jbd2_journal_remove_checkpoint(jh); +- } ++ /* Remove written-back checkpointed metadata buffer */ ++ if (jh->b_cp_transaction != NULL) ++ jbd2_journal_try_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); +-out: + return; + } + +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index 91a2cf4bc5756..c212da35a052c 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -1443,6 +1443,7 @@ extern void jbd2_journal_commit_transaction(journal_t *); + void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); + unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan); + int __jbd2_journal_remove_checkpoint(struct journal_head *); ++int jbd2_journal_try_remove_checkpoint(struct journal_head *jh); + void jbd2_journal_destroy_checkpoint(journal_t *journal); + void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); + +-- +2.40.1 + diff --git a/queue-6.4/jbd2-remove-journal_clean_one_cp_list.patch b/queue-6.4/jbd2-remove-journal_clean_one_cp_list.patch new file mode 100644 index 00000000000..ae8be59d749 --- /dev/null +++ b/queue-6.4/jbd2-remove-journal_clean_one_cp_list.patch @@ -0,0 +1,235 @@ +From d72ce54562db245715c0cde915b699e322fed0ef Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 21:59:25 +0800 +Subject: jbd2: 
remove journal_clean_one_cp_list() + +From: Zhang Yi + +[ Upstream commit b98dba273a0e47dbfade89c9af73c5b012a4eabb ] + +journal_clean_one_cp_list() and journal_shrink_one_cp_list() are almost +the same, so merge them into journal_shrink_one_cp_list(), remove the +nr_to_scan parameter, always scan and try to free the whole checkpoint +list. + +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230606135928.434610-4-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 46f881b5b175 ("jbd2: fix a race when checking checkpoint buffer busy") +Signed-off-by: Sasha Levin +--- + fs/jbd2/checkpoint.c | 75 +++++++++---------------------------- + include/trace/events/jbd2.h | 12 ++---- + 2 files changed, 21 insertions(+), 66 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index 723b4eb112828..42b34cab64fbd 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -349,50 +349,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal) + + /* Checkpoint list management */ + +-/* +- * journal_clean_one_cp_list +- * +- * Find all the written-back checkpoint buffers in the given list and +- * release them. If 'destroy' is set, clean all buffers unconditionally. +- * +- * Called with j_list_lock held. +- * Returns 1 if we freed the transaction, 0 otherwise. +- */ +-static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) +-{ +- struct journal_head *last_jh; +- struct journal_head *next_jh = jh; +- +- if (!jh) +- return 0; +- +- last_jh = jh->b_cpprev; +- do { +- jh = next_jh; +- next_jh = jh->b_cpnext; +- +- if (!destroy && __cp_buffer_busy(jh)) +- return 0; +- +- if (__jbd2_journal_remove_checkpoint(jh)) +- return 1; +- /* +- * This function only frees up some memory +- * if possible so we dont have an obligation +- * to finish processing. 
Bail out if preemption +- * requested: +- */ +- if (need_resched()) +- return 0; +- } while (jh != last_jh); +- +- return 0; +-} +- + /* + * journal_shrink_one_cp_list + * +- * Find 'nr_to_scan' written-back checkpoint buffers in the given list ++ * Find all the written-back checkpoint buffers in the given list + * and try to release them. If the whole transaction is released, set + * the 'released' parameter. Return the number of released checkpointed + * buffers. +@@ -400,15 +360,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) + * Called with j_list_lock held. + */ + static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, +- unsigned long *nr_to_scan, +- bool *released) ++ bool destroy, bool *released) + { + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + unsigned long nr_freed = 0; + int ret; + +- if (!jh || *nr_to_scan == 0) ++ *released = false; ++ if (!jh) + return 0; + + last_jh = jh->b_cpprev; +@@ -416,8 +376,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, + jh = next_jh; + next_jh = jh->b_cpnext; + +- (*nr_to_scan)--; +- if (__cp_buffer_busy(jh)) ++ if (!destroy && __cp_buffer_busy(jh)) + continue; + + nr_freed++; +@@ -429,7 +388,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh, + + if (need_resched()) + break; +- } while (jh != last_jh && *nr_to_scan); ++ } while (jh != last_jh); + + return nr_freed; + } +@@ -447,11 +406,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, + unsigned long *nr_to_scan) + { + transaction_t *transaction, *last_transaction, *next_transaction; +- bool released; ++ bool __maybe_unused released; + tid_t first_tid = 0, last_tid = 0, next_tid = 0; + tid_t tid = 0; + unsigned long nr_freed = 0; +- unsigned long nr_scanned = *nr_to_scan; ++ unsigned long freed; + + again: + spin_lock(&journal->j_list_lock); +@@ -480,10 +439,11 @@ unsigned long 
jbd2_journal_shrink_checkpoint_list(journal_t *journal, + transaction = next_transaction; + next_transaction = transaction->t_cpnext; + tid = transaction->t_tid; +- released = false; + +- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list, +- nr_to_scan, &released); ++ freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, ++ false, &released); ++ nr_freed += freed; ++ (*nr_to_scan) -= min(*nr_to_scan, freed); + if (*nr_to_scan == 0) + break; + if (need_resched() || spin_needbreak(&journal->j_list_lock)) +@@ -504,9 +464,8 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, + if (*nr_to_scan && next_tid) + goto again; + out: +- nr_scanned -= *nr_to_scan; + trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid, +- nr_freed, nr_scanned, next_tid); ++ nr_freed, next_tid); + + return nr_freed; + } +@@ -522,7 +481,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, + void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) + { + transaction_t *transaction, *last_transaction, *next_transaction; +- int ret; ++ bool released; + + transaction = journal->j_checkpoint_transactions; + if (!transaction) +@@ -533,8 +492,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) + do { + transaction = next_transaction; + next_transaction = transaction->t_cpnext; +- ret = journal_clean_one_cp_list(transaction->t_checkpoint_list, +- destroy); ++ journal_shrink_one_cp_list(transaction->t_checkpoint_list, ++ destroy, &released); + /* + * This function only frees up some memory if possible so we + * dont have an obligation to finish processing. Bail out if +@@ -547,7 +506,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) + * avoids pointless scanning of transactions which still + * weren't checkpointed. 
+ */ +- if (!ret) ++ if (!released) + return; + } while (transaction != last_transaction); + } +diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h +index 8f5ee380d3093..5646ae15a957a 100644 +--- a/include/trace/events/jbd2.h ++++ b/include/trace/events/jbd2.h +@@ -462,11 +462,9 @@ TRACE_EVENT(jbd2_shrink_scan_exit, + TRACE_EVENT(jbd2_shrink_checkpoint_list, + + TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid, +- unsigned long nr_freed, unsigned long nr_scanned, +- tid_t next_tid), ++ unsigned long nr_freed, tid_t next_tid), + +- TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, +- nr_scanned, next_tid), ++ TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, next_tid), + + TP_STRUCT__entry( + __field(dev_t, dev) +@@ -474,7 +472,6 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list, + __field(tid_t, tid) + __field(tid_t, last_tid) + __field(unsigned long, nr_freed) +- __field(unsigned long, nr_scanned) + __field(tid_t, next_tid) + ), + +@@ -484,15 +481,14 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list, + __entry->tid = tid; + __entry->last_tid = last_tid; + __entry->nr_freed = nr_freed; +- __entry->nr_scanned = nr_scanned; + __entry->next_tid = next_tid; + ), + + TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu " +- "scanned %lu next transaction %u", ++ "next transaction %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->first_tid, __entry->tid, __entry->last_tid, +- __entry->nr_freed, __entry->nr_scanned, __entry->next_tid) ++ __entry->nr_freed, __entry->next_tid) + ); + + #endif /* _TRACE_JBD2_H */ +-- +2.40.1 + diff --git a/queue-6.4/jbd2-remove-t_checkpoint_io_list.patch b/queue-6.4/jbd2-remove-t_checkpoint_io_list.patch new file mode 100644 index 00000000000..43a5436be25 --- /dev/null +++ b/queue-6.4/jbd2-remove-t_checkpoint_io_list.patch @@ -0,0 +1,146 @@ +From dce09da699b4832f4f39ddc7176d8b146d70843a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 21:59:24 +0800 +Subject: jbd2: remove 
t_checkpoint_io_list + +From: Zhang Yi + +[ Upstream commit be22255360f80d3af789daad00025171a65424a5 ] + +Since t_checkpoint_io_list is no longer used in jbd2_log_do_checkpoint(), +it's time to remove the whole t_checkpoint_io_list logic. + +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230606135928.434610-3-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 46f881b5b175 ("jbd2: fix a race when checking checkpoint buffer busy") +Signed-off-by: Sasha Levin +--- + fs/jbd2/checkpoint.c | 42 ++---------------------------------------- + fs/jbd2/commit.c | 3 +-- + include/linux/jbd2.h | 6 ------ + 3 files changed, 3 insertions(+), 48 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index c4e0da6db7195..723b4eb112828 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -27,7 +27,7 @@ + * + * Called with j_list_lock held. + */ +-static inline void __buffer_unlink_first(struct journal_head *jh) ++static inline void __buffer_unlink(struct journal_head *jh) + { + transaction_t *transaction = jh->b_cp_transaction; + +@@ -40,23 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh) + } + } + +-/* +- * Unlink a buffer from a transaction checkpoint(io) list. +- * +- * Called with j_list_lock held. +- */ +-static inline void __buffer_unlink(struct journal_head *jh) +-{ +- transaction_t *transaction = jh->b_cp_transaction; +- +- __buffer_unlink_first(jh); +- if (transaction->t_checkpoint_io_list == jh) { +- transaction->t_checkpoint_io_list = jh->b_cpnext; +- if (transaction->t_checkpoint_io_list == jh) +- transaction->t_checkpoint_io_list = NULL; +- } +-} +- + /* + * Check a checkpoint buffer could be release or not. 
+ * +@@ -505,15 +488,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, + break; + if (need_resched() || spin_needbreak(&journal->j_list_lock)) + break; +- if (released) +- continue; +- +- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list, +- nr_to_scan, &released); +- if (*nr_to_scan == 0) +- break; +- if (need_resched() || spin_needbreak(&journal->j_list_lock)) +- break; + } while (transaction != last_transaction); + + if (transaction != last_transaction) { +@@ -568,17 +542,6 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) + */ + if (need_resched()) + return; +- if (ret) +- continue; +- /* +- * It is essential that we are as careful as in the case of +- * t_checkpoint_list with removing the buffer from the list as +- * we can possibly see not yet submitted buffers on io_list +- */ +- ret = journal_clean_one_cp_list(transaction-> +- t_checkpoint_io_list, destroy); +- if (need_resched()) +- return; + /* + * Stop scanning if we couldn't free the transaction. This + * avoids pointless scanning of transactions which still +@@ -663,7 +626,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) + jbd2_journal_put_journal_head(jh); + + /* Is this transaction empty? 
*/ +- if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list) ++ if (transaction->t_checkpoint_list) + return 0; + + /* +@@ -755,7 +718,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact + J_ASSERT(transaction->t_forget == NULL); + J_ASSERT(transaction->t_shadow_list == NULL); + J_ASSERT(transaction->t_checkpoint_list == NULL); +- J_ASSERT(transaction->t_checkpoint_io_list == NULL); + J_ASSERT(atomic_read(&transaction->t_updates) == 0); + J_ASSERT(journal->j_committing_transaction != transaction); + J_ASSERT(journal->j_running_transaction != transaction); +diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c +index b33155dd70017..1073259902a60 100644 +--- a/fs/jbd2/commit.c ++++ b/fs/jbd2/commit.c +@@ -1141,8 +1141,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) + spin_lock(&journal->j_list_lock); + commit_transaction->t_state = T_FINISHED; + /* Check if the transaction can be dropped now that we are finished */ +- if (commit_transaction->t_checkpoint_list == NULL && +- commit_transaction->t_checkpoint_io_list == NULL) { ++ if (commit_transaction->t_checkpoint_list == NULL) { + __jbd2_journal_drop_transaction(journal, commit_transaction); + jbd2_journal_free_transaction(commit_transaction); + } +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index f619bae1dcc5d..91a2cf4bc5756 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -622,12 +622,6 @@ struct transaction_s + */ + struct journal_head *t_checkpoint_list; + +- /* +- * Doubly-linked circular list of all buffers submitted for IO while +- * checkpointing. [j_list_lock] +- */ +- struct journal_head *t_checkpoint_io_list; +- + /* + * Doubly-linked circular list of metadata buffers being + * shadowed by log IO. 
The IO buffers on the iobuf list and +-- +2.40.1 + diff --git a/queue-6.4/mlxsw-fix-the-size-of-virt_router_msb.patch b/queue-6.4/mlxsw-fix-the-size-of-virt_router_msb.patch new file mode 100644 index 00000000000..893f11453f1 --- /dev/null +++ b/queue-6.4/mlxsw-fix-the-size-of-virt_router_msb.patch @@ -0,0 +1,88 @@ +From 0ceaec06151056a3e81d4e7dd562d2a650f30373 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 15:58:24 +0200 +Subject: mlxsw: Fix the size of 'VIRT_ROUTER_MSB' + +From: Amit Cohen + +[ Upstream commit 348c976be0a599918b88729def198a843701c9fe ] + +The field 'virtual router' was extended to 12 bits in Spectrum-4. +Therefore, the element 'MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB' needs 3 bits for +Spectrum < 4 and 4 bits for Spectrum >= 4. + +The elements are stored in an internal storage scratchpad. Currently, the +MSB is defined there as 3 bits. It means that for Spectrum-4, only 2K VRFs +can be used for multicast routing, as the highest bit is not really used by +the driver. Fix the definition of 'VIRT_ROUTER_MSB' to use 4 bits. Adjust +the definitions of 'virtual router' field in the blocks accordingly - use +'_avoid_size_check' for Spectrum-2 instead of for Spectrum-4. Fix the mask +in parse function to use 4 bits. 
+ +Fixes: 6d5d8ebb881c ("mlxsw: Rename virtual router flex key element") +Signed-off-by: Amit Cohen +Reviewed-by: Ido Schimmel +Signed-off-by: Petr Machata +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/79bed2b70f6b9ed58d4df02e9798a23da648015b.1692268427.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c | 4 ++-- + drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c | 2 +- + drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c | 4 ++-- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +index bd1a51a0a5408..f208a237d0b52 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +@@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6), +- MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3), +- MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8), ++ MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4), ++ MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4), + MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4), +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c +index e4f4cded2b6f9..b1178b7a7f51a 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c +@@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule, + key->vrid, GENMASK(7, 0)); + 
mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, +- key->vrid >> 8, GENMASK(2, 0)); ++ key->vrid >> 8, GENMASK(3, 0)); + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key); +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +index 00c32320f8915..173808c096bab 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +@@ -169,7 +169,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = { + + static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = { + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8), +- MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3), ++ MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true), + }; + + static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = { +@@ -319,7 +319,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = { + + static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = { + MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8), +- MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true), ++ MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4), + }; + + static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = { +-- +2.40.1 + diff --git a/queue-6.4/mlxsw-pci-set-time-stamp-fields-also-when-its-type-i.patch b/queue-6.4/mlxsw-pci-set-time-stamp-fields-also-when-its-type-i.patch new file mode 100644 index 00000000000..da2b2b979b7 --- /dev/null +++ b/queue-6.4/mlxsw-pci-set-time-stamp-fields-also-when-its-type-i.patch @@ -0,0 +1,75 @@ +From 8ddf2a1788b9d8af87089e3499cdf2e4ffa72f47 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 15:58:22 +0200 +Subject: mlxsw: pci: Set time stamp fields also when its 
type is MIRROR_UTC + +From: Danielle Ratson + +[ Upstream commit bc2de151ab6ad0762a04563527ec42e54dde572a ] + +Currently, in Spectrum-2 and above, time stamps are extracted from the CQE +into the time stamp fields in 'struct mlxsw_skb_cb', only when the CQE +time stamp type is UTC. The time stamps are read directly from the CQE and +software can get the time stamp in UTC format using CQEv2. + +From Spectrum-4, the time stamps that are read from the CQE are allowed +to be also from MIRROR_UTC type. + +Therefore, we get a warning [1] from the driver that the time stamp fields +were not set, when LLDP control packet is sent. + +Allow the time stamp type to be MIRROR_UTC and set the time stamp in this +case as well. + +[1] + WARNING: CPU: 11 PID: 0 at drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c:1409 mlxsw_sp2_ptp_hwtstamp_fill+0x1f/0x70 [mlxsw_spectrum] +[...] + Call Trace: + + mlxsw_sp2_ptp_receive+0x3c/0x80 [mlxsw_spectrum] + mlxsw_core_skb_receive+0x119/0x190 [mlxsw_core] + mlxsw_pci_cq_tasklet+0x3c9/0x780 [mlxsw_pci] + tasklet_action_common.constprop.0+0x9f/0x110 + __do_softirq+0xbb/0x296 + irq_exit_rcu+0x79/0xa0 + common_interrupt+0x86/0xa0 + + + +Fixes: 4735402173e6 ("mlxsw: spectrum: Extend to support Spectrum-4 ASIC") +Signed-off-by: Danielle Ratson +Reviewed-by: Ido Schimmel +Signed-off-by: Petr Machata +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/bcef4d044ef608a4e258d33a7ec0ecd91f480db5.1692268427.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlxsw/pci.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c +index c968309657dd1..51eea1f0529c8 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c +@@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci, + struct sk_buff *skb, + 
enum mlxsw_pci_cqe_v cqe_v, char *cqe) + { ++ u8 ts_type; ++ + if (cqe_v != MLXSW_PCI_CQE_V2) + return; + +- if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) != +- MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC) ++ ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe); ++ ++ if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC && ++ ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC) + return; + + mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe); +-- +2.40.1 + diff --git a/queue-6.4/mlxsw-reg-fix-sspr-register-layout.patch b/queue-6.4/mlxsw-reg-fix-sspr-register-layout.patch new file mode 100644 index 00000000000..cef4fbd28bd --- /dev/null +++ b/queue-6.4/mlxsw-reg-fix-sspr-register-layout.patch @@ -0,0 +1,62 @@ +From cf648ee0a9ca24643a736d7fafcb741ec031da2f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 15:58:23 +0200 +Subject: mlxsw: reg: Fix SSPR register layout + +From: Ido Schimmel + +[ Upstream commit 0dc63b9cfd4c5666ced52c829fdd65dcaeb9f0f1 ] + +The two most significant bits of the "local_port" field in the SSPR +register are always cleared since they are overwritten by the deprecated +and overlapping "sub_port" field. + +On systems with more than 255 local ports (e.g., Spectrum-4), this +results in the firmware maintaining invalid mappings between system port +and local port. Specifically, two different systems ports (0x1 and +0x101) point to the same local port (0x1), which eventually leads to +firmware errors. + +Fix by removing the deprecated "sub_port" field. 
+ +Fixes: fd24b29a1b74 ("mlxsw: reg: Align existing registers to use extended local_port field") +Signed-off-by: Ido Schimmel +Signed-off-by: Petr Machata +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/9b909a3033c8d3d6f67f237306bef4411c5e6ae4.1692268427.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlxsw/reg.h | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h +index 8165bf31a99ae..17160e867befb 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h ++++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h +@@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1); + */ + MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12); + +-/* reg_sspr_sub_port +- * Virtual port within the physical port. +- * Should be set to 0 when virtual ports are not enabled on the port. +- * +- * Access: RW +- */ +-MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8); +- + /* reg_sspr_system_port + * Unique identifier within the stacking domain that represents all the ports + * that are available in the system (external ports). 
+@@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port) + MLXSW_REG_ZERO(sspr, payload); + mlxsw_reg_sspr_m_set(payload, 1); + mlxsw_reg_sspr_local_port_set(payload, local_port); +- mlxsw_reg_sspr_sub_port_set(payload, 0); + mlxsw_reg_sspr_system_port_set(payload, local_port); + } + +-- +2.40.1 + diff --git a/queue-6.4/mm-disable-config_per_vma_lock-until-its-fixed.patch b/queue-6.4/mm-disable-config_per_vma_lock-until-its-fixed.patch new file mode 100644 index 00000000000..267bad5d841 --- /dev/null +++ b/queue-6.4/mm-disable-config_per_vma_lock-until-its-fixed.patch @@ -0,0 +1,54 @@ +From 841878e2ca4600b3ffaf09921cde08f9f944b9f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 5 Jul 2023 18:14:00 -0700 +Subject: mm: disable CONFIG_PER_VMA_LOCK until its fixed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Suren Baghdasaryan + +[ Upstream commit f96c48670319d685d18d50819ed0c1ef751ed2ac ] + +A memory corruption was reported in [1] with bisection pointing to the +patch [2] enabling per-VMA locks for x86. Disable per-VMA locks config to +prevent this issue until the fix is confirmed. This is expected to be a +temporary measure. 
+ +[1] https://bugzilla.kernel.org/show_bug.cgi?id=217624 +[2] https://lore.kernel.org/all/20230227173632.3292573-30-surenb@google.com + +Link: https://lkml.kernel.org/r/20230706011400.2949242-3-surenb@google.com +Reported-by: Jiri Slaby +Closes: https://lore.kernel.org/all/dbdef34c-3a07-5951-e1ae-e9c6e3cdf51b@kernel.org/ +Reported-by: Jacob Young +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217624 +Fixes: 0bff0aaea03e ("x86/mm: try VMA lock-based page fault handling first") +Signed-off-by: Suren Baghdasaryan +Cc: David Hildenbrand +Cc: Holger Hoffstätte +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/Kconfig | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/mm/Kconfig b/mm/Kconfig +index e3454087fd31a..1d2afc2567d0e 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -1198,8 +1198,9 @@ config ARCH_SUPPORTS_PER_VMA_LOCK + def_bool n + + config PER_VMA_LOCK +- def_bool y ++ bool "Enable per-vma locking during page fault handling." + depends on ARCH_SUPPORTS_PER_VMA_LOCK && MMU && SMP ++ depends on BROKEN + help + Allow per-vma locking during page fault handling. + +-- +2.40.1 + diff --git a/queue-6.4/net-bcmgenet-fix-return-value-check-for-fixed_phy_re.patch b/queue-6.4/net-bcmgenet-fix-return-value-check-for-fixed_phy_re.patch new file mode 100644 index 00000000000..02681c139b7 --- /dev/null +++ b/queue-6.4/net-bcmgenet-fix-return-value-check-for-fixed_phy_re.patch @@ -0,0 +1,38 @@ +From ede17bae121081abc970e4392542f2fdeb964694 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 13:12:21 +0800 +Subject: net: bcmgenet: Fix return value check for fixed_phy_register() + +From: Ruan Jinjie + +[ Upstream commit 32bbe64a1386065ab2aef8ce8cae7c689d0add6e ] + +The fixed_phy_register() function returns error pointers and never +returns NULL. Update the checks accordingly. 
+ +Fixes: b0ba512e25d7 ("net: bcmgenet: enable driver to work without a device tree") +Signed-off-by: Ruan Jinjie +Reviewed-by: Leon Romanovsky +Acked-by: Doug Berger +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c +index 0092e46c46f83..cc3afb605b1ec 100644 +--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c +@@ -617,7 +617,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) + }; + + phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); +- if (!phydev || IS_ERR(phydev)) { ++ if (IS_ERR(phydev)) { + dev_err(kdev, "failed to register fixed PHY device\n"); + return -ENODEV; + } +-- +2.40.1 + diff --git a/queue-6.4/net-bgmac-fix-return-value-check-for-fixed_phy_regis.patch b/queue-6.4/net-bgmac-fix-return-value-check-for-fixed_phy_regis.patch new file mode 100644 index 00000000000..fd44ffc2993 --- /dev/null +++ b/queue-6.4/net-bgmac-fix-return-value-check-for-fixed_phy_regis.patch @@ -0,0 +1,38 @@ +From a8e73ac731ed0a06d6d8a7f2451f8855dfacf75d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 13:12:20 +0800 +Subject: net: bgmac: Fix return value check for fixed_phy_register() + +From: Ruan Jinjie + +[ Upstream commit 23a14488ea5882dea5851b65c9fce2127ee8fcad ] + +The fixed_phy_register() function returns error pointers and never +returns NULL. Update the checks accordingly. + +Fixes: c25b23b8a387 ("bgmac: register fixed PHY for ARM BCM470X / BCM5301X chipsets") +Signed-off-by: Ruan Jinjie +Reviewed-by: Andrew Lunn +Reviewed-by: Leon Romanovsky +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bgmac.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c +index 10c7c232cc4ec..52ee3751187a2 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac) + int err; + + phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); +- if (!phy_dev || IS_ERR(phy_dev)) { ++ if (IS_ERR(phy_dev)) { + dev_err(bgmac->dev, "Failed to register fixed PHY device\n"); + return -ENODEV; + } +-- +2.40.1 + diff --git a/queue-6.4/net-dsa-felix-fix-oversize-frame-dropping-for-always.patch b/queue-6.4/net-dsa-felix-fix-oversize-frame-dropping-for-always.patch new file mode 100644 index 00000000000..8a246ee2f14 --- /dev/null +++ b/queue-6.4/net-dsa-felix-fix-oversize-frame-dropping-for-always.patch @@ -0,0 +1,73 @@ +From 094cc85a126225198f64ff6e7cdfeb67fadadf61 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 15:01:11 +0300 +Subject: net: dsa: felix: fix oversize frame dropping for always closed + tc-taprio gates + +From: Vladimir Oltean + +[ Upstream commit d44036cad31170da0cb9c728e80743f84267da6e ] + +The blamed commit resolved a bug where frames would still get stuck at +egress, even though they're smaller than the maxSDU[tc], because the +driver did not take into account the extra 33 ns that the queue system +needs for scheduling the frame. + +It now takes that into account, but the arithmetic that we perform in +vsc9959_tas_remaining_gate_len_ps() is buggy, because we operate on +64-bit unsigned integers, so gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS +may become a very large integer if gate_len_ns < 33 ns. 
+ +In practice, this means that we've introduced a regression where all +traffic class gates which are permanently closed will not get detected +by the driver, and we won't enable oversize frame dropping for them. + +Before: +mscc_felix 0000:00:00.5: port 0: max frame size 1526 needs 12400000 ps, 1152000 ps for mPackets at speed 1000 +mscc_felix 0000:00:00.5: port 0 tc 0 min gate len 1000000, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 1 min gate len 0, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 2 min gate len 0, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 3 min gate len 0, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 4 min gate len 0, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 5 min gate len 0, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 6 min gate len 0, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 7 min gate length 5120 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 615 octets including FCS + +After: +mscc_felix 0000:00:00.5: port 0: max frame size 1526 needs 12400000 ps, 1152000 ps for mPackets at speed 1000 +mscc_felix 0000:00:00.5: port 0 tc 0 min gate len 1000000, sending all frames +mscc_felix 0000:00:00.5: port 0 tc 1 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 2 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 3 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 4 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 5 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +mscc_felix 0000:00:00.5: 
port 0 tc 6 min gate length 0 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 1 octets including FCS +mscc_felix 0000:00:00.5: port 0 tc 7 min gate length 5120 ns not enough for max frame size 1526 at 1000 Mbps, dropping frames over 615 octets including FCS + +Fixes: 11afdc6526de ("net: dsa: felix: tc-taprio intervals smaller than MTU should send at least one packet") +Signed-off-by: Vladimir Oltean +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230817120111.3522827-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/ocelot/felix_vsc9959.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index ca69973ae91b9..b1ecd08cec96a 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -1081,6 +1081,9 @@ static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns) + if (gate_len_ns == U64_MAX) + return U64_MAX; + ++ if (gate_len_ns < VSC9959_TAS_MIN_GATE_LEN_NS) ++ return 0; ++ + return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC; + } + +-- +2.40.1 + diff --git a/queue-6.4/net-dsa-mt7530-fix-handling-of-802.1x-pae-frames.patch b/queue-6.4/net-dsa-mt7530-fix-handling-of-802.1x-pae-frames.patch new file mode 100644 index 00000000000..9060371049d --- /dev/null +++ b/queue-6.4/net-dsa-mt7530-fix-handling-of-802.1x-pae-frames.patch @@ -0,0 +1,58 @@ +From e6e407349a8ba7a9e3604cb932a4b71f198d0519 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 13 Aug 2023 13:59:17 +0300 +Subject: net: dsa: mt7530: fix handling of 802.1X PAE frames +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Arınç ÜNAL + +[ Upstream commit e94b590abfff2cdbf0bdaa7d9904364c8d480af5 ] + +802.1X PAE frames are link-local frames, therefore they must be trapped to +the CPU port. 
Currently, the MT753X switches treat 802.1X PAE frames as +regular multicast frames, therefore flooding them to user ports. To fix +this, set 802.1X PAE frames to be trapped to the CPU port(s). + +Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") +Signed-off-by: Arınç ÜNAL +Reviewed-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mt7530.c | 4 ++++ + drivers/net/dsa/mt7530.h | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c +index 7e773c4ba0463..32dc4f19c82c6 100644 +--- a/drivers/net/dsa/mt7530.c ++++ b/drivers/net/dsa/mt7530.c +@@ -1006,6 +1006,10 @@ mt753x_trap_frames(struct mt7530_priv *priv) + mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK, + MT753X_BPDU_CPU_ONLY); + ++ /* Trap 802.1X PAE frames to the CPU port(s) */ ++ mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK, ++ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY)); ++ + /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */ + mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK, + MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY)); +diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h +index 08045b035e6ab..17e42d30fff4b 100644 +--- a/drivers/net/dsa/mt7530.h ++++ b/drivers/net/dsa/mt7530.h +@@ -66,6 +66,8 @@ enum mt753x_id { + /* Registers for BPDU and PAE frame control*/ + #define MT753X_BPC 0x24 + #define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0) ++#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16) ++#define MT753X_PAE_PORT_FW(x) FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x) + + /* Register for :03 and :0E MAC DA frame control */ + #define MT753X_RGAC2 0x2c +-- +2.40.1 + diff --git a/queue-6.4/net-ethernet-mtk_eth_soc-fix-null-pointer-on-hw-rese.patch b/queue-6.4/net-ethernet-mtk_eth_soc-fix-null-pointer-on-hw-rese.patch new file mode 100644 index 00000000000..8ac7fdea47f --- /dev/null +++ b/queue-6.4/net-ethernet-mtk_eth_soc-fix-null-pointer-on-hw-rese.patch 
@@ -0,0 +1,61 @@ +From f64448a618d818b00b22c2308ca2018495b0cb2d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Aug 2023 17:12:44 +0100 +Subject: net: ethernet: mtk_eth_soc: fix NULL pointer on hw reset + +From: Daniel Golle + +[ Upstream commit 604204fcb321abe81238551936ecda5269e81076 ] + +When a hardware reset is triggered on devices not initializing WED the +calls to mtk_wed_fe_reset and mtk_wed_fe_reset_complete dereference a +pointer on uninitialized stack memory. +Break out of both functions in case a hw_list entry is 0. + +Fixes: 08a764a7c51b ("net: ethernet: mtk_wed: add reset/reset_complete callbacks") +Signed-off-by: Daniel Golle +Reviewed-by: Simon Horman +Acked-by: Lorenzo Bianconi +Link: https://lore.kernel.org/r/5465c1609b464cc7407ae1530c40821dcdf9d3e6.1692634266.git.daniel@makrotopia.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_wed.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c +index 985cff910f30c..3b651efcc25e1 100644 +--- a/drivers/net/ethernet/mediatek/mtk_wed.c ++++ b/drivers/net/ethernet/mediatek/mtk_wed.c +@@ -221,9 +221,13 @@ void mtk_wed_fe_reset(void) + + for (i = 0; i < ARRAY_SIZE(hw_list); i++) { + struct mtk_wed_hw *hw = hw_list[i]; +- struct mtk_wed_device *dev = hw->wed_dev; ++ struct mtk_wed_device *dev; + int err; + ++ if (!hw) ++ break; ++ ++ dev = hw->wed_dev; + if (!dev || !dev->wlan.reset) + continue; + +@@ -244,8 +248,12 @@ void mtk_wed_fe_reset_complete(void) + + for (i = 0; i < ARRAY_SIZE(hw_list); i++) { + struct mtk_wed_hw *hw = hw_list[i]; +- struct mtk_wed_device *dev = hw->wed_dev; ++ struct mtk_wed_device *dev; ++ ++ if (!hw) ++ break; + ++ dev = hw->wed_dev; + if (!dev || !dev->wlan.reset_complete) + continue; + +-- +2.40.1 + diff --git a/queue-6.4/net-mdio-mdio-bitbang-fix-c45-read-write-protocol.patch 
b/queue-6.4/net-mdio-mdio-bitbang-fix-c45-read-write-protocol.patch new file mode 100644 index 00000000000..dabc0967a68 --- /dev/null +++ b/queue-6.4/net-mdio-mdio-bitbang-fix-c45-read-write-protocol.patch @@ -0,0 +1,54 @@ +From b08528505c9e0c8b60ca9989445d7e065b2a628f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Aug 2023 21:06:52 +0300 +Subject: net: mdio: mdio-bitbang: Fix C45 read/write protocol + +From: Serge Semin + +[ Upstream commit 2572ce62415cf3b632391091447252e2661ed520 ] + +Based on the original code semantic in case of Clause 45 MDIO, the address +command is supposed to be followed by the command sending the MMD address, +not the CSR address. The commit 002dd3de097c ("net: mdio: mdio-bitbang: +Separate C22 and C45 transactions") has erroneously broken that. So most +likely due to an unfortunate variable name it switched the code to sending +the CSR address. In our case it caused the protocol malfunction so the +read operation always failed with the turnaround bit always been driven to +one by PHY instead of zero. Fix that by getting back the correct +behaviour: sending MMD address command right after the regular address +command. + +Fixes: 002dd3de097c ("net: mdio: mdio-bitbang: Separate C22 and C45 transactions") +Signed-off-by: Serge Semin +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/mdio/mdio-bitbang.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c +index b83932562be21..81b7748c10ce0 100644 +--- a/drivers/net/mdio/mdio-bitbang.c ++++ b/drivers/net/mdio/mdio-bitbang.c +@@ -186,7 +186,7 @@ int mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, int reg) + struct mdiobb_ctrl *ctrl = bus->priv; + + mdiobb_cmd_addr(ctrl, phy, devad, reg); +- mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg); ++ mdiobb_cmd(ctrl, MDIO_C45_READ, phy, devad); + + return mdiobb_read_common(bus, phy); + } +@@ -222,7 +222,7 @@ int mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, int reg, u16 val) + struct mdiobb_ctrl *ctrl = bus->priv; + + mdiobb_cmd_addr(ctrl, phy, devad, reg); +- mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg); ++ mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, devad); + + return mdiobb_write_common(bus, val); + } +-- +2.40.1 + diff --git a/queue-6.4/net-sched-fix-a-qdisc-modification-with-ambiguous-co.patch b/queue-6.4/net-sched-fix-a-qdisc-modification-with-ambiguous-co.patch new file mode 100644 index 00000000000..0a0606645ad --- /dev/null +++ b/queue-6.4/net-sched-fix-a-qdisc-modification-with-ambiguous-co.patch @@ -0,0 +1,138 @@ +From 8aaeb4c01370bd9ea12de9b286d7cae485dcfbc7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Aug 2023 06:12:31 -0400 +Subject: net/sched: fix a qdisc modification with ambiguous command request + +From: Jamal Hadi Salim + +[ Upstream commit da71714e359b64bd7aab3bd56ec53f307f058133 ] + +When replacing an existing root qdisc, with one that is of the same kind, the +request boils down to essentially a parameterization change i.e not one that +requires allocation and grafting of a new qdisc. 
syzbot was able to create a +scenario which resulted in a taprio qdisc replacing an existing taprio qdisc +with a combination of NLM_F_CREATE, NLM_F_REPLACE and NLM_F_EXCL leading to +create and graft scenario. +The fix ensures that only when the qdisc kinds are different that we should +allow a create and graft, otherwise it goes into the "change" codepath. + +While at it, fix the code and comments to improve readability. + +While syzbot was able to create the issue, it did not zone in on the root cause. +Analysis from Vladimir Oltean helped narrow it down. + +v1->v2 changes: +- remove "inline" function definition (Vladimir) +- remove extraneous braces in branches (Vladimir) +- change inline function names (Pedro) +- Run tdc tests (Victor) +v2->v3 changes: +- don't break else/if (Simon) + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: syzbot+a3618a167af2021433cd@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/netdev/20230816225759.g25x76kmgzya2gei@skbuf/T/ +Tested-by: Vladimir Oltean +Tested-by: Victor Nogueira +Reviewed-by: Pedro Tammela +Reviewed-by: Victor Nogueira +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_api.c | 53 ++++++++++++++++++++++++++++++++++----------- + 1 file changed, 40 insertions(+), 13 deletions(-) + +diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c +index aa6b1fe651519..e9eaf637220e9 100644 +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + return 0; + } + ++static bool req_create_or_replace(struct nlmsghdr *n) ++{ ++ return (n->nlmsg_flags & NLM_F_CREATE && ++ n->nlmsg_flags & NLM_F_REPLACE); ++} ++ ++static bool req_create_exclusive(struct nlmsghdr *n) ++{ ++ return (n->nlmsg_flags & NLM_F_CREATE && ++ n->nlmsg_flags & NLM_F_EXCL); ++} ++ ++static bool req_change(struct nlmsghdr *n) ++{ ++ return (!(n->nlmsg_flags & NLM_F_CREATE) && ++ !(n->nlmsg_flags & NLM_F_REPLACE) && ++ !(n->nlmsg_flags & NLM_F_EXCL)); ++} ++ + /* + * Create/change qdisc. + */ +- + static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + struct netlink_ext_ack *extack) + { +@@ -1644,27 +1662,35 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + * + * We know, that some child q is already + * attached to this parent and have choice: +- * either to change it or to create/graft new one. ++ * 1) change it or 2) create/graft new one. ++ * If the requested qdisc kind is different ++ * than the existing one, then we choose graft. ++ * If they are the same then this is "change" ++ * operation - just let it fallthrough.. + * + * 1. We are allowed to create/graft only +- * if CREATE and REPLACE flags are set. ++ * if the request is explicitly stating ++ * "please create if it doesn't exist". + * +- * 2. If EXCL is set, requestor wanted to say, +- * that qdisc tcm_handle is not expected ++ * 2. If the request is to exclusive create ++ * then the qdisc tcm_handle is not expected + * to exist, so that we choose create/graft too. + * + * 3. The last case is when no flags are set. 
++ * This will happen when for example tc ++ * utility issues a "change" command. + * Alas, it is sort of hole in API, we + * cannot decide what to do unambiguously. +- * For now we select create/graft, if +- * user gave KIND, which does not match existing. ++ * For now we select create/graft. + */ +- if ((n->nlmsg_flags & NLM_F_CREATE) && +- (n->nlmsg_flags & NLM_F_REPLACE) && +- ((n->nlmsg_flags & NLM_F_EXCL) || +- (tca[TCA_KIND] && +- nla_strcmp(tca[TCA_KIND], q->ops->id)))) +- goto create_n_graft; ++ if (tca[TCA_KIND] && ++ nla_strcmp(tca[TCA_KIND], q->ops->id)) { ++ if (req_create_or_replace(n) || ++ req_create_exclusive(n)) ++ goto create_n_graft; ++ else if (req_change(n)) ++ goto create_n_graft2; ++ } + } + } + } else { +@@ -1698,6 +1724,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, + NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); + return -ENOENT; + } ++create_n_graft2: + if (clid == TC_H_INGRESS) { + if (dev_ingress_queue(dev)) { + q = qdisc_create(dev, dev_ingress_queue(dev), +-- +2.40.1 + diff --git a/queue-6.4/net-validate-veth-and-vxcan-peer-ifindexes.patch b/queue-6.4/net-validate-veth-and-vxcan-peer-ifindexes.patch new file mode 100644 index 00000000000..e0ad7a6ac74 --- /dev/null +++ b/queue-6.4/net-validate-veth-and-vxcan-peer-ifindexes.patch @@ -0,0 +1,137 @@ +From 7985f5e94c001acf633b7efb24317034f382e2d4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 18:26:02 -0700 +Subject: net: validate veth and vxcan peer ifindexes + +From: Jakub Kicinski + +[ Upstream commit f534f6581ec084fe94d6759f7672bd009794b07e ] + +veth and vxcan need to make sure the ifindexes of the peer +are not negative, core does not validate this. 
+ +Using iproute2 with user-space-level checking removed: + +Before: + + # ./ip link add index 10 type veth peer index -1 + # ip link show + 1: lo: mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000 + link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 + 2: enp1s0: mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 1000 + link/ether 52:54:00:74:b2:03 brd ff:ff:ff:ff:ff:ff + 10: veth1@veth0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/ether 8a:90:ff:57:6d:5d brd ff:ff:ff:ff:ff:ff + -1: veth0@veth1: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 + link/ether ae:ed:18:e6:fa:7f brd ff:ff:ff:ff:ff:ff + +Now: + + $ ./ip link add index 10 type veth peer index -1 + Error: ifindex can't be negative. + +This problem surfaced in net-next because an explicit WARN() +was added, the root cause is older. + +Fixes: e6f8f1a739b6 ("veth: Allow to create peer link with given ifindex") +Fixes: a8f820a380a2 ("can: add Virtual CAN Tunnel driver (vxcan)") +Reported-by: syzbot+5ba06978f34abb058571@syzkaller.appspotmail.com +Signed-off-by: Jakub Kicinski +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/can/vxcan.c | 7 +------ + drivers/net/veth.c | 5 +---- + include/net/rtnetlink.h | 4 ++-- + net/core/rtnetlink.c | 22 ++++++++++++++++++---- + 4 files changed, 22 insertions(+), 16 deletions(-) + +diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c +index 4068d962203d6..98c669ad51414 100644 +--- a/drivers/net/can/vxcan.c ++++ b/drivers/net/can/vxcan.c +@@ -192,12 +192,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev, + + nla_peer = data[VXCAN_INFO_PEER]; + ifmp = nla_data(nla_peer); +- err = rtnl_nla_parse_ifla(peer_tb, +- nla_data(nla_peer) + +- sizeof(struct ifinfomsg), +- nla_len(nla_peer) - +- sizeof(struct ifinfomsg), +- NULL); ++ err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); + if (err < 0) + return err; + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 76019949e3fe9..c977b704f1342 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -1851,10 +1851,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev, + + nla_peer = data[VETH_INFO_PEER]; + ifmp = nla_data(nla_peer); +- err = rtnl_nla_parse_ifla(peer_tb, +- nla_data(nla_peer) + sizeof(struct ifinfomsg), +- nla_len(nla_peer) - sizeof(struct ifinfomsg), +- NULL); ++ err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); + if (err < 0) + return err; + +diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h +index d9076a7a430c2..6506221c5fe31 100644 +--- a/include/net/rtnetlink.h ++++ b/include/net/rtnetlink.h +@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr * + int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, + u32 portid, const struct nlmsghdr *nlh); + +-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, +- struct netlink_ext_ack *exterr); ++int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, ++ struct netlink_ext_ack *exterr); 
+ struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid); + + #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index aa1743b2b770b..baa323ca37c42 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2268,13 +2268,27 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + return err; + } + +-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, +- struct netlink_ext_ack *exterr) ++int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, ++ struct netlink_ext_ack *exterr) + { +- return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy, ++ const struct ifinfomsg *ifmp; ++ const struct nlattr *attrs; ++ size_t len; ++ ++ ifmp = nla_data(nla_peer); ++ attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg); ++ len = nla_len(nla_peer) - sizeof(struct ifinfomsg); ++ ++ if (ifmp->ifi_index < 0) { ++ NL_SET_ERR_MSG_ATTR(exterr, nla_peer, ++ "ifindex can't be negative"); ++ return -EINVAL; ++ } ++ ++ return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy, + exterr); + } +-EXPORT_SYMBOL(rtnl_nla_parse_ifla); ++EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg); + + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) + { +-- +2.40.1 + diff --git a/queue-6.4/netfilter-nf_tables-defer-gc-run-if-previous-batch-i.patch b/queue-6.4/netfilter-nf_tables-defer-gc-run-if-previous-batch-i.patch new file mode 100644 index 00000000000..96079b38fc5 --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-defer-gc-run-if-previous-batch-i.patch @@ -0,0 +1,81 @@ +From e240994b5adafca406677977ba42d7214ad9d1f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Aug 2023 22:03:57 +0200 +Subject: netfilter: nf_tables: defer gc run if previous batch is still pending + +From: Florian Westphal + +[ Upstream commit 8e51830e29e12670b4c10df070a4ea4c9593e961 ] + +Don't queue more gc work, else we may 
queue the same elements multiple +times. + +If an element is flagged as dead, this can mean that either the previous +gc request was invalidated/discarded by a transaction or that the previous +request is still pending in the system work queue. + +The latter will happen if the gc interval is set to a very low value, +e.g. 1ms, and system work queue is backlogged. + +The set's refcount is 1 if no previous gc requests are queued, so add +a helper for this and skip gc run if old requests are pending. + +Add a helper for this and skip the gc run in this case. + +Fixes: f6c383b8c31a ("netfilter: nf_tables: adapt set backend to use GC transaction API") +Signed-off-by: Florian Westphal +Reviewed-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 5 +++++ + net/netfilter/nft_set_hash.c | 3 +++ + net/netfilter/nft_set_rbtree.c | 3 +++ + 3 files changed, 11 insertions(+) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index a9a730fb9f963..394b22b44b0e8 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -586,6 +586,11 @@ static inline void *nft_set_priv(const struct nft_set *set) + return (void *)set->data; + } + ++static inline bool nft_set_gc_is_pending(const struct nft_set *s) ++{ ++ return refcount_read(&s->refs) != 1; ++} ++ + static inline struct nft_set *nft_set_container_of(const void *priv) + { + return (void *)priv - offsetof(struct nft_set, data); +diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c +index cef5df8460009..524763659f251 100644 +--- a/net/netfilter/nft_set_hash.c ++++ b/net/netfilter/nft_set_hash.c +@@ -326,6 +326,9 @@ static void nft_rhash_gc(struct work_struct *work) + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + ++ if (nft_set_gc_is_pending(set)) ++ goto done; ++ + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; +diff --git a/net/netfilter/nft_set_rbtree.c 
b/net/netfilter/nft_set_rbtree.c +index f9d4c8fcbbf82..c6435e7092319 100644 +--- a/net/netfilter/nft_set_rbtree.c ++++ b/net/netfilter/nft_set_rbtree.c +@@ -611,6 +611,9 @@ static void nft_rbtree_gc(struct work_struct *work) + nft_net = nft_pernet(net); + gc_seq = READ_ONCE(nft_net->gc_seq); + ++ if (nft_set_gc_is_pending(set)) ++ goto done; ++ + gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + if (!gc) + goto done; +-- +2.40.1 + diff --git a/queue-6.4/netfilter-nf_tables-fix-out-of-memory-error-handling.patch b/queue-6.4/netfilter-nf_tables-fix-out-of-memory-error-handling.patch new file mode 100644 index 00000000000..a45d8ecbcbd --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-fix-out-of-memory-error-handling.patch @@ -0,0 +1,65 @@ +From df204178d9a07bce88929df48bddc46c14111f0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Aug 2023 19:49:52 +0200 +Subject: netfilter: nf_tables: fix out of memory error handling + +From: Florian Westphal + +[ Upstream commit 5e1be4cdc98c989d5387ce94ff15b5ad06a5b681 ] + +Several instances of pipapo_resize() don't propagate allocation failures, +this causes a crash when fault injection is enabled for gfp_kernel slabs. 
+ +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Signed-off-by: Florian Westphal +Reviewed-by: Stefano Brivio +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_set_pipapo.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 352180b123fc7..58bd514260b90 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -902,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f) + static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k, + int mask_bits) + { +- int rule = f->rules++, group, ret, bit_offset = 0; ++ int rule = f->rules, group, ret, bit_offset = 0; + +- ret = pipapo_resize(f, f->rules - 1, f->rules); ++ ret = pipapo_resize(f, f->rules, f->rules + 1); + if (ret) + return ret; + ++ f->rules++; ++ + for (group = 0; group < f->groups; group++) { + int i, v; + u8 mask; +@@ -1052,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f, + step++; + if (step >= len) { + if (!masks) { +- pipapo_insert(f, base, 0); ++ err = pipapo_insert(f, base, 0); ++ if (err < 0) ++ return err; + masks = 1; + } + goto out; +@@ -1235,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, + else + ret = pipapo_expand(f, start, end, f->groups * f->bb); + ++ if (ret < 0) ++ return ret; ++ + if (f->bsize > bsize_max) + bsize_max = f->bsize; + +-- +2.40.1 + diff --git a/queue-6.4/netfilter-nf_tables-flush-pending-destroy-work-befor.patch b/queue-6.4/netfilter-nf_tables-flush-pending-destroy-work-befor.patch new file mode 100644 index 00000000000..bcd15d33f38 --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-flush-pending-destroy-work-befor.patch @@ -0,0 +1,44 @@ +From a0227f3bd4faa1bff72b7b65e1c309cfc3fb4088 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 01:13:31 +0200 +Subject: netfilter: nf_tables: flush pending 
destroy work before netlink + notifier + +From: Pablo Neira Ayuso + +[ Upstream commit 2c9f0293280e258606e54ed2b96fa71498432eae ] + +Destroy work waits for the RCU grace period then it releases the objects +with no mutex held. All released objects follow this path for +transactions, therefore, order is guaranteed and references to top-level +objects in the hierarchy remain valid. + +However, netlink notifier might interfere with pending destroy work. +rcu_barrier() is not correct because objects are not released via RCU +callback. Flush destroy work before releasing objects from netlink +notifier path. + +Fixes: d4bc8271db21 ("netfilter: nf_tables: netlink notifier might race to release objects") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 5275c4112b57d..539bc5d5c12fd 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -11084,7 +11084,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, + gc_seq = nft_gc_seq_begin(nft_net); + + if (!list_empty(&nf_tables_destroy_list)) +- rcu_barrier(); ++ nf_tables_trans_destroy_flush_work(); + again: + list_for_each_entry(table, &nft_net->tables, list) { + if (nft_table_has_owner(table) && +-- +2.40.1 + diff --git a/queue-6.4/netfilter-nf_tables-gc-transaction-race-with-abort-p.patch b/queue-6.4/netfilter-nf_tables-gc-transaction-race-with-abort-p.patch new file mode 100644 index 00000000000..dc0448e0b34 --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-gc-transaction-race-with-abort-p.patch @@ -0,0 +1,42 @@ +From c5ae402fc5d60b333ec3f414dccf725b3221b6c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 01:13:52 +0200 +Subject: netfilter: nf_tables: GC transaction race with abort path + +From: Pablo Neira Ayuso + +[ Upstream 
commit 720344340fb9be2765bbaab7b292ece0a4570eae ] + +Abort path is missing a synchronization point with GC transactions. Add +GC sequence number hence any GC transaction losing race will be +discarded. + +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 539bc5d5c12fd..9cd8c14a0faf4 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -10348,8 +10348,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb, + enum nfnl_abort_action action) + { + struct nftables_pernet *nft_net = nft_pernet(net); +- int ret = __nf_tables_abort(net, action); ++ unsigned int gc_seq; ++ int ret; + ++ gc_seq = nft_gc_seq_begin(nft_net); ++ ret = __nf_tables_abort(net, action); ++ nft_gc_seq_end(nft_net, gc_seq); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +-- +2.40.1 + diff --git a/queue-6.4/netfilter-nf_tables-use-correct-lock-to-protect-gc_l.patch b/queue-6.4/netfilter-nf_tables-use-correct-lock-to-protect-gc_l.patch new file mode 100644 index 00000000000..538878b8229 --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-use-correct-lock-to-protect-gc_l.patch @@ -0,0 +1,39 @@ +From ecade17e4529d650ce9796bfc8bc94b96423d928 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Aug 2023 14:33:32 +0200 +Subject: netfilter: nf_tables: use correct lock to protect gc_list + +From: Pablo Neira Ayuso + +[ Upstream commit 8357bc946a2abc2a10ca40e5a2105d2b4c57515e ] + +Use nf_tables_gc_list_lock spinlock, not nf_tables_destroy_list_lock to +protect the gc list. 
+ +Fixes: 5f68718b34a5 ("netfilter: nf_tables: GC transaction API to avoid race with control plane") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 9cd8c14a0faf4..ad38f84a8f11a 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9470,9 +9470,9 @@ static void nft_trans_gc_work(struct work_struct *work) + struct nft_trans_gc *trans, *next; + LIST_HEAD(trans_gc_list); + +- spin_lock(&nf_tables_destroy_list_lock); ++ spin_lock(&nf_tables_gc_list_lock); + list_splice_init(&nf_tables_gc_list, &trans_gc_list); +- spin_unlock(&nf_tables_destroy_list_lock); ++ spin_unlock(&nf_tables_gc_list_lock); + + list_for_each_entry_safe(trans, next, &trans_gc_list, list) { + list_del(&trans->list); +-- +2.40.1 + diff --git a/queue-6.4/netfilter-nf_tables-validate-all-pending-tables.patch b/queue-6.4/netfilter-nf_tables-validate-all-pending-tables.patch new file mode 100644 index 00000000000..a9992cbb7ef --- /dev/null +++ b/queue-6.4/netfilter-nf_tables-validate-all-pending-tables.patch @@ -0,0 +1,100 @@ +From ee214106c800c055d8fb979168cf039fe1a3da95 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 20:28:32 +0200 +Subject: netfilter: nf_tables: validate all pending tables + +From: Florian Westphal + +[ Upstream commit 4b80ced971b0d118f9a11dd503a5833a5016de92 ] + +We have to validate all tables in the transaction that are in +VALIDATE_DO state, the blamed commit below did not move the break +statement to its right location so we only validate one table. + +Moreover, we can't init table->validate to _SKIP when a table object +is allocated. + +If we do, then if a transaction creates a new table and then +fails the transaction, nfnetlink will loop and nft will hang until +user cancels the command. 
+ +Add back the pernet state as a place to stash the last state encountered. +This is either _DO (we hit an error during commit validation) or _SKIP +(transaction passed all checks). + +Fixes: 00c320f9b755 ("netfilter: nf_tables: make validation state per table") +Reported-by: Pablo Neira Ayuso +Signed-off-by: Florian Westphal +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tables.h | 1 + + net/netfilter/nf_tables_api.c | 11 +++++++---- + 2 files changed, 8 insertions(+), 4 deletions(-) + +diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h +index ad97049e28881..a9a730fb9f963 100644 +--- a/include/net/netfilter/nf_tables.h ++++ b/include/net/netfilter/nf_tables.h +@@ -1817,6 +1817,7 @@ struct nftables_pernet { + u64 table_handle; + unsigned int base_seq; + unsigned int gc_seq; ++ u8 validate_state; + }; + + extern unsigned int nf_tables_net_id; +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index b280b151a9e98..5275c4112b57d 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1372,7 +1372,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info, + if (table == NULL) + goto err_kzalloc; + +- table->validate_state = NFT_VALIDATE_SKIP; ++ table->validate_state = nft_net->validate_state; + table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT); + if (table->name == NULL) + goto err_strdup; +@@ -9065,9 +9065,8 @@ static int nf_tables_validate(struct net *net) + return -EAGAIN; + + nft_validate_state_update(table, NFT_VALIDATE_SKIP); ++ break; + } +- +- break; + } + + return 0; +@@ -9813,8 +9812,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + } + + /* 0. Validate ruleset, otherwise roll back for error reporting. 
*/ +- if (nf_tables_validate(net) < 0) ++ if (nf_tables_validate(net) < 0) { ++ nft_net->validate_state = NFT_VALIDATE_DO; + return -EAGAIN; ++ } + + err = nft_flow_rule_offload_commit(net); + if (err < 0) +@@ -10070,6 +10071,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) + nf_tables_commit_audit_log(&adl, nft_net->base_seq); + + nft_gc_seq_end(nft_net, gc_seq); ++ nft_net->validate_state = NFT_VALIDATE_SKIP; + nf_tables_commit_release(net); + + return 0; +@@ -11126,6 +11128,7 @@ static int __net_init nf_tables_init_net(struct net *net) + mutex_init(&nft_net->commit_mutex); + nft_net->base_seq = 1; + nft_net->gc_seq = 0; ++ nft_net->validate_state = NFT_VALIDATE_SKIP; + + return 0; + } +-- +2.40.1 + diff --git a/queue-6.4/nfsv4-fix-out-path-in-__nfs4_get_acl_uncached.patch b/queue-6.4/nfsv4-fix-out-path-in-__nfs4_get_acl_uncached.patch new file mode 100644 index 00000000000..04543110b1d --- /dev/null +++ b/queue-6.4/nfsv4-fix-out-path-in-__nfs4_get_acl_uncached.patch @@ -0,0 +1,46 @@ +From fc307541c3e4c56d0fe0c735e2aa9074be9a745c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jul 2023 14:59:30 +0300 +Subject: NFSv4: fix out path in __nfs4_get_acl_uncached + +From: Fedor Pchelkin + +[ Upstream commit f4e89f1a6dab4c063fc1e823cc9dddc408ff40cf ] + +Another highly rare error case when a page allocating loop (inside +__nfs4_get_acl_uncached, this time) is not properly unwound on error. +Since pages array is allocated being uninitialized, need to free only +lower array indices. NULL checks were useful before commit 62a1573fcf84 +("NFSv4 fix acl retrieval over krb5i/krb5p mounts") when the array had +been initialized to zero on stack. + +Found by Linux Verification Center (linuxtesting.org). 
+ +Fixes: 62a1573fcf84 ("NFSv4 fix acl retrieval over krb5i/krb5p mounts") +Signed-off-by: Fedor Pchelkin +Reviewed-by: Benjamin Coddington +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs4proc.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c +index 9faba2dac11dd..f16742b8e0e21 100644 +--- a/fs/nfs/nfs4proc.c ++++ b/fs/nfs/nfs4proc.c +@@ -6004,9 +6004,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, + out_ok: + ret = res.acl_len; + out_free: +- for (i = 0; i < npages; i++) +- if (pages[i]) +- __free_page(pages[i]); ++ while (--i >= 0) ++ __free_page(pages[i]); + if (res.acl_scratch) + __free_page(res.acl_scratch); + kfree(pages); +-- +2.40.1 + diff --git a/queue-6.4/nfsv4.2-fix-error-handling-in-nfs42_proc_getxattr.patch b/queue-6.4/nfsv4.2-fix-error-handling-in-nfs42_proc_getxattr.patch new file mode 100644 index 00000000000..cebc962b935 --- /dev/null +++ b/queue-6.4/nfsv4.2-fix-error-handling-in-nfs42_proc_getxattr.patch @@ -0,0 +1,51 @@ +From 92598baddb560829e2d578deeceed592d1db3aec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jul 2023 14:58:58 +0300 +Subject: NFSv4.2: fix error handling in nfs42_proc_getxattr + +From: Fedor Pchelkin + +[ Upstream commit 4e3733fd2b0f677faae21cf838a43faf317986d3 ] + +There is a slight issue with error handling code inside +nfs42_proc_getxattr(). If page allocating loop fails then we free the +failing page array element which is NULL but __free_page() can't deal with +NULL args. + +Found by Linux Verification Center (linuxtesting.org). 
+ +Fixes: a1f26739ccdc ("NFSv4.2: improve page handling for GETXATTR") +Signed-off-by: Fedor Pchelkin +Reviewed-by: Benjamin Coddington +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/nfs42proc.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 93e306bf4430f..5d7e0511f3513 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -1360,7 +1360,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, + for (i = 0; i < np; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) { +- np = i + 1; + err = -ENOMEM; + goto out; + } +@@ -1384,8 +1383,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, + } while (exception.retry); + + out: +- while (--np >= 0) +- __free_page(pages[np]); ++ while (--i >= 0) ++ __free_page(pages[i]); + kfree(pages); + + return err; +-- +2.40.1 + diff --git a/queue-6.4/octeontx2-af-sdp-fix-receive-link-config.patch b/queue-6.4/octeontx2-af-sdp-fix-receive-link-config.patch new file mode 100644 index 00000000000..a943553bbf9 --- /dev/null +++ b/queue-6.4/octeontx2-af-sdp-fix-receive-link-config.patch @@ -0,0 +1,45 @@ +From 50763b304335eb70f351713e68801a564cf97d12 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 12:00:06 +0530 +Subject: octeontx2-af: SDP: fix receive link config + +From: Hariprasad Kelam + +[ Upstream commit 05f3d5bc23524bed6f043dfe6b44da687584f9fb ] + +On SDP interfaces, frame oversize and undersize errors are +observed as driver is not considering packet sizes of all +subscribers of the link before updating the link config. + +This patch fixes the same. 
+ +Fixes: 9b7dd87ac071 ("octeontx2-af: Support to modify min/max allowed packet lengths") +Signed-off-by: Hariprasad Kelam +Signed-off-by: Sunil Goutham +Reviewed-by: Leon Romanovsky +Link: https://lore.kernel.org/r/20230817063006.10366-1-hkelam@marvell.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +index 8cdf91a5bf44f..49c1dbe5ec788 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c ++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +@@ -4016,9 +4016,10 @@ int rvu_mbox_handler_nix_set_hw_frs(struct rvu *rvu, struct nix_frs_cfg *req, + if (link < 0) + return NIX_AF_ERR_RX_LINK_INVALID; + +- nix_find_link_frs(rvu, req, pcifunc); + + linkcfg: ++ nix_find_link_frs(rvu, req, pcifunc); ++ + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link)); + cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16); + if (req->update_minlen) +-- +2.40.1 + diff --git a/queue-6.4/pci-acpiphp-reassign-resources-on-bridge-if-necessar.patch b/queue-6.4/pci-acpiphp-reassign-resources-on-bridge-if-necessar.patch new file mode 100644 index 00000000000..b9b3961cf9b --- /dev/null +++ b/queue-6.4/pci-acpiphp-reassign-resources-on-bridge-if-necessar.patch @@ -0,0 +1,83 @@ +From d7ba52da788cb5faff173aa978eb8efa091f7846 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Apr 2023 21:15:57 +0200 +Subject: PCI: acpiphp: Reassign resources on bridge if necessary + +From: Igor Mammedov + +[ Upstream commit 40613da52b13fb21c5566f10b287e0ca8c12c4e9 ] + +When using ACPI PCI hotplug, hotplugging a device with large BARs may fail +if bridge windows programmed by firmware are not large enough. 
+ +Reproducer: + $ qemu-kvm -monitor stdio -M q35 -m 4G \ + -global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=on \ + -device id=rp1,pcie-root-port,bus=pcie.0,chassis=4 \ + disk_image + + wait till linux guest boots, then hotplug device: + (qemu) device_add qxl,bus=rp1 + + hotplug on guest side fails with: + pci 0000:01:00.0: [1b36:0100] type 00 class 0x038000 + pci 0000:01:00.0: reg 0x10: [mem 0x00000000-0x03ffffff] + pci 0000:01:00.0: reg 0x14: [mem 0x00000000-0x03ffffff] + pci 0000:01:00.0: reg 0x18: [mem 0x00000000-0x00001fff] + pci 0000:01:00.0: reg 0x1c: [io 0x0000-0x001f] + pci 0000:01:00.0: BAR 0: no space for [mem size 0x04000000] + pci 0000:01:00.0: BAR 0: failed to assign [mem size 0x04000000] + pci 0000:01:00.0: BAR 1: no space for [mem size 0x04000000] + pci 0000:01:00.0: BAR 1: failed to assign [mem size 0x04000000] + pci 0000:01:00.0: BAR 2: assigned [mem 0xfe800000-0xfe801fff] + pci 0000:01:00.0: BAR 3: assigned [io 0x1000-0x101f] + qxl 0000:01:00.0: enabling device (0000 -> 0003) + Unable to create vram_mapping + qxl: probe of 0000:01:00.0 failed with error -12 + +However when using native PCIe hotplug + '-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off' +it works fine, since kernel attempts to reassign unused resources. + +Use the same machinery as native PCIe hotplug to (re)assign resources. + +Link: https://lore.kernel.org/r/20230424191557.2464760-1-imammedo@redhat.com +Signed-off-by: Igor Mammedov +Signed-off-by: Bjorn Helgaas +Acked-by: Michael S. Tsirkin +Acked-by: Rafael J. 
Wysocki +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/pci/hotplug/acpiphp_glue.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c +index 5b1f271c6034b..328d1e4160147 100644 +--- a/drivers/pci/hotplug/acpiphp_glue.c ++++ b/drivers/pci/hotplug/acpiphp_glue.c +@@ -498,7 +498,6 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) + acpiphp_native_scan_bridge(dev); + } + } else { +- LIST_HEAD(add_list); + int max, pass; + + acpiphp_rescan_slot(slot); +@@ -512,12 +511,10 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge) + if (pass && dev->subordinate) { + check_hotplug_bridge(slot, dev); + pcibios_resource_survey_bus(dev->subordinate); +- __pci_bus_size_bridges(dev->subordinate, +- &add_list); + } + } + } +- __pci_bus_assign_resources(bus, &add_list, NULL); ++ pci_assign_unassigned_bridge_resources(bus->self); + } + + acpiphp_sanitize_bus(bus); +-- +2.40.1 + diff --git a/queue-6.4/revert-ice-fix-ice-vf-reset-during-iavf-initializati.patch b/queue-6.4/revert-ice-fix-ice-vf-reset-during-iavf-initializati.patch new file mode 100644 index 00000000000..69f2d0af246 --- /dev/null +++ b/queue-6.4/revert-ice-fix-ice-vf-reset-during-iavf-initializati.patch @@ -0,0 +1,131 @@ +From 939dc92646342e3dd9812dca419ed611ca73602d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Aug 2023 10:07:01 +0200 +Subject: Revert "ice: Fix ice VF reset during iavf initialization" + +From: Petr Oros + +[ Upstream commit 0ecff05e6c59dd82dbcb9706db911f7fd9f40fb8 ] + +This reverts commit 7255355a0636b4eff08d5e8139c77d98f151c4fc. 
+ +After this commit we are not able to attach VF to VM: +virsh attach-interface v0 hostdev --managed 0000:41:01.0 --mac 52:52:52:52:52:52 +error: Failed to attach interface +error: Cannot set interface MAC to 52:52:52:52:52:52 for ifname enp65s0f0np0 vf 0: Resource temporarily unavailable + +ice_check_vf_ready_for_cfg() already contain waiting for reset. +New condition in ice_check_vf_ready_for_reset() causing only problems. + +Fixes: 7255355a0636 ("ice: Fix ice VF reset during iavf initialization") +Signed-off-by: Petr Oros +Reviewed-by: Simon Horman +Reviewed-by: Przemek Kitszel +Reviewed-by: Jacob Keller +Tested-by: Rafal Romanowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_sriov.c | 8 ++++---- + drivers/net/ethernet/intel/ice/ice_vf_lib.c | 19 ------------------- + drivers/net/ethernet/intel/ice/ice_vf_lib.h | 1 - + drivers/net/ethernet/intel/ice/ice_virtchnl.c | 1 - + 4 files changed, 4 insertions(+), 25 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c +index 588ad8696756d..f1dca59bd8449 100644 +--- a/drivers/net/ethernet/intel/ice/ice_sriov.c ++++ b/drivers/net/ethernet/intel/ice/ice_sriov.c +@@ -1171,7 +1171,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) + if (!vf) + return -EINVAL; + +- ret = ice_check_vf_ready_for_reset(vf); ++ ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + +@@ -1286,7 +1286,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) + goto out_put_vf; + } + +- ret = ice_check_vf_ready_for_reset(vf); ++ ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + +@@ -1340,7 +1340,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) + return -EOPNOTSUPP; + } + +- ret = ice_check_vf_ready_for_reset(vf); ++ ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + +@@ -1653,7 +1653,7 @@ 
ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, + if (!vf) + return -EINVAL; + +- ret = ice_check_vf_ready_for_reset(vf); ++ ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + goto out_put_vf; + +diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c +index bf74a2f3a4f8c..89fd6982df093 100644 +--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c +@@ -185,25 +185,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf) + return 0; + } + +-/** +- * ice_check_vf_ready_for_reset - check if VF is ready to be reset +- * @vf: VF to check if it's ready to be reset +- * +- * The purpose of this function is to ensure that the VF is not in reset, +- * disabled, and is both initialized and active, thus enabling us to safely +- * initialize another reset. +- */ +-int ice_check_vf_ready_for_reset(struct ice_vf *vf) +-{ +- int ret; +- +- ret = ice_check_vf_ready_for_cfg(vf); +- if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) +- ret = -EAGAIN; +- +- return ret; +-} +- + /** + * ice_trigger_vf_reset - Reset a VF on HW + * @vf: pointer to the VF structure +diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h +index a38ef00a36794..e3cda6fb71ab1 100644 +--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h ++++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h +@@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf); + struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); + bool ice_is_vf_disabled(struct ice_vf *vf); + int ice_check_vf_ready_for_cfg(struct ice_vf *vf); +-int ice_check_vf_ready_for_reset(struct ice_vf *vf); + void ice_set_vf_state_dis(struct ice_vf *vf); + bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf); + void +diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +index f4a524f80b110..97243c616d5d6 100644 +--- 
a/drivers/net/ethernet/intel/ice/ice_virtchnl.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c +@@ -3955,7 +3955,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event, + ice_vc_notify_vf_link_state(vf); + break; + case VIRTCHNL_OP_RESET_VF: +- clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); + ops->reset_vf(vf); + break; + case VIRTCHNL_OP_ADD_ETH_ADDR: +-- +2.40.1 + diff --git a/queue-6.4/rtnetlink-reject-negative-ifindexes-in-rtm_newlink.patch b/queue-6.4/rtnetlink-reject-negative-ifindexes-in-rtm_newlink.patch new file mode 100644 index 00000000000..cc7dd243d72 --- /dev/null +++ b/queue-6.4/rtnetlink-reject-negative-ifindexes-in-rtm_newlink.patch @@ -0,0 +1,68 @@ +From 1e81b46bd49f5fc6fcb6bc1647c1d583007a8ce3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Aug 2023 09:43:48 +0300 +Subject: rtnetlink: Reject negative ifindexes in RTM_NEWLINK + +From: Ido Schimmel + +[ Upstream commit 30188bd7838c16a98a520db1fe9df01ffc6ed368 ] + +Negative ifindexes are illegal, but the kernel does not validate the +ifindex in the ancillary header of RTM_NEWLINK messages, resulting in +the kernel generating a warning [1] when such an ifindex is specified. + +Fix by rejecting negative ifindexes. + +[1] +WARNING: CPU: 0 PID: 5031 at net/core/dev.c:9593 dev_index_reserve+0x1a2/0x1c0 net/core/dev.c:9593 +[...] 
+Call Trace: + + register_netdevice+0x69a/0x1490 net/core/dev.c:10081 + br_dev_newlink+0x27/0x110 net/bridge/br_netlink.c:1552 + rtnl_newlink_create net/core/rtnetlink.c:3471 [inline] + __rtnl_newlink+0x115e/0x18c0 net/core/rtnetlink.c:3688 + rtnl_newlink+0x67/0xa0 net/core/rtnetlink.c:3701 + rtnetlink_rcv_msg+0x439/0xd30 net/core/rtnetlink.c:6427 + netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2545 + netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] + netlink_unicast+0x536/0x810 net/netlink/af_netlink.c:1368 + netlink_sendmsg+0x93c/0xe40 net/netlink/af_netlink.c:1910 + sock_sendmsg_nosec net/socket.c:728 [inline] + sock_sendmsg+0xd9/0x180 net/socket.c:751 + ____sys_sendmsg+0x6ac/0x940 net/socket.c:2538 + ___sys_sendmsg+0x135/0x1d0 net/socket.c:2592 + __sys_sendmsg+0x117/0x1e0 net/socket.c:2621 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x63/0xcd + +Fixes: 38f7b870d4a6 ("[RTNETLINK]: Link creation API") +Reported-by: syzbot+5ba06978f34abb058571@syzkaller.appspotmail.com +Signed-off-by: Ido Schimmel +Reviewed-by: Jiri Pirko +Reviewed-by: Jakub Kicinski +Link: https://lore.kernel.org/r/20230823064348.2252280-1-idosch@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/core/rtnetlink.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c +index baa323ca37c42..fd6d2430d40ff 100644 +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3560,6 +3560,9 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, + if (ifm->ifi_index > 0) { + link_specified = true; + dev = __dev_get_by_index(net, ifm->ifi_index); ++ } else if (ifm->ifi_index < 0) { ++ NL_SET_ERR_MSG(extack, "ifindex can't be negative"); ++ return -EINVAL; + } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { + link_specified = true; + dev = rtnl_dev_get(net, tb); +-- +2.40.1 + diff --git 
a/queue-6.4/selftests-bonding-do-not-set-port-down-before-adding.patch b/queue-6.4/selftests-bonding-do-not-set-port-down-before-adding.patch new file mode 100644 index 00000000000..604df2762c7 --- /dev/null +++ b/queue-6.4/selftests-bonding-do-not-set-port-down-before-adding.patch @@ -0,0 +1,48 @@ +From 705cd022bdd70a7090e9725cea5be9ed0e8ef2f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 16:24:59 +0800 +Subject: selftests: bonding: do not set port down before adding to bond + +From: Hangbin Liu + +[ Upstream commit be809424659c2844a2d7ab653aacca4898538023 ] + +Before adding a port to bond, it needs to be set down first. In the +lacpdu test the author set the port down specifically. But commit +a4abfa627c38 ("net: rtnetlink: Enslave device before bringing it up") +changed the operation order, so the kernel will set the port down _after_ +adding to bond. So all the ports end up down and the test fails. + +In fact, the veth interfaces are already inactive when added. This +means there's no need to set them down again before adding to the bond. +Let's just remove the link down operation. 
+ +Fixes: a4abfa627c38 ("net: rtnetlink: Enslave device before bringing it up") +Reported-by: Zhengchao Shao +Closes: https://lore.kernel.org/netdev/a0ef07c7-91b0-94bd-240d-944a330fcabd@huawei.com/ +Signed-off-by: Hangbin Liu +Link: https://lore.kernel.org/r/20230817082459.1685972-1-liuhangbin@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh +index 47ab90596acb2..6358df5752f90 100755 +--- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh ++++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh +@@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end + + # add ports + ip link set fbond master fab-br0 +-ip link set veth1-bond down master fbond +-ip link set veth2-bond down master fbond ++ip link set veth1-bond master fbond ++ip link set veth2-bond master fbond + + # bring up + ip link set veth1-end up +-- +2.40.1 + diff --git a/queue-6.4/selftests-mlxsw-fix-test-failure-on-spectrum-4.patch b/queue-6.4/selftests-mlxsw-fix-test-failure-on-spectrum-4.patch new file mode 100644 index 00000000000..df5db6b13bb --- /dev/null +++ b/queue-6.4/selftests-mlxsw-fix-test-failure-on-spectrum-4.patch @@ -0,0 +1,83 @@ +From 08f47c17c394f865385311b414e5cc8ae92f120e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 15:58:25 +0200 +Subject: selftests: mlxsw: Fix test failure on Spectrum-4 + +From: Ido Schimmel + +[ Upstream commit f520489e99a35b0a5257667274fbe9afd2d8c50b ] + +Remove assumptions about shared buffer cell size and instead query the +cell size from devlink. Adjust the test to send small packets that fit +inside a single cell. + +Tested on Spectrum-{1,2,3,4}. 
+ +Fixes: 4735402173e6 ("mlxsw: spectrum: Extend to support Spectrum-4 ASIC") +Signed-off-by: Ido Schimmel +Reviewed-by: Petr Machata +Signed-off-by: Petr Machata +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/f7dfbf3c4d1cb23838d9eb99bab09afaa320c4ca.1692268427.git.petrm@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + .../selftests/drivers/net/mlxsw/sharedbuffer.sh | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +index 7d9e73a43a49b..0c47faff9274b 100755 +--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh ++++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +@@ -98,12 +98,12 @@ sb_occ_etc_check() + + port_pool_test() + { +- local exp_max_occ=288 ++ local exp_max_occ=$(devlink_cell_size_get) + local max_occ + + devlink sb occupancy clearmax $DEVLINK_DEV + +- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ ++ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV +@@ -126,12 +126,12 @@ port_pool_test() + + port_tc_ip_test() + { +- local exp_max_occ=288 ++ local exp_max_occ=$(devlink_cell_size_get) + local max_occ + + devlink sb occupancy clearmax $DEVLINK_DEV + +- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ ++ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + -t ip -q + + devlink sb occupancy snapshot $DEVLINK_DEV +@@ -154,16 +154,12 @@ port_tc_ip_test() + + port_tc_arp_test() + { +- local exp_max_occ=96 ++ local exp_max_occ=$(devlink_cell_size_get) + local max_occ + +- if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then +- exp_max_occ=144 +- fi +- + devlink sb occupancy clearmax $DEVLINK_DEV + +- $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q ++ $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q + + devlink sb 
occupancy snapshot $DEVLINK_DEV + +-- +2.40.1 + diff --git a/queue-6.4/series b/queue-6.4/series new file mode 100644 index 00000000000..e38d08d61eb --- /dev/null +++ b/queue-6.4/series @@ -0,0 +1,51 @@ +nfsv4.2-fix-error-handling-in-nfs42_proc_getxattr.patch +nfsv4-fix-out-path-in-__nfs4_get_acl_uncached.patch +xprtrdma-remap-receive-buffers-after-a-reconnect.patch +pci-acpiphp-reassign-resources-on-bridge-if-necessar.patch +mm-disable-config_per_vma_lock-until-its-fixed.patch +jbd2-remove-t_checkpoint_io_list.patch +jbd2-remove-journal_clean_one_cp_list.patch +jbd2-fix-a-race-when-checking-checkpoint-buffer-busy.patch +can-raw-fix-receiver-memory-leak.patch +can-raw-fix-lockdep-issue-in-raw_release.patch +wifi-iwlwifi-mvm-add-dependency-for-ptp-clock.patch +tracing-fix-cpu-buffers-unavailable-due-to-record_di.patch +tracing-synthetic-use-union-instead-of-casts.patch +tracing-synthetic-skip-first-entry-for-stack-traces.patch +tracing-synthetic-allocate-one-additional-element-fo.patch +tracing-fix-memleak-due-to-race-between-current_trac.patch +octeontx2-af-sdp-fix-receive-link-config.patch +devlink-add-missing-unregister-linecard-notification.patch +net-dsa-felix-fix-oversize-frame-dropping-for-always.patch +sock-annotate-data-races-around-prot-memory_pressure.patch +dccp-annotate-data-races-in-dccp_poll.patch +ipvlan-fix-a-reference-count-leak-warning-in-ipvlan_.patch +mlxsw-pci-set-time-stamp-fields-also-when-its-type-i.patch +mlxsw-reg-fix-sspr-register-layout.patch +mlxsw-fix-the-size-of-virt_router_msb.patch +selftests-mlxsw-fix-test-failure-on-spectrum-4.patch +net-dsa-mt7530-fix-handling-of-802.1x-pae-frames.patch +net-mdio-mdio-bitbang-fix-c45-read-write-protocol.patch +net-bgmac-fix-return-value-check-for-fixed_phy_regis.patch +net-bcmgenet-fix-return-value-check-for-fixed_phy_re.patch +net-validate-veth-and-vxcan-peer-ifindexes.patch +ipv4-fix-data-races-around-inet-inet_id.patch +ice-fix-receive-buffer-size-miscalculation.patch 
+revert-ice-fix-ice-vf-reset-during-iavf-initializati.patch +ice-fix-null-pointer-deref-during-vf-reset.patch +selftests-bonding-do-not-set-port-down-before-adding.patch +tg3-use-slab_build_skb-when-needed.patch +net-ethernet-mtk_eth_soc-fix-null-pointer-on-hw-rese.patch +can-isotp-fix-support-for-transmission-of-sf-without.patch +igb-avoid-starting-unnecessary-workqueues.patch +igc-fix-the-typo-in-the-ptm-control-macro.patch +net-sched-fix-a-qdisc-modification-with-ambiguous-co.patch +i40e-fix-potential-null-pointer-dereferencing-of-pf-.patch +netfilter-nf_tables-validate-all-pending-tables.patch +netfilter-nf_tables-flush-pending-destroy-work-befor.patch +netfilter-nf_tables-gc-transaction-race-with-abort-p.patch +netfilter-nf_tables-use-correct-lock-to-protect-gc_l.patch +netfilter-nf_tables-fix-out-of-memory-error-handling.patch +netfilter-nf_tables-defer-gc-run-if-previous-batch-i.patch +rtnetlink-reject-negative-ifindexes-in-rtm_newlink.patch +bonding-fix-macvlan-over-alb-bond-support.patch diff --git a/queue-6.4/sock-annotate-data-races-around-prot-memory_pressure.patch b/queue-6.4/sock-annotate-data-races-around-prot-memory_pressure.patch new file mode 100644 index 00000000000..0e3aae97c6d --- /dev/null +++ b/queue-6.4/sock-annotate-data-races-around-prot-memory_pressure.patch @@ -0,0 +1,82 @@ +From 890bbe4a9db79a3f3c61499869f3688e4abf6aa2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 01:51:32 +0000 +Subject: sock: annotate data-races around prot->memory_pressure + +From: Eric Dumazet + +[ Upstream commit 76f33296d2e09f63118db78125c95ef56df438e9 ] + +*prot->memory_pressure is read/written locklessly, we need +to add proper annotations. + +A recent commit added a new race, it is time to audit all accesses. 
+ +Fixes: 2d0c88e84e48 ("sock: Fix misuse of sk_under_memory_pressure()") +Fixes: 4d93df0abd50 ("[SCTP]: Rewrite of sctp buffer management code") +Signed-off-by: Eric Dumazet +Cc: Abel Wu +Reviewed-by: Shakeel Butt +Link: https://lore.kernel.org/r/20230818015132.2699348-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/sock.h | 7 ++++--- + net/sctp/socket.c | 2 +- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/include/net/sock.h b/include/net/sock.h +index 415f3840a26aa..d0d796d51a504 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1324,6 +1324,7 @@ struct proto { + /* + * Pressure flag: try to collapse. + * Technical note: it is used by multiple contexts non atomically. ++ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes. + * All the __sk_mem_schedule() is of this nature: accounting + * is strict, actions are advisory and have some latency. + */ +@@ -1424,7 +1425,7 @@ static inline bool sk_has_memory_pressure(const struct sock *sk) + static inline bool sk_under_global_memory_pressure(const struct sock *sk) + { + return sk->sk_prot->memory_pressure && +- !!*sk->sk_prot->memory_pressure; ++ !!READ_ONCE(*sk->sk_prot->memory_pressure); + } + + static inline bool sk_under_memory_pressure(const struct sock *sk) +@@ -1436,7 +1437,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk) + mem_cgroup_under_socket_pressure(sk->sk_memcg)) + return true; + +- return !!*sk->sk_prot->memory_pressure; ++ return !!READ_ONCE(*sk->sk_prot->memory_pressure); + } + + static inline long +@@ -1513,7 +1514,7 @@ proto_memory_pressure(struct proto *prot) + { + if (!prot->memory_pressure) + return false; +- return !!*prot->memory_pressure; ++ return !!READ_ONCE(*prot->memory_pressure); + } + + +diff --git a/net/sctp/socket.c b/net/sctp/socket.c +index ee15eff6364ee..de52045774303 100644 +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -99,7 +99,7 @@ struct 
percpu_counter sctp_sockets_allocated; + + static void sctp_enter_memory_pressure(struct sock *sk) + { +- sctp_memory_pressure = 1; ++ WRITE_ONCE(sctp_memory_pressure, 1); + } + + +-- +2.40.1 + diff --git a/queue-6.4/tg3-use-slab_build_skb-when-needed.patch b/queue-6.4/tg3-use-slab_build_skb-when-needed.patch new file mode 100644 index 00000000000..396e6fc9f02 --- /dev/null +++ b/queue-6.4/tg3-use-slab_build_skb-when-needed.patch @@ -0,0 +1,53 @@ +From 83f12c89fa5f2c51a3e822ee193efc620087f225 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Aug 2023 10:54:21 -0700 +Subject: tg3: Use slab_build_skb() when needed + +From: Kees Cook + +[ Upstream commit 99b415fe8986803ba0eaf6b8897b16edc8fe7ec2 ] + +The tg3 driver will use kmalloc() under some conditions. Check the +frag_size and use slab_build_skb() when frag_size is 0. Silences +the warning introduced by commit ce098da1497c ("skbuff: Introduce +slab_build_skb()"): + + Use slab_build_skb() instead + ... + tg3_poll_work+0x638/0xf90 [tg3] + +Fixes: ce098da1497c ("skbuff: Introduce slab_build_skb()") +Reported-by: Fiona Ebner +Closes: https://lore.kernel.org/all/1bd4cb9c-4eb8-3bdb-3e05-8689817242d1@proxmox.com +Cc: Siva Reddy Kallam +Cc: Prashant Sreedharan +Cc: Michael Chan +Cc: Bagas Sanjaya +Signed-off-by: Kees Cook +Reviewed-by: Pavan Chebbi +Link: https://lore.kernel.org/r/20230818175417.never.273-kees@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/tg3.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index 5ef073a79ce94..cb2810f175ccd 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -6881,7 +6881,10 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) + + ri->data = NULL; + +- skb = build_skb(data, frag_size); ++ if (frag_size) ++ skb = build_skb(data, frag_size); ++ else ++ skb = 
slab_build_skb(data); + if (!skb) { + tg3_frag_free(frag_size != 0, data); + goto drop_it_no_recycle; +-- +2.40.1 + diff --git a/queue-6.4/tracing-fix-cpu-buffers-unavailable-due-to-record_di.patch b/queue-6.4/tracing-fix-cpu-buffers-unavailable-due-to-record_di.patch new file mode 100644 index 00000000000..f19834a6cdc --- /dev/null +++ b/queue-6.4/tracing-fix-cpu-buffers-unavailable-due-to-record_di.patch @@ -0,0 +1,73 @@ +From 2c14dd6a0c5f65d935840f35f8eff9e60ed2fda6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 5 Aug 2023 11:38:15 +0800 +Subject: tracing: Fix cpu buffers unavailable due to 'record_disabled' missed + +From: Zheng Yejian + +[ Upstream commit b71645d6af10196c46cbe3732de2ea7d36b3ff6d ] + +Trace ring buffer can no longer record anything after executing +following commands at the shell prompt: + + # cd /sys/kernel/tracing + # cat tracing_cpumask + fff + # echo 0 > tracing_cpumask + # echo 1 > snapshot + # echo fff > tracing_cpumask + # echo 1 > tracing_on + # echo "hello world" > trace_marker + -bash: echo: write error: Bad file descriptor + +The root cause is that: + 1. After `echo 0 > tracing_cpumask`, 'record_disabled' of cpu buffers + in 'tr->array_buffer.buffer' became 1 (see tracing_set_cpumask()); + 2. After `echo 1 > snapshot`, 'tr->array_buffer.buffer' is swapped + with 'tr->max_buffer.buffer', then the 'record_disabled' became 0 + (see update_max_tr()); + 3. After `echo fff > tracing_cpumask`, the 'record_disabled' become -1; +Then array_buffer and max_buffer are both unavailable due to value of +'record_disabled' is not 0. + +To fix it, enable or disable both array_buffer and max_buffer at the same +time in tracing_set_cpumask(). 
+ +Link: https://lkml.kernel.org/r/20230805033816.3284594-2-zhengyejian1@huawei.com + +Cc: +Cc: +Cc: +Fixes: 71babb2705e2 ("tracing: change CPU ring buffer state from tracing_cpumask") +Signed-off-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index fd051f85efd4b..17663ce4936a4 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -5260,11 +5260,17 @@ int tracing_set_cpumask(struct trace_array *tr, + !cpumask_test_cpu(cpu, tracing_cpumask_new)) { + atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); ++#ifdef CONFIG_TRACER_MAX_TRACE ++ ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); ++#endif + } + if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && + cpumask_test_cpu(cpu, tracing_cpumask_new)) { + atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled); + ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); ++#ifdef CONFIG_TRACER_MAX_TRACE ++ ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); ++#endif + } + } + arch_spin_unlock(&tr->max_lock); +-- +2.40.1 + diff --git a/queue-6.4/tracing-fix-memleak-due-to-race-between-current_trac.patch b/queue-6.4/tracing-fix-memleak-due-to-race-between-current_trac.patch new file mode 100644 index 00000000000..c25d4ace703 --- /dev/null +++ b/queue-6.4/tracing-fix-memleak-due-to-race-between-current_trac.patch @@ -0,0 +1,122 @@ +From 92e0045093f7146866a6aeb8ec111d1f2f5b4370 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Aug 2023 20:55:39 +0800 +Subject: tracing: Fix memleak due to race between current_tracer and trace + +From: Zheng Yejian + +[ Upstream commit eecb91b9f98d6427d4af5fdb8f108f52572a39e7 ] + +Kmemleak report a leak in graph_trace_open(): + + unreferenced object 0xffff0040b95f4a00 (size 128): + comm "cat", pid 204981, 
jiffies 4301155872 (age 99771.964s) + hex dump (first 32 bytes): + e0 05 e7 b4 ab 7d 00 00 0b 00 01 00 00 00 00 00 .....}.......... + f4 00 01 10 00 a0 ff ff 00 00 00 00 65 00 10 00 ............e... + backtrace: + [<000000005db27c8b>] kmem_cache_alloc_trace+0x348/0x5f0 + [<000000007df90faa>] graph_trace_open+0xb0/0x344 + [<00000000737524cd>] __tracing_open+0x450/0xb10 + [<0000000098043327>] tracing_open+0x1a0/0x2a0 + [<00000000291c3876>] do_dentry_open+0x3c0/0xdc0 + [<000000004015bcd6>] vfs_open+0x98/0xd0 + [<000000002b5f60c9>] do_open+0x520/0x8d0 + [<00000000376c7820>] path_openat+0x1c0/0x3e0 + [<00000000336a54b5>] do_filp_open+0x14c/0x324 + [<000000002802df13>] do_sys_openat2+0x2c4/0x530 + [<0000000094eea458>] __arm64_sys_openat+0x130/0x1c4 + [<00000000a71d7881>] el0_svc_common.constprop.0+0xfc/0x394 + [<00000000313647bf>] do_el0_svc+0xac/0xec + [<000000002ef1c651>] el0_svc+0x20/0x30 + [<000000002fd4692a>] el0_sync_handler+0xb0/0xb4 + [<000000000c309c35>] el0_sync+0x160/0x180 + +The root cause is described as follows: + + __tracing_open() { // 1. File 'trace' is being opened; + ... + *iter->trace = *tr->current_trace; // 2. Tracer 'function_graph' is + // currently set; + ... + iter->trace->open(iter); // 3. Call graph_trace_open() here, + // and memory are allocated in it; + ... + } + + s_start() { // 4. The opened file is being read; + ... + *iter->trace = *tr->current_trace; // 5. If tracer is switched to + // 'nop' or others, then memory + // in step 3 are leaked!!! + ... + } + +To fix it, in s_start(), close tracer before switching then reopen the +new tracer after switching. And some tracers like 'wakeup' may not update +'iter->private' in some cases when reopened, so it should be cleared +to avoid being mistakenly closed again. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20230817125539.1646321-1-zhengyejian1@huawei.com + +Fixes: d7350c3f4569 ("tracing/core: make the read callbacks reentrants") +Signed-off-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace.c | 9 ++++++++- + kernel/trace/trace_irqsoff.c | 3 ++- + kernel/trace/trace_sched_wakeup.c | 2 ++ + 3 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 17663ce4936a4..f4855be6ac2b5 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -4196,8 +4196,15 @@ static void *s_start(struct seq_file *m, loff_t *pos) + * will point to the same string as current_trace->name. + */ + mutex_lock(&trace_types_lock); +- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) ++ if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) { ++ /* Close iter->trace before switching to the new current tracer */ ++ if (iter->trace->close) ++ iter->trace->close(iter); + *iter->trace = *tr->current_trace; ++ /* Reopen the new current tracer */ ++ if (iter->trace->open) ++ iter->trace->open(iter); ++ } + mutex_unlock(&trace_types_lock); + + #ifdef CONFIG_TRACER_MAX_TRACE +diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c +index 590b3d51afae9..ba37f768e2f27 100644 +--- a/kernel/trace/trace_irqsoff.c ++++ b/kernel/trace/trace_irqsoff.c +@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter) + { + if (is_graph(iter->tr)) + graph_trace_open(iter); +- ++ else ++ iter->private = NULL; + } + + static void irqsoff_trace_close(struct trace_iterator *iter) +diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c +index 330aee1c1a49e..0469a04a355f2 100644 +--- a/kernel/trace/trace_sched_wakeup.c ++++ b/kernel/trace/trace_sched_wakeup.c +@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator 
*iter) + { + if (is_graph(iter->tr)) + graph_trace_open(iter); ++ else ++ iter->private = NULL; + } + + static void wakeup_trace_close(struct trace_iterator *iter) +-- +2.40.1 + diff --git a/queue-6.4/tracing-synthetic-allocate-one-additional-element-fo.patch b/queue-6.4/tracing-synthetic-allocate-one-additional-element-fo.patch new file mode 100644 index 00000000000..87f50098200 --- /dev/null +++ b/queue-6.4/tracing-synthetic-allocate-one-additional-element-fo.patch @@ -0,0 +1,52 @@ +From 714fb1ae9f915d04b756f7421c3fbe1c4287e8a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Aug 2023 17:49:28 +0200 +Subject: tracing/synthetic: Allocate one additional element for size + +From: Sven Schnelle + +[ Upstream commit c4d6b5438116c184027b2e911c0f2c7c406fb47c ] + +While debugging another issue I noticed that the stack trace contains one +invalid entry at the end: + +-0 [008] d..4. 26.484201: wake_lat: pid=0 delta=2629976084 000000009cc24024 stack=STACK: +=> __schedule+0xac6/0x1a98 +=> schedule+0x126/0x2c0 +=> schedule_timeout+0x150/0x2c0 +=> kcompactd+0x9ca/0xc20 +=> kthread+0x2f6/0x3d8 +=> __ret_from_fork+0x8a/0xe8 +=> 0x6b6b6b6b6b6b6b6b + +This is because the code failed to add the one element containing the +number of entries to field_size. 
+ +Link: https://lkml.kernel.org/r/20230816154928.4171614-4-svens@linux.ibm.com + +Cc: Masami Hiramatsu +Fixes: 00cf3d672a9d ("tracing: Allow synthetic events to pass around stacktraces") +Signed-off-by: Sven Schnelle +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_synth.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c +index a4c58a932dfa6..32109d092b10f 100644 +--- a/kernel/trace/trace_events_synth.c ++++ b/kernel/trace/trace_events_synth.c +@@ -528,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data, + str_val = (char *)(long)var_ref_vals[val_idx]; + + if (event->dynamic_fields[i]->is_stack) { +- len = *((unsigned long *)str_val); ++ /* reserve one extra element for size */ ++ len = *((unsigned long *)str_val) + 1; + len *= sizeof(unsigned long); + } else { + len = fetch_store_strlen((unsigned long)str_val); +-- +2.40.1 + diff --git a/queue-6.4/tracing-synthetic-skip-first-entry-for-stack-traces.patch b/queue-6.4/tracing-synthetic-skip-first-entry-for-stack-traces.patch new file mode 100644 index 00000000000..e027459037f --- /dev/null +++ b/queue-6.4/tracing-synthetic-skip-first-entry-for-stack-traces.patch @@ -0,0 +1,93 @@ +From 393bd346d98c07c4fa2a93d707ae8fa51979942b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Aug 2023 17:49:27 +0200 +Subject: tracing/synthetic: Skip first entry for stack traces + +From: Sven Schnelle + +[ Upstream commit 887f92e09ef34a949745ad26ce82be69e2dabcf6 ] + +While debugging another issue I noticed that the stack trace output +contains the number of entries on top: + + -0 [000] d..4. 
203.322502: wake_lat: pid=0 delta=2268270616 stack=STACK: +=> 0x10 +=> __schedule+0xac6/0x1a98 +=> schedule+0x126/0x2c0 +=> schedule_timeout+0x242/0x2c0 +=> __wait_for_common+0x434/0x680 +=> __wait_rcu_gp+0x198/0x3e0 +=> synchronize_rcu+0x112/0x138 +=> ring_buffer_reset_online_cpus+0x140/0x2e0 +=> tracing_reset_online_cpus+0x15c/0x1d0 +=> tracing_set_clock+0x180/0x1d8 +=> hist_register_trigger+0x486/0x670 +=> event_hist_trigger_parse+0x494/0x1318 +=> trigger_process_regex+0x1d4/0x258 +=> event_trigger_write+0xb4/0x170 +=> vfs_write+0x210/0xad0 +=> ksys_write+0x122/0x208 + +Fix this by skipping the first element. Also replace the pointer +logic with an index variable which is easier to read. + +Link: https://lkml.kernel.org/r/20230816154928.4171614-3-svens@linux.ibm.com + +Cc: Masami Hiramatsu +Fixes: 00cf3d672a9d ("tracing: Allow synthetic events to pass around stacktraces") +Signed-off-by: Sven Schnelle +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/trace_events_synth.c | 17 ++++------------- + 1 file changed, 4 insertions(+), 13 deletions(-) + +diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c +index da0627fa91caf..a4c58a932dfa6 100644 +--- a/kernel/trace/trace_events_synth.c ++++ b/kernel/trace/trace_events_synth.c +@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, + struct trace_seq *s = &iter->seq; + struct synth_trace_event *entry; + struct synth_event *se; +- unsigned int i, n_u64; ++ unsigned int i, j, n_u64; + char print_fmt[32]; + const char *fmt; + +@@ -389,18 +389,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } + } else if (se->fields[i]->is_stack) { +- unsigned long *p, *end; + union trace_synth_field *data = &entry->fields[n_u64]; +- +- p = (void *)entry + data->as_dynamic.offset; +- end = (void *)p + data->as_dynamic.len - (sizeof(long) - 1); ++ unsigned long *p 
= (void *)entry + data->as_dynamic.offset; + + trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name); +- +- for (; *p && p < end; p++) +- trace_seq_printf(s, "=> %pS\n", (void *)*p); ++ for (j = 1; j < data->as_dynamic.len / sizeof(long); j++) ++ trace_seq_printf(s, "=> %pS\n", (void *)p[j]); + n_u64++; +- + } else { + struct trace_print_flags __flags[] = { + __def_gfpflag_names, {-1, NULL} }; +@@ -490,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry, + break; + } + +- /* Include the zero'd element if it fits */ +- if (len < HIST_STACKTRACE_DEPTH) +- len++; +- + len *= sizeof(long); + + /* Find the dynamic section to copy the stack into. */ +-- +2.40.1 + diff --git a/queue-6.4/tracing-synthetic-use-union-instead-of-casts.patch b/queue-6.4/tracing-synthetic-use-union-instead-of-casts.patch new file mode 100644 index 00000000000..4732337a611 --- /dev/null +++ b/queue-6.4/tracing-synthetic-use-union-instead-of-casts.patch @@ -0,0 +1,311 @@ +From 1001ec56a3fdea62f5fdc76e9ae2a8ec2bc1483e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 16 Aug 2023 17:49:26 +0200 +Subject: tracing/synthetic: Use union instead of casts + +From: Sven Schnelle + +[ Upstream commit ddeea494a16f32522bce16ee65f191d05d4b8282 ] + +The current code uses a lot of casts to access the fields member in struct +synth_trace_events with different sizes. This makes the code hard to +read, and had already introduced an endianness bug. Use a union and struct +instead. 
+ +Link: https://lkml.kernel.org/r/20230816154928.4171614-2-svens@linux.ibm.com + +Cc: stable@vger.kernel.org +Cc: Masami Hiramatsu +Fixes: 00cf3d672a9dd ("tracing: Allow synthetic events to pass around stacktraces") +Signed-off-by: Sven Schnelle +Signed-off-by: Steven Rostedt (Google) +Stable-dep-of: 887f92e09ef3 ("tracing/synthetic: Skip first entry for stack traces") +Signed-off-by: Sasha Levin +--- + include/linux/trace_events.h | 11 ++++ + kernel/trace/trace.h | 8 +++ + kernel/trace/trace_events_synth.c | 87 +++++++++++++------------------ + 3 files changed, 56 insertions(+), 50 deletions(-) + +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index 7c4a0b72334eb..c55fc453e33b5 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter, + extern __printf(2, 3) + void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...); + ++/* Used to find the offset and length of dynamic fields in trace events */ ++struct trace_dynamic_info { ++#ifdef CONFIG_CPU_BIG_ENDIAN ++ u16 offset; ++ u16 len; ++#else ++ u16 len; ++ u16 offset; ++#endif ++}; ++ + /* + * The trace entry - the most basic unit of tracing. 
This is what + * is printed in the end as a single line in the trace output, such as: +diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h +index eee1f3ca47494..2daeac8e690a6 100644 +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -1282,6 +1282,14 @@ static inline void trace_branch_disable(void) + /* set ring buffers to default size if not already done so */ + int tracing_update_buffers(void); + ++union trace_synth_field { ++ u8 as_u8; ++ u16 as_u16; ++ u32 as_u32; ++ u64 as_u64; ++ struct trace_dynamic_info as_dynamic; ++}; ++ + struct ftrace_event_field { + struct list_head link; + const char *name; +diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c +index d6a70aff24101..da0627fa91caf 100644 +--- a/kernel/trace/trace_events_synth.c ++++ b/kernel/trace/trace_events_synth.c +@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event, + + struct synth_trace_event { + struct trace_entry ent; +- u64 fields[]; ++ union trace_synth_field fields[]; + }; + + static int synth_event_define_fields(struct trace_event_call *call) +@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type) + + static void print_synth_event_num_val(struct trace_seq *s, + char *print_fmt, char *name, +- int size, u64 val, char *space) ++ int size, union trace_synth_field *val, char *space) + { + switch (size) { + case 1: +- trace_seq_printf(s, print_fmt, name, (u8)val, space); ++ trace_seq_printf(s, print_fmt, name, val->as_u8, space); + break; + + case 2: +- trace_seq_printf(s, print_fmt, name, (u16)val, space); ++ trace_seq_printf(s, print_fmt, name, val->as_u16, space); + break; + + case 4: +- trace_seq_printf(s, print_fmt, name, (u32)val, space); ++ trace_seq_printf(s, print_fmt, name, val->as_u32, space); + break; + + default: +@@ -374,36 +374,26 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, + /* parameter values */ + if (se->fields[i]->is_string) { + if 
(se->fields[i]->is_dynamic) { +- u32 offset, data_offset; +- char *str_field; +- +- offset = (u32)entry->fields[n_u64]; +- data_offset = offset & 0xffff; +- +- str_field = (char *)entry + data_offset; ++ union trace_synth_field *data = &entry->fields[n_u64]; + + trace_seq_printf(s, print_fmt, se->fields[i]->name, + STR_VAR_LEN_MAX, +- str_field, ++ (char *)entry + data->as_dynamic.offset, + i == se->n_fields - 1 ? "" : " "); + n_u64++; + } else { + trace_seq_printf(s, print_fmt, se->fields[i]->name, + STR_VAR_LEN_MAX, +- (char *)&entry->fields[n_u64], ++ (char *)&entry->fields[n_u64].as_u64, + i == se->n_fields - 1 ? "" : " "); + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); + } + } else if (se->fields[i]->is_stack) { +- u32 offset, data_offset, len; + unsigned long *p, *end; ++ union trace_synth_field *data = &entry->fields[n_u64]; + +- offset = (u32)entry->fields[n_u64]; +- data_offset = offset & 0xffff; +- len = offset >> 16; +- +- p = (void *)entry + data_offset; +- end = (void *)p + len - (sizeof(long) - 1); ++ p = (void *)entry + data->as_dynamic.offset; ++ end = (void *)p + data->as_dynamic.len - (sizeof(long) - 1); + + trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name); + +@@ -419,13 +409,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, + print_synth_event_num_val(s, print_fmt, + se->fields[i]->name, + se->fields[i]->size, +- entry->fields[n_u64], ++ &entry->fields[n_u64], + space); + + if (strcmp(se->fields[i]->type, "gfp_t") == 0) { + trace_seq_puts(s, " ("); + trace_print_flags_seq(s, "|", +- entry->fields[n_u64], ++ entry->fields[n_u64].as_u64, + __flags); + trace_seq_putc(s, ')'); + } +@@ -454,21 +444,16 @@ static unsigned int trace_string(struct synth_trace_event *entry, + int ret; + + if (is_dynamic) { +- u32 data_offset; ++ union trace_synth_field *data = &entry->fields[*n_u64]; + +- data_offset = struct_size(entry, fields, event->n_u64); +- data_offset += data_size; +- +- len = fetch_store_strlen((unsigned long)str_val); 
+- +- data_offset |= len << 16; +- *(u32 *)&entry->fields[*n_u64] = data_offset; ++ data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size; ++ data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val); + + ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry); + + (*n_u64)++; + } else { +- str_field = (char *)&entry->fields[*n_u64]; ++ str_field = (char *)&entry->fields[*n_u64].as_u64; + + #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE + if ((unsigned long)str_val < TASK_SIZE) +@@ -492,6 +477,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry, + unsigned int data_size, + unsigned int *n_u64) + { ++ union trace_synth_field *data = &entry->fields[*n_u64]; + unsigned int len; + u32 data_offset; + void *data_loc; +@@ -515,8 +501,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry, + memcpy(data_loc, stack, len); + + /* Fill in the field that holds the offset/len combo */ +- data_offset |= len << 16; +- *(u32 *)&entry->fields[*n_u64] = data_offset; ++ ++ data->as_dynamic.offset = data_offset; ++ data->as_dynamic.len = len; + + (*n_u64)++; + +@@ -592,19 +579,19 @@ static notrace void trace_event_raw_event_synth(void *__data, + + switch (field->size) { + case 1: +- *(u8 *)&entry->fields[n_u64] = (u8)val; ++ entry->fields[n_u64].as_u8 = (u8)val; + break; + + case 2: +- *(u16 *)&entry->fields[n_u64] = (u16)val; ++ entry->fields[n_u64].as_u16 = (u16)val; + break; + + case 4: +- *(u32 *)&entry->fields[n_u64] = (u32)val; ++ entry->fields[n_u64].as_u32 = (u32)val; + break; + + default: +- entry->fields[n_u64] = val; ++ entry->fields[n_u64].as_u64 = val; + break; + } + n_u64++; +@@ -1790,19 +1777,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...) 
+ + switch (field->size) { + case 1: +- *(u8 *)&state.entry->fields[n_u64] = (u8)val; ++ state.entry->fields[n_u64].as_u8 = (u8)val; + break; + + case 2: +- *(u16 *)&state.entry->fields[n_u64] = (u16)val; ++ state.entry->fields[n_u64].as_u16 = (u16)val; + break; + + case 4: +- *(u32 *)&state.entry->fields[n_u64] = (u32)val; ++ state.entry->fields[n_u64].as_u32 = (u32)val; + break; + + default: +- state.entry->fields[n_u64] = val; ++ state.entry->fields[n_u64].as_u64 = val; + break; + } + n_u64++; +@@ -1883,19 +1870,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals, + + switch (field->size) { + case 1: +- *(u8 *)&state.entry->fields[n_u64] = (u8)val; ++ state.entry->fields[n_u64].as_u8 = (u8)val; + break; + + case 2: +- *(u16 *)&state.entry->fields[n_u64] = (u16)val; ++ state.entry->fields[n_u64].as_u16 = (u16)val; + break; + + case 4: +- *(u32 *)&state.entry->fields[n_u64] = (u32)val; ++ state.entry->fields[n_u64].as_u32 = (u32)val; + break; + + default: +- state.entry->fields[n_u64] = val; ++ state.entry->fields[n_u64].as_u64 = val; + break; + } + n_u64++; +@@ -2030,19 +2017,19 @@ static int __synth_event_add_val(const char *field_name, u64 val, + } else { + switch (field->size) { + case 1: +- *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val; ++ trace_state->entry->fields[field->offset].as_u8 = (u8)val; + break; + + case 2: +- *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val; ++ trace_state->entry->fields[field->offset].as_u16 = (u16)val; + break; + + case 4: +- *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val; ++ trace_state->entry->fields[field->offset].as_u32 = (u32)val; + break; + + default: +- trace_state->entry->fields[field->offset] = val; ++ trace_state->entry->fields[field->offset].as_u64 = val; + break; + } + } +-- +2.40.1 + diff --git a/queue-6.4/wifi-iwlwifi-mvm-add-dependency-for-ptp-clock.patch b/queue-6.4/wifi-iwlwifi-mvm-add-dependency-for-ptp-clock.patch new file mode 100644 index 
00000000000..f540efac128 --- /dev/null +++ b/queue-6.4/wifi-iwlwifi-mvm-add-dependency-for-ptp-clock.patch @@ -0,0 +1,65 @@ +From dcdc49b0dcb7735552d94005b222e4ed049a5807 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 11 Aug 2023 22:29:47 -0700 +Subject: wifi: iwlwifi: mvm: add dependency for PTP clock + +From: Randy Dunlap + +[ Upstream commit 609a1bcd7bebac90a1b443e9fed47fd48dac5799 ] + +When the code to use the PTP HW clock was added, it didn't update +the Kconfig entry for the PTP dependency, leading to build errors, +so update the Kconfig entry to depend on PTP_1588_CLOCK_OPTIONAL. + +aarch64-linux-ld: drivers/net/wireless/intel/iwlwifi/mvm/ptp.o: in function `iwl_mvm_ptp_init': +drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:294: undefined reference to `ptp_clock_register' +drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:294:(.text+0xce8): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `ptp_clock_register' +aarch64-linux-ld: drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:301: undefined reference to `ptp_clock_index' +drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:301:(.text+0xd18): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `ptp_clock_index' +aarch64-linux-ld: drivers/net/wireless/intel/iwlwifi/mvm/ptp.o: in function `iwl_mvm_ptp_remove': +drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:315: undefined reference to `ptp_clock_index' +drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:315:(.text+0xe80): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `ptp_clock_index' +aarch64-linux-ld: drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:319: undefined reference to `ptp_clock_unregister' +drivers/net/wireless/intel/iwlwifi/mvm/ptp.c:319:(.text+0xeac): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `ptp_clock_unregister' + +Fixes: 1595ecce1cf3 ("wifi: iwlwifi: mvm: add support for PTP HW clock (PHC)") +Signed-off-by: Randy Dunlap +Reported-by: kernel test robot +Link: 
https://lore.kernel.org/all/202308110447.4QSJHmFH-lkp@intel.com/ +Cc: Krishnanand Prabhu +Cc: Luca Coelho +Cc: Gregory Greenman +Cc: Johannes Berg +Cc: Kalle Valo +Cc: linux-wireless@vger.kernel.org +Cc: "David S. Miller" +Cc: Eric Dumazet +Cc: Jakub Kicinski +Cc: Paolo Abeni +Cc: netdev@vger.kernel.org +Reviewed-by: Simon Horman +Tested-by: Simon Horman # build-tested +Acked-by: Richard Cochran +Acked-by: Gregory Greenman +Link: https://lore.kernel.org/r/20230812052947.22913-1-rdunlap@infradead.org +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig +index b20409f8c13ab..20971304fdef4 100644 +--- a/drivers/net/wireless/intel/iwlwifi/Kconfig ++++ b/drivers/net/wireless/intel/iwlwifi/Kconfig +@@ -66,6 +66,7 @@ config IWLMVM + tristate "Intel Wireless WiFi MVM Firmware support" + select WANT_DEV_COREDUMP + depends on MAC80211 ++ depends on PTP_1588_CLOCK_OPTIONAL + help + This is the driver that supports the MVM firmware. The list + of the devices that use this firmware is available here: +-- +2.40.1 + diff --git a/queue-6.4/xprtrdma-remap-receive-buffers-after-a-reconnect.patch b/queue-6.4/xprtrdma-remap-receive-buffers-after-a-reconnect.patch new file mode 100644 index 00000000000..c20b69b8bb8 --- /dev/null +++ b/queue-6.4/xprtrdma-remap-receive-buffers-after-a-reconnect.patch @@ -0,0 +1,60 @@ +From f007e054857758fe5aa20f9120e76ffb1236caab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Jul 2023 14:18:29 -0400 +Subject: xprtrdma: Remap Receive buffers after a reconnect + +From: Chuck Lever + +[ Upstream commit 895cedc1791916e8a98864f12b656702fad0bb67 ] + +On server-initiated disconnect, rpcrdma_xprt_disconnect() was DMA- +unmapping the Receive buffers, but rpcrdma_post_recvs() neglected +to remap them after a new connection had been established. 
The +result was immediate failure of the new connection with the Receives +flushing with LOCAL_PROT_ERR. + +Fixes: 671c450b6fe0 ("xprtrdma: Fix oops in Receive handler after device removal") +Signed-off-by: Chuck Lever +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + net/sunrpc/xprtrdma/verbs.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c +index b098fde373abf..28c0771c4e8c3 100644 +--- a/net/sunrpc/xprtrdma/verbs.c ++++ b/net/sunrpc/xprtrdma/verbs.c +@@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, + if (!rep->rr_rdmabuf) + goto out_free; + +- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) +- goto out_free_regbuf; +- + rep->rr_cid.ci_completion_id = + atomic_inc_return(&r_xprt->rx_ep->re_completion_ids); + +@@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, + spin_unlock(&buf->rb_lock); + return rep; + +-out_free_regbuf: +- rpcrdma_regbuf_free(rep->rr_rdmabuf); + out_free: + kfree(rep); + out: +@@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) + rep = rpcrdma_rep_create(r_xprt, temp); + if (!rep) + break; ++ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) { ++ rpcrdma_rep_put(buf, rep); ++ break; ++ } + + rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; + trace_xprtrdma_post_recv(rep); +-- +2.40.1 +