From f8a1606269a4988d52c0a8a7b95350d713b0999f Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 5 Aug 2023 16:50:06 -0400 Subject: [PATCH] Fixes for 4.19 Signed-off-by: Sasha Levin --- ...kb_under_panic-in-ip6mr_cache_report.patch | 77 ++++++++++ .../kvm-s390-fix-sthyi-error-handling.patch | 78 ++++++++++ ...-data-race-annotation-for-sk_ll_usec.patch | 36 +++++ ...data-race-annotations-around-sk-sk_p.patch | 63 ++++++++ ...turn-value-check-in-mlx5e_ipsec_remo.patch | 39 +++++ ...ute-no-longer-copy-tcf_result-on-upd.patch | 50 ++++++ ...cls_u32-fix-match-key-mis-addressing.patch | 145 ++++++++++++++++++ ...2-no-longer-copy-tcf_result-on-updat.patch | 50 ++++++ ..._from_different_cu-skip-if-there-is-.patch | 66 ++++++++ queue-4.19/series | 16 ++ ...otate-data-races-around-tm-tcpm_lock.patch | 51 ++++++ ...notate-data-races-around-tm-tcpm_net.patch | 66 ++++++++ ...tate-data-races-around-tm-tcpm_stamp.patch | 88 +++++++++++ ...otate-data-races-around-tm-tcpm_vals.patch | 85 ++++++++++ .../tcp_metrics-fix-addr_same-helper.patch | 46 ++++++ ...data-race-in-tcpm_suck_dst-vs-fastop.patch | 85 ++++++++++ ...se-the-same-return-type-for-has_zero.patch | 74 +++++++++ 17 files changed, 1115 insertions(+) create mode 100644 queue-4.19/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch create mode 100644 queue-4.19/kvm-s390-fix-sthyi-error-handling.patch create mode 100644 queue-4.19/net-add-missing-data-race-annotation-for-sk_ll_usec.patch create mode 100644 queue-4.19/net-add-missing-data-race-annotations-around-sk-sk_p.patch create mode 100644 queue-4.19/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch create mode 100644 queue-4.19/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch create mode 100644 queue-4.19/net-sched-cls_u32-fix-match-key-mis-addressing.patch create mode 100644 queue-4.19/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch create mode 100644 queue-4.19/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch create mode 
100644 queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch create mode 100644 queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch create mode 100644 queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch create mode 100644 queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch create mode 100644 queue-4.19/tcp_metrics-fix-addr_same-helper.patch create mode 100644 queue-4.19/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch create mode 100644 queue-4.19/word-at-a-time-use-the-same-return-type-for-has_zero.patch diff --git a/queue-4.19/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch b/queue-4.19/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch new file mode 100644 index 00000000000..20ea00e799c --- /dev/null +++ b/queue-4.19/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch @@ -0,0 +1,77 @@ +From 186df4fef1d605d8935ae97973755358df40ce6b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Aug 2023 14:43:18 +0800 +Subject: ip6mr: Fix skb_under_panic in ip6mr_cache_report() + +From: Yue Haibing + +[ Upstream commit 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c ] + +skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4 + head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg + ------------[ cut here ]------------ + kernel BUG at net/core/skbuff.c:192! 
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN + CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 + Workqueue: ipv6_addrconf addrconf_dad_work + RIP: 0010:skb_panic+0x152/0x1d0 + Call Trace: + + skb_push+0xc4/0xe0 + ip6mr_cache_report+0xd69/0x19b0 + reg_vif_xmit+0x406/0x690 + dev_hard_start_xmit+0x17e/0x6e0 + __dev_queue_xmit+0x2d6a/0x3d20 + vlan_dev_hard_start_xmit+0x3ab/0x5c0 + dev_hard_start_xmit+0x17e/0x6e0 + __dev_queue_xmit+0x2d6a/0x3d20 + neigh_connected_output+0x3ed/0x570 + ip6_finish_output2+0x5b5/0x1950 + ip6_finish_output+0x693/0x11c0 + ip6_output+0x24b/0x880 + NF_HOOK.constprop.0+0xfd/0x530 + ndisc_send_skb+0x9db/0x1400 + ndisc_send_rs+0x12a/0x6c0 + addrconf_dad_completed+0x3c9/0xea0 + addrconf_dad_work+0x849/0x1420 + process_one_work+0xa22/0x16e0 + worker_thread+0x679/0x10c0 + ret_from_fork+0x28/0x60 + ret_from_fork_asm+0x11/0x20 + +When setting up a vlan device on dev pim6reg, a DAD NS packet may be sent via reg_vif_xmit(). +reg_vif_xmit() + ip6mr_cache_report() + skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4 +And skb_push is declared as: + void *skb_push(struct sk_buff *skb, unsigned int len); + skb->data -= len; + //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850 +skb->data is set to 0xffff887f5f86a850, which is an invalid memory address, causing skb_push() to fail. + +Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).") +Signed-off-by: Yue Haibing +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ip6mr.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c +index e9eb917342b85..329bad6cbb768 100644 +--- a/net/ipv6/ip6mr.c ++++ b/net/ipv6/ip6mr.c +@@ -1064,7 +1064,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, + And all this only to mangle msg->im6_msgtype and + to set msg->im6_mbz to "mbz" :-) + */ +- skb_push(skb, -skb_network_offset(pkt)); ++ __skb_pull(skb, skb_network_offset(pkt)); + + skb_push(skb, sizeof(*msg)); + skb_reset_transport_header(skb); +-- +2.40.1 + diff --git a/queue-4.19/kvm-s390-fix-sthyi-error-handling.patch b/queue-4.19/kvm-s390-fix-sthyi-error-handling.patch new file mode 100644 index 00000000000..7319e148f76 --- /dev/null +++ b/queue-4.19/kvm-s390-fix-sthyi-error-handling.patch @@ -0,0 +1,78 @@ +From 1da2e338737e83c3069b097f2674c18a36aed0f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Jul 2023 20:29:39 +0200 +Subject: KVM: s390: fix sthyi error handling + +From: Heiko Carstens + +[ Upstream commit 0c02cc576eac161601927b41634f80bfd55bfa9e ] + +Commit 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info") +added cache handling for store hypervisor info. This also changed the +possible return code for sthyi_fill(). + +Instead of only returning a condition code like the sthyi instruction would +do, it can now also return a negative error value (-ENOMEM). handle_sthyi() +was not changed accordingly. In case of an error, the negative error value +would incorrectly be injected into the guest PSW. + +Add proper error handling to prevent this, and update the comment which +describes the possible return values of sthyi_fill(). 
+ +Fixes: 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info") +Reviewed-by: Christian Borntraeger +Link: https://lore.kernel.org/r/20230727182939.2050744-1-hca@linux.ibm.com +Signed-off-by: Heiko Carstens +Signed-off-by: Sasha Levin +--- + arch/s390/kernel/sthyi.c | 6 +++--- + arch/s390/kvm/intercept.c | 9 ++++++--- + 2 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c +index 888cc2f166db7..ce6084e28d904 100644 +--- a/arch/s390/kernel/sthyi.c ++++ b/arch/s390/kernel/sthyi.c +@@ -460,9 +460,9 @@ static int sthyi_update_cache(u64 *rc) + * + * Fills the destination with system information returned by the STHYI + * instruction. The data is generated by emulation or execution of STHYI, +- * if available. The return value is the condition code that would be +- * returned, the rc parameter is the return code which is passed in +- * register R2 + 1. ++ * if available. The return value is either a negative error value or ++ * the condition code that would be returned, the rc parameter is the ++ * return code which is passed in register R2 + 1. 
+ */ + int sthyi_fill(void *dst, u64 *rc) + { +diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c +index a389fa85cca2d..5450d43d26ea5 100644 +--- a/arch/s390/kvm/intercept.c ++++ b/arch/s390/kvm/intercept.c +@@ -360,8 +360,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) + */ + int handle_sthyi(struct kvm_vcpu *vcpu) + { +- int reg1, reg2, r = 0; +- u64 code, addr, cc = 0, rc = 0; ++ int reg1, reg2, cc = 0, r = 0; ++ u64 code, addr, rc = 0; + struct sthyi_sctns *sctns = NULL; + + if (!test_kvm_facility(vcpu->kvm, 74)) +@@ -392,7 +392,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu) + return -ENOMEM; + + cc = sthyi_fill(sctns, &rc); +- ++ if (cc < 0) { ++ free_page((unsigned long)sctns); ++ return cc; ++ } + out: + if (!cc) { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); +-- +2.40.1 + diff --git a/queue-4.19/net-add-missing-data-race-annotation-for-sk_ll_usec.patch b/queue-4.19/net-add-missing-data-race-annotation-for-sk_ll_usec.patch new file mode 100644 index 00000000000..bd7c3aec0ca --- /dev/null +++ b/queue-4.19/net-add-missing-data-race-annotation-for-sk_ll_usec.patch @@ -0,0 +1,36 @@ +From 683d6085ad2e0686bac1e20a4e5511c4e9f24eb4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:17 +0000 +Subject: net: add missing data-race annotation for sk_ll_usec + +From: Eric Dumazet + +[ Upstream commit e5f0d2dd3c2faa671711dac6d3ff3cef307bcfe3 ] + +In a prior commit I forgot that sk_getsockopt() reads +sk->sk_ll_usec without holding a lock. + +Fixes: 0dbffbb5335a ("net: annotate data race around sk_ll_usec") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index f112862fe0682..3e6da3694a5a5 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1349,7 +1349,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + + #ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: +- v.val = sk->sk_ll_usec; ++ v.val = READ_ONCE(sk->sk_ll_usec); + break; + #endif + +-- +2.40.1 + diff --git a/queue-4.19/net-add-missing-data-race-annotations-around-sk-sk_p.patch b/queue-4.19/net-add-missing-data-race-annotations-around-sk-sk_p.patch new file mode 100644 index 00000000000..44cc680e7d3 --- /dev/null +++ b/queue-4.19/net-add-missing-data-race-annotations-around-sk-sk_p.patch @@ -0,0 +1,63 @@ +From 3d0930829c7a2e5cdf8363ff42a2013d42a93b6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 15:03:16 +0000 +Subject: net: add missing data-race annotations around sk->sk_peek_off + +From: Eric Dumazet + +[ Upstream commit 11695c6e966b0ec7ed1d16777d294cef865a5c91 ] + +sk_getsockopt() runs locklessly, thus we need to annotate the read +of sk->sk_peek_off. + +While we are at it, add corresponding annotations to sk_set_peek_off() +and unix_set_peek_off(). + +Fixes: b9bb53f3836f ("sock: convert sk_peek_offset functions to WRITE_ONCE") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/core/sock.c | 4 ++-- + net/unix/af_unix.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/core/sock.c b/net/core/sock.c +index 5b31f3446fc7a..f112862fe0682 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1319,7 +1319,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + if (!sock->ops->set_peek_off) + return -EOPNOTSUPP; + +- v.val = sk->sk_peek_off; ++ v.val = READ_ONCE(sk->sk_peek_off); + break; + case SO_NOFCS: + v.val = sock_flag(sk, SOCK_NOFCS); +@@ -2559,7 +2559,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim); + + int sk_set_peek_off(struct sock *sk, int val) + { +- sk->sk_peek_off = val; ++ WRITE_ONCE(sk->sk_peek_off, val); + return 0; + } + EXPORT_SYMBOL_GPL(sk_set_peek_off); +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index b0dcbb08e60db..8971341c4f8af 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -706,7 +706,7 @@ static int unix_set_peek_off(struct sock *sk, int val) + if (mutex_lock_interruptible(&u->iolock)) + return -EINTR; + +- sk->sk_peek_off = val; ++ WRITE_ONCE(sk->sk_peek_off, val); + mutex_unlock(&u->iolock); + + return 0; +-- +2.40.1 + diff --git a/queue-4.19/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch b/queue-4.19/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch new file mode 100644 index 00000000000..51ce9df70ad --- /dev/null +++ b/queue-4.19/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch @@ -0,0 +1,39 @@ +From 38a5530235623c0a2ed81496587d472d4fd31980 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Jul 2023 14:56:55 +0800 +Subject: net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer() + +From: Yuanjun Gong + +[ Upstream commit e5bcb7564d3bd0c88613c76963c5349be9c511c5 ] + +mlx5e_ipsec_remove_trailer() should return an error code if function +pskb_trim() returns an unexpected value. 
+ +Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path") +Signed-off-by: Yuanjun Gong +Reviewed-by: Leon Romanovsky +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +index 128a82b1dbfc6..ad9db70eb879e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +@@ -121,7 +121,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x) + + trailer_len = alen + plen + 2; + +- pskb_trim(skb, skb->len - trailer_len); ++ ret = pskb_trim(skb, skb->len - trailer_len); ++ if (unlikely(ret)) ++ return ret; + if (skb->protocol == htons(ETH_P_IP)) { + ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len); + ip_send_check(ipv4hdr); +-- +2.40.1 + diff --git a/queue-4.19/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch b/queue-4.19/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch new file mode 100644 index 00000000000..7163f1b4760 --- /dev/null +++ b/queue-4.19/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch @@ -0,0 +1,50 @@ +From 49bdbffef239881f0c87b23b1f65b1154deb4ebf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Jul 2023 08:32:02 -0400 +Subject: net/sched: cls_route: No longer copy tcf_result on update to avoid + use-after-free + +From: valis + +[ Upstream commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 ] + +When route4_change() is called on an existing filter, the whole +tcf_result struct is always copied into the new instance of the filter. 
+ +This causes a problem when updating a filter bound to a class, +as tcf_unbind_filter() is always called on the old instance in the +success path, decreasing filter_cnt of the still referenced class +and allowing it to be deleted, leading to a use-after-free. + +Fix this by no longer copying the tcf_result struct from the old filter. + +Fixes: 1109c00547fc ("net: sched: RCU cls_route") +Reported-by: valis +Reported-by: Bing-Jhong Billy Jheng +Signed-off-by: valis +Signed-off-by: Jamal Hadi Salim +Reviewed-by: Victor Nogueira +Reviewed-by: Pedro Tammela +Reviewed-by: M A Ramdhan +Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_route.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c +index 4c7fa1cfd8e38..a924292623ecf 100644 +--- a/net/sched/cls_route.c ++++ b/net/sched/cls_route.c +@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, + if (fold) { + f->id = fold->id; + f->iif = fold->iif; +- f->res = fold->res; + f->handle = fold->handle; + + f->tp = fold->tp; +-- +2.40.1 + diff --git a/queue-4.19/net-sched-cls_u32-fix-match-key-mis-addressing.patch b/queue-4.19/net-sched-cls_u32-fix-match-key-mis-addressing.patch new file mode 100644 index 00000000000..259844c28c0 --- /dev/null +++ b/queue-4.19/net-sched-cls_u32-fix-match-key-mis-addressing.patch @@ -0,0 +1,145 @@ +From 09218cb1c7c6822727684c5e1e6e867369c30b20 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jul 2023 09:51:51 -0400 +Subject: net: sched: cls_u32: Fix match key mis-addressing + +From: Jamal Hadi Salim + +[ Upstream commit e68409db995380d1badacba41ff24996bd396171 ] + +A match entry is uniquely identified with an "address" or "path" in the +form of: hashtable ID(12b):bucketid(8b):nodeid(12b). 
+ +When creating table match entries all of hash table id, bucket id and +node (match entry id) are needed to be either specified by the user or +reasonable in-kernel defaults are used. The in-kernel default for a table id is +0x800(omnipresent root table); for bucketid it is 0x0. Prior to this fix there +was none for a nodeid i.e. the code assumed that the user passed the correct +nodeid and if the user passes a nodeid of 0 (as Mingi Cho did) then that is what +was used. But nodeid of 0 is reserved for identifying the table. This is not +a problem until we dump. The dump code notices that the nodeid is zero and +assumes it is referencing a table and therefore references table struct +tc_u_hnode instead of what was created i.e match entry struct tc_u_knode. + +Ming does an equivalent of: +tc filter add dev dummy0 parent 10: prio 1 handle 0x1000 \ +protocol ip u32 match ip src 10.0.0.1/32 classid 10:1 action ok + +Essentially specifying a table id 0, bucketid 1 and nodeid of zero +Tableid 0 is remapped to the default of 0x800. +Bucketid 1 is ignored and defaults to 0x00. +Nodeid was assumed to be what Ming passed - 0x000 + +dumping before fix shows: +~$ tc filter ls dev dummy0 parent 10: +filter protocol ip pref 1 u32 chain 0 +filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1 +filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor -30591 + +Note that the last line reports a table instead of a match entry +(you can tell this because it says "ht divisor..."). +As a result of reporting the wrong data type (misinterpretation of struct +tc_u_knode as being struct tc_u_hnode) the divisor is reported with value +of -30591. Ming identified this as part of the heap address +(physmap_base is 0xffff8880 (-30591 - 1)). + +The fix is to ensure that when table entry matches are added and no +nodeid is specified (i.e nodeid == 0) then we get the next available +nodeid from the table's pool. 
+ +After the fix, this is what the dump shows: +$ tc filter ls dev dummy0 parent 10: +filter protocol ip pref 1 u32 chain 0 +filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1 +filter protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 10:1 not_in_hw + match 0a000001/ffffffff at 12 + action order 1: gact action pass + random type none pass val 0 + index 1 ref 1 bind 1 + +Reported-by: Mingi Cho +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Jamal Hadi Salim +Link: https://lore.kernel.org/r/20230726135151.416917-1-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 56 ++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 50 insertions(+), 6 deletions(-) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index ee8ef606a8e99..5e052c7342b9f 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -1067,18 +1067,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, + return -EINVAL; + } + ++ /* At this point, we need to derive the new handle that will be used to ++ * uniquely map the identity of this table match entry. The ++ * identity of the entry that we need to construct is 32 bits made of: ++ * htid(12b):bucketid(8b):node/entryid(12b) ++ * ++ * At this point _we have the table(ht)_ in which we will insert this ++ * entry. We carry the table's id in variable "htid". ++ * Note that earlier code picked the ht selection either by a) the user ++ * providing the htid specified via TCA_U32_HASH attribute or b) when ++ * no such attribute is passed then the root ht, is default to at ID ++ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. ++ * If OTOH the user passed us the htid, they may also pass a bucketid of ++ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is ++ * indicating hash bucketid of 1. 
Rule: the entry/node ID _cannot_ be ++ * passed via the htid, so even if it was non-zero it will be ignored. ++ * ++ * We may also have a handle, if the user passed one. The handle also ++ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). ++ * Rule: the bucketid on the handle is ignored even if one was passed; ++ * rather the value on "htid" is always assumed to be the bucketid. ++ */ + if (handle) { ++ /* Rule: The htid from handle and tableid from htid must match */ + if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) { + NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); + return -EINVAL; + } +- handle = htid | TC_U32_NODE(handle); +- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle, +- GFP_KERNEL); +- if (err) +- return err; +- } else ++ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we ++ * need to finalize the table entry identification with the last ++ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for ++ * entries. Rule: nodeid of 0 is reserved only for tables(see ++ * earlier code which processes TC_U32_DIVISOR attribute). ++ * Rule: The nodeid can only be derived from the handle (and not ++ * htid). ++ * Rule: if the handle specified zero for the node id example ++ * 0x60000000, then pick a new nodeid from the pool of IDs ++ * this hash table has been allocating from. ++ * If OTOH it is specified (i.e for example the user passed a ++ * handle such as 0x60000123), then we use it generate our final ++ * handle which is used to uniquely identify the match entry. ++ */ ++ if (!TC_U32_NODE(handle)) { ++ handle = gen_new_kid(ht, htid); ++ } else { ++ handle = htid | TC_U32_NODE(handle); ++ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, ++ handle, GFP_KERNEL); ++ if (err) ++ return err; ++ } ++ } else { ++ /* The user did not give us a handle; lets just generate one ++ * from the table's pool of nodeids. 
++ */ + handle = gen_new_kid(ht, htid); ++ } + + if (tb[TCA_U32_SEL] == NULL) { + NL_SET_ERR_MSG_MOD(extack, "Selector not specified"); +-- +2.40.1 + diff --git a/queue-4.19/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch b/queue-4.19/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch new file mode 100644 index 00000000000..aa756054d1f --- /dev/null +++ b/queue-4.19/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch @@ -0,0 +1,50 @@ +From 7a2a294806bcdf3e8b03df9ffbd7d6ab26b46cd9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 29 Jul 2023 08:32:00 -0400 +Subject: net/sched: cls_u32: No longer copy tcf_result on update to avoid + use-after-free + +From: valis + +[ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ] + +When u32_change() is called on an existing filter, the whole +tcf_result struct is always copied into the new instance of the filter. + +This causes a problem when updating a filter bound to a class, +as tcf_unbind_filter() is always called on the old instance in the +success path, decreasing filter_cnt of the still referenced class +and allowing it to be deleted, leading to a use-after-free. + +Fix this by no longer copying the tcf_result struct from the old filter. 
+ +Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers") +Reported-by: valis +Reported-by: M A Ramdhan +Signed-off-by: valis +Signed-off-by: Jamal Hadi Salim +Reviewed-by: Victor Nogueira +Reviewed-by: Pedro Tammela +Reviewed-by: M A Ramdhan +Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/sched/cls_u32.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c +index 5e052c7342b9f..1e71ff093c91d 100644 +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -879,7 +879,6 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, + new->ifindex = n->ifindex; + #endif + new->fshift = n->fshift; +- new->res = n->res; + new->flags = n->flags; + RCU_INIT_POINTER(new->ht_down, ht); + +-- +2.40.1 + diff --git a/queue-4.19/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch b/queue-4.19/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch new file mode 100644 index 00000000000..d1a6ea76f2f --- /dev/null +++ b/queue-4.19/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch @@ -0,0 +1,66 @@ +From ef41b2fb7fce5dbd1e2bac03d833c2855f9f82c0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 28 Jul 2023 17:18:12 +0200 +Subject: perf test uprobe_from_different_cu: Skip if there is no gcc +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Georg Müller + +[ Upstream commit 98ce8e4a9dcfb448b30a2d7a16190f4a00382377 ] + +Without gcc, the test will fail. + +On cleanup, ignore probe removal errors. Otherwise, in case of an error +adding the probe, the temporary directory is not removed. 
+ +Fixes: 56cbeacf14353057 ("perf probe: Add test for regression introduced by switch to die_get_decl_file()") +Signed-off-by: Georg Müller +Acked-by: Ian Rogers +Cc: Adrian Hunter +Cc: Alexander Shishkin +Cc: Georg Müller +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Masami Hiramatsu +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20230728151812.454806-2-georgmueller@gmx.net +Link: https://lore.kernel.org/r/CAP-5=fUP6UuLgRty3t2=fQsQi3k4hDMz415vWdp1x88QMvZ8ug@mail.gmail.com/ +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +index 00d2e0e2e0c28..319f36ebb9a40 100644 +--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh ++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh +@@ -4,6 +4,12 @@ + + set -e + ++# skip if there's no gcc ++if ! 
[ -x "$(command -v gcc)" ]; then ++ echo "failed: no gcc compiler" ++ exit 2 ++fi ++ + temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) + + cleanup() +@@ -11,7 +17,7 @@ cleanup() + trap - EXIT TERM INT + if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then + echo "--- Cleaning up ---" +- perf probe -x ${temp_dir}/testfile -d foo ++ perf probe -x ${temp_dir}/testfile -d foo || true + rm -f "${temp_dir}/"* + rmdir "${temp_dir}" + fi +-- +2.40.1 + diff --git a/queue-4.19/series b/queue-4.19/series index fa8dfad966f..7fc6dcc2841 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -286,3 +286,19 @@ asoc-cs42l51-fix-driver-to-properly-autoload-with-automatic-module-loading.patch net-sched-cls_u32-fix-reference-counter-leak-leading-to-overflow.patch perf-fix-function-pointer-case.patch loop-select-i-o-scheduler-none-from-inside-add_disk.patch +word-at-a-time-use-the-same-return-type-for-has_zero.patch +kvm-s390-fix-sthyi-error-handling.patch +net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch +perf-test-uprobe_from_different_cu-skip-if-there-is-.patch +net-sched-cls_u32-fix-match-key-mis-addressing.patch +net-add-missing-data-race-annotations-around-sk-sk_p.patch +net-add-missing-data-race-annotation-for-sk_ll_usec.patch +net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch +net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch +ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch +tcp_metrics-fix-addr_same-helper.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch +tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch +tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch diff --git a/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch new file mode 100644 index 00000000000..eb1f7020657 
--- /dev/null +++ b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch @@ -0,0 +1,51 @@ +From 986a40eb82bd433243eda23bebf59fe8052c5cd5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:57 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_lock + +From: Eric Dumazet + +[ Upstream commit 285ce119a3c6c4502585936650143e54c8692788 ] + +tm->tcpm_lock can be read or written locklessly. + +Add needed READ_ONCE()/WRITE_ONCE() to document this. + +Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-4-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 2d9d95559f5fa..2529b1e6ded0c 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -59,7 +59,8 @@ static inline struct net *tm_net(struct tcp_metrics_block *tm) + static bool tcp_metric_locked(struct tcp_metrics_block *tm, + enum tcp_metric_index idx) + { +- return tm->tcpm_lock & (1 << idx); ++ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */ ++ return READ_ONCE(tm->tcpm_lock) & (1 << idx); + } + + static u32 tcp_metric_get(struct tcp_metrics_block *tm, +@@ -110,7 +111,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + val |= 1 << TCP_METRIC_CWND; + if (dst_metric_locked(dst, RTAX_REORDERING)) + val |= 1 << TCP_METRIC_REORDERING; +- tm->tcpm_lock = val; ++ /* Paired with READ_ONCE() in tcp_metric_locked() */ ++ WRITE_ONCE(tm->tcpm_lock, val); + + msval = dst_metric_raw(dst, RTAX_RTT); + tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; +-- +2.40.1 + diff --git a/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch 
b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch new file mode 100644 index 00000000000..66cb1202d83 --- /dev/null +++ b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch @@ -0,0 +1,66 @@ +From ba0f0c5da4d6fafee8f68e380ef626d23deda998 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:59 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_net + +From: Eric Dumazet + +[ Upstream commit d5d986ce42c71a7562d32c4e21e026b0f87befec ] + +tm->tcpm_net can be read or written locklessly. + +Instead of changing write_pnet() and read_pnet() and potentially +hurt performance, add the needed READ_ONCE()/WRITE_ONCE() +in tm_net() and tcpm_new(). + +Fixes: 849e8a0ca8d5 ("tcp_metrics: Add a field tcpm_net and verify it matches on lookup") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-6-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index fa99481abce8f..dfd224979cf65 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics { + + struct tcp_metrics_block { + struct tcp_metrics_block __rcu *tcpm_next; +- possible_net_t tcpm_net; ++ struct net *tcpm_net; + struct inetpeer_addr tcpm_saddr; + struct inetpeer_addr tcpm_daddr; + unsigned long tcpm_stamp; +@@ -51,9 +51,10 @@ struct tcp_metrics_block { + struct rcu_head rcu_head; + }; + +-static inline struct net *tm_net(struct tcp_metrics_block *tm) ++static inline struct net *tm_net(const struct tcp_metrics_block *tm) + { +- return read_pnet(&tm->tcpm_net); ++ /* Paired with the WRITE_ONCE() in tcpm_new() */ ++ return READ_ONCE(tm->tcpm_net); + } + + static bool tcp_metric_locked(struct tcp_metrics_block *tm, +@@ -197,7 +198,9 
@@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + if (!tm) + goto out_unlock; + } +- write_pnet(&tm->tcpm_net, net); ++ /* Paired with the READ_ONCE() in tm_net() */ ++ WRITE_ONCE(tm->tcpm_net, net); ++ + tm->tcpm_saddr = *saddr; + tm->tcpm_daddr = *daddr; + +-- +2.40.1 + diff --git a/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch new file mode 100644 index 00000000000..a3d87702003 --- /dev/null +++ b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch @@ -0,0 +1,88 @@ +From 375c46cc62aed2ac4c4c404b0b21d689fd027318 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:56 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_stamp + +From: Eric Dumazet + +[ Upstream commit 949ad62a5d5311d36fce2e14fe5fed3f936da51c ] + +tm->tcpm_stamp can be read or written locklessly. + +Add needed READ_ONCE()/WRITE_ONCE() to document this. + +Also constify tcpm_check_stamp() dst argument. 
+ +Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-3-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index c3e133c0510ea..2d9d95559f5fa 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -97,7 +97,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + u32 msval; + u32 val; + +- tm->tcpm_stamp = jiffies; ++ WRITE_ONCE(tm->tcpm_stamp, jiffies); + + val = 0; + if (dst_metric_locked(dst, RTAX_RTT)) +@@ -131,9 +131,15 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + + #define TCP_METRICS_TIMEOUT (60 * 60 * HZ) + +-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) ++static void tcpm_check_stamp(struct tcp_metrics_block *tm, ++ const struct dst_entry *dst) + { +- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) ++ unsigned long limit; ++ ++ if (!tm) ++ return; ++ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT; ++ if (unlikely(time_after(jiffies, limit))) + tcpm_suck_dst(tm, dst, false); + } + +@@ -174,7 +180,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + oldest = deref_locked(tcp_metrics_hash[hash].chain); + for (tm = deref_locked(oldest->tcpm_next); tm; + tm = deref_locked(tm->tcpm_next)) { +- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) ++ if (time_before(READ_ONCE(tm->tcpm_stamp), ++ READ_ONCE(oldest->tcpm_stamp))) + oldest = tm; + } + tm = oldest; +@@ -431,7 +438,7 @@ void tcp_update_metrics(struct sock *sk) + tp->reordering); + } + } +- tm->tcpm_stamp = jiffies; ++ WRITE_ONCE(tm->tcpm_stamp, jiffies); + out_unlock: + rcu_read_unlock(); + } +@@ -652,7 +659,7 @@ 
static int tcp_metrics_fill_info(struct sk_buff *msg, + } + + if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, +- jiffies - tm->tcpm_stamp, ++ jiffies - READ_ONCE(tm->tcpm_stamp), + TCP_METRICS_ATTR_PAD) < 0) + goto nla_put_failure; + +-- +2.40.1 + diff --git a/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch new file mode 100644 index 00000000000..fb1dab946d7 --- /dev/null +++ b/queue-4.19/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch @@ -0,0 +1,85 @@ +From 248fbbaf691a51cbae70f91cf6ab3bc4d715e113 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:58 +0000 +Subject: tcp_metrics: annotate data-races around tm->tcpm_vals[] + +From: Eric Dumazet + +[ Upstream commit 8c4d04f6b443869d25e59822f7cec88d647028a9 ] + +tm->tcpm_vals[] values can be read or written locklessly. + +Add needed READ_ONCE()/WRITE_ONCE() to document this, +and force use of tcp_metric_get() and tcp_metric_set() + +Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 2529b1e6ded0c..fa99481abce8f 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -63,17 +63,19 @@ static bool tcp_metric_locked(struct tcp_metrics_block *tm, + return READ_ONCE(tm->tcpm_lock) & (1 << idx); + } + +-static u32 tcp_metric_get(struct tcp_metrics_block *tm, ++static u32 tcp_metric_get(const struct tcp_metrics_block *tm, + enum tcp_metric_index idx) + { +- return tm->tcpm_vals[idx]; ++ /* Paired with WRITE_ONCE() in tcp_metric_set() */ ++ return READ_ONCE(tm->tcpm_vals[idx]); + } + + static void tcp_metric_set(struct tcp_metrics_block *tm, + enum 
tcp_metric_index idx, + u32 val) + { +- tm->tcpm_vals[idx] = val; ++ /* Paired with READ_ONCE() in tcp_metric_get() */ ++ WRITE_ONCE(tm->tcpm_vals[idx], val); + } + + static bool addr_same(const struct inetpeer_addr *a, +@@ -115,13 +117,16 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + WRITE_ONCE(tm->tcpm_lock, val); + + msval = dst_metric_raw(dst, RTAX_RTT); +- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; ++ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC); + + msval = dst_metric_raw(dst, RTAX_RTTVAR); +- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; +- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); +- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); +- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); ++ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC); ++ tcp_metric_set(tm, TCP_METRIC_SSTHRESH, ++ dst_metric_raw(dst, RTAX_SSTHRESH)); ++ tcp_metric_set(tm, TCP_METRIC_CWND, ++ dst_metric_raw(dst, RTAX_CWND)); ++ tcp_metric_set(tm, TCP_METRIC_REORDERING, ++ dst_metric_raw(dst, RTAX_REORDERING)); + if (fastopen_clear) { + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; +@@ -672,7 +677,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, + if (!nest) + goto nla_put_failure; + for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { +- u32 val = tm->tcpm_vals[i]; ++ u32 val = tcp_metric_get(tm, i); + + if (!val) + continue; +-- +2.40.1 + diff --git a/queue-4.19/tcp_metrics-fix-addr_same-helper.patch b/queue-4.19/tcp_metrics-fix-addr_same-helper.patch new file mode 100644 index 00000000000..f3164af2396 --- /dev/null +++ b/queue-4.19/tcp_metrics-fix-addr_same-helper.patch @@ -0,0 +1,46 @@ +From 0f19ff53abeb75c75cf2f6f9fb76514624ae382f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:14:55 +0000 +Subject: tcp_metrics: fix addr_same() helper + +From: Eric Dumazet + +[ Upstream commit 
e6638094d7af6c7b9dcca05ad009e79e31b4f670 ] + +Because v4 and v6 families use separate inetpeer trees (respectively +net->ipv4.peers and net->ipv6.peers), inetpeer_addr_cmp(a, b) assumes +a & b share the same family. + +tcp_metrics use a common hash table, where entries can have different +families. + +We must therefore make sure to not call inetpeer_addr_cmp() +if the families do not match. + +Fixes: d39d14ffa24c ("net: Add helper function to compare inetpeer addresses") +Signed-off-by: Eric Dumazet +Reviewed-by: David Ahern +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-2-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 4960e2b6bd7f7..c3e133c0510ea 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -78,7 +78,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm, + static bool addr_same(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) + { +- return inetpeer_addr_cmp(a, b) == 0; ++ return (a->family == b->family) && !inetpeer_addr_cmp(a, b); + } + + struct tcpm_hash_bucket { +-- +2.40.1 + diff --git a/queue-4.19/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch b/queue-4.19/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch new file mode 100644 index 00000000000..b122ed8458f --- /dev/null +++ b/queue-4.19/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch @@ -0,0 +1,85 @@ +From a9240f11ad0dff1e879d2caf4c171b1d4e33f188 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Aug 2023 13:15:00 +0000 +Subject: tcp_metrics: fix data-race in tcpm_suck_dst() vs fastopen + +From: Eric Dumazet + +[ Upstream commit ddf251fa2bc1d3699eec0bae6ed0bc373b8fda79 ] + +Whenever tcpm_new() reclaims an old entry, tcpm_suck_dst() +would overwrite data that could be read from 
tcp_fastopen_cache_get() +or tcp_metrics_fill_info(). + +We need to acquire fastopen_seqlock to maintain consistency. + +For newly allocated objects, tcpm_new() can switch to kzalloc() +to avoid an extra fastopen_seqlock acquisition. + +Fixes: 1fe4c481ba63 ("net-tcp: Fast Open client - cookie cache") +Signed-off-by: Eric Dumazet +Cc: Yuchung Cheng +Reviewed-by: Kuniyuki Iwashima +Link: https://lore.kernel.org/r/20230802131500.1478140-7-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/tcp_metrics.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index dfd224979cf65..7bbd9125b5000 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -93,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; + static unsigned int tcp_metrics_hash_log __read_mostly; + + static DEFINE_SPINLOCK(tcp_metrics_lock); ++static DEFINE_SEQLOCK(fastopen_seqlock); + + static void tcpm_suck_dst(struct tcp_metrics_block *tm, + const struct dst_entry *dst, +@@ -129,11 +130,13 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, + tcp_metric_set(tm, TCP_METRIC_REORDERING, + dst_metric_raw(dst, RTAX_REORDERING)); + if (fastopen_clear) { ++ write_seqlock(&fastopen_seqlock); + tm->tcpm_fastopen.mss = 0; + tm->tcpm_fastopen.syn_loss = 0; + tm->tcpm_fastopen.try_exp = 0; + tm->tcpm_fastopen.cookie.exp = false; + tm->tcpm_fastopen.cookie.len = 0; ++ write_sequnlock(&fastopen_seqlock); + } + } + +@@ -194,7 +197,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + } + tm = oldest; + } else { +- tm = kmalloc(sizeof(*tm), GFP_ATOMIC); ++ tm = kzalloc(sizeof(*tm), GFP_ATOMIC); + if (!tm) + goto out_unlock; + } +@@ -204,7 +207,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, + tm->tcpm_saddr = *saddr; + tm->tcpm_daddr = *daddr; + +- tcpm_suck_dst(tm, dst, true); ++ tcpm_suck_dst(tm, dst, reclaim); + + if 
(likely(!reclaim)) { + tm->tcpm_next = tcp_metrics_hash[hash].chain; +@@ -561,8 +564,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) + return ret; + } + +-static DEFINE_SEQLOCK(fastopen_seqlock); +- + void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) + { +-- +2.40.1 + diff --git a/queue-4.19/word-at-a-time-use-the-same-return-type-for-has_zero.patch b/queue-4.19/word-at-a-time-use-the-same-return-type-for-has_zero.patch new file mode 100644 index 00000000000..987c4f00250 --- /dev/null +++ b/queue-4.19/word-at-a-time-use-the-same-return-type-for-has_zero.patch @@ -0,0 +1,74 @@ +From e837b3bab10e874b1e943ad4a0eb71c48db3776f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Aug 2023 15:22:17 -0700 +Subject: word-at-a-time: use the same return type for has_zero regardless of + endianness + +From: ndesaulniers@google.com + +[ Upstream commit 79e8328e5acbe691bbde029a52c89d70dcbc22f3 ] + +Compiling big-endian targets with Clang produces the diagnostic: + + fs/namei.c:2173:13: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical] + } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); + ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + || + fs/namei.c:2173:13: note: cast one or both operands to int to silence this warning + +It appears that when has_zero was introduced, two definitions were +produced with different signatures (in particular different return +types). + +Looking at the usage in hash_name() in fs/namei.c, I suspect that +has_zero() is meant to be invoked twice per while loop iteration; using +logical-or would not update `bdata` when `a` did not have zeros. So I +think it's preferred to always return an unsigned long rather than a +bool than update the while loop in hash_name() to use a logical-or +rather than bitwise-or. 
+ +[ Also changed powerpc version to do the same - Linus ] + +Link: https://github.com/ClangBuiltLinux/linux/issues/1832 +Link: https://lore.kernel.org/lkml/20230801-bitwise-v1-1-799bec468dc4@google.com/ +Fixes: 36126f8f2ed8 ("word-at-a-time: make the interfaces truly generic") +Debugged-by: Nathan Chancellor +Signed-off-by: Nick Desaulniers +Acked-by: Heiko Carstens +Cc: Arnd Bergmann +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/powerpc/include/asm/word-at-a-time.h | 2 +- + include/asm-generic/word-at-a-time.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h +index f3f4710d4ff52..99129b0cd8b8a 100644 +--- a/arch/powerpc/include/asm/word-at-a-time.h ++++ b/arch/powerpc/include/asm/word-at-a-time.h +@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask) + return leading_zero_bits >> 3; + } + +-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) ++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) + { + unsigned long rhs = val | c->low_bits; + *data = rhs; +diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h +index 20c93f08c9933..95a1d214108a5 100644 +--- a/include/asm-generic/word-at-a-time.h ++++ b/include/asm-generic/word-at-a-time.h +@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask) + return (mask >> 8) ? byte : byte + 1; + } + +-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) ++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) + { + unsigned long rhs = val | c->low_bits; + *data = rhs; +-- +2.40.1 + -- 2.47.3