From: Sasha Levin Date: Sun, 15 May 2022 18:30:31 +0000 (-0400) Subject: Fixes for 5.17 X-Git-Tag: v4.9.315~52 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ef0466d813488e90e3d1edf2007591f642d5cd01;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.17 Signed-off-by: Sasha Levin --- diff --git a/queue-5.17/arm64-vdso-fix-makefile-dependency-on-vdso.so.patch b/queue-5.17/arm64-vdso-fix-makefile-dependency-on-vdso.so.patch new file mode 100644 index 00000000000..c6c878aa737 --- /dev/null +++ b/queue-5.17/arm64-vdso-fix-makefile-dependency-on-vdso.so.patch @@ -0,0 +1,77 @@ +From a0ca08695897ba05c0b919f8c58f66ae6e090602 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 11:27:21 +0100 +Subject: arm64: vdso: fix makefile dependency on vdso.so + +From: Joey Gouly + +[ Upstream commit 205f3991a273cac6008ef4db3d1c0dc54d14fb56 ] + +There is currently no dependency for vdso*-wrap.S on vdso*.so, which means that +you can get a build that uses a stale vdso*-wrap.o. + +In commit a5b8ca97fbf8, the file that includes the vdso.so was moved and renamed +from arch/arm64/kernel/vdso/vdso.S to arch/arm64/kernel/vdso-wrap.S, when this +happened the Makefile was not updated to force the dependency on vdso.so. 
+ +Fixes: a5b8ca97fbf8 ("arm64: do not descend to vdso directories twice") +Signed-off-by: Joey Gouly +Cc: Masahiro Yamada +Cc: Vincenzo Frascino +Cc: Catalin Marinas +Cc: Will Deacon +Link: https://lore.kernel.org/r/20220510102721.50811-1-joey.gouly@arm.com +Signed-off-by: Will Deacon +Signed-off-by: Sasha Levin +--- + arch/arm64/kernel/Makefile | 4 ++++ + arch/arm64/kernel/vdso/Makefile | 3 --- + arch/arm64/kernel/vdso32/Makefile | 3 --- + 3 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile +index 88b3e2a21408..db557856854e 100644 +--- a/arch/arm64/kernel/Makefile ++++ b/arch/arm64/kernel/Makefile +@@ -74,6 +74,10 @@ obj-$(CONFIG_ARM64_MTE) += mte.o + obj-y += vdso-wrap.o + obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o + ++# Force dependency (vdso*-wrap.S includes vdso.so through incbin) ++$(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so ++$(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so ++ + obj-y += probes/ + head-y := head.o + extra-y += $(head-y) vmlinux.lds +diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile +index 172452f79e46..ac1964ebed1e 100644 +--- a/arch/arm64/kernel/vdso/Makefile ++++ b/arch/arm64/kernel/vdso/Makefile +@@ -52,9 +52,6 @@ GCOV_PROFILE := n + targets += vdso.lds + CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +-# Force dependency (incbin is bad) +-$(obj)/vdso.o : $(obj)/vdso.so +- + # Link rule for the .so file, .lds has to be first + $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold_and_vdso_check) +diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile +index 6c01b63ff56d..2c2036eb0df7 100644 +--- a/arch/arm64/kernel/vdso32/Makefile ++++ b/arch/arm64/kernel/vdso32/Makefile +@@ -130,9 +130,6 @@ obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) + targets += vdso.lds + CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +-# Force dependency (vdso.s includes vdso.so through incbin) +-$(obj)/vdso.o: 
$(obj)/vdso.so +- + include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +-- +2.35.1 + diff --git a/queue-5.17/asoc-max98090-generate-notifications-on-changes-for-.patch b/queue-5.17/asoc-max98090-generate-notifications-on-changes-for-.patch new file mode 100644 index 00000000000..46533a67ea9 --- /dev/null +++ b/queue-5.17/asoc-max98090-generate-notifications-on-changes-for-.patch @@ -0,0 +1,37 @@ +From ca4fdddfa270c05d37a2eab851a2666385711812 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Apr 2022 20:34:54 +0100 +Subject: ASoC: max98090: Generate notifications on changes for custom control + +From: Mark Brown + +[ Upstream commit 13fcf676d9e102594effc686d98521ff5c90b925 ] + +The max98090 driver has some custom controls which share a put() function +which returns 0 unconditionally, meaning that events are not generated +when the value changes. Fix that. + +Signed-off-by: Mark Brown +Link: https://lore.kernel.org/r/20220420193454.2647908-2-broonie@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/max98090.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c +index 6d9261346842..62b41ca050a2 100644 +--- a/sound/soc/codecs/max98090.c ++++ b/sound/soc/codecs/max98090.c +@@ -430,7 +430,7 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, + mask << mc->shift, + sel << mc->shift); + +- return 0; ++ return *select != val; + } + + static const char *max98090_perf_pwr_text[] = +-- +2.35.1 + diff --git a/queue-5.17/asoc-max98090-reject-invalid-values-in-custom-contro.patch b/queue-5.17/asoc-max98090-reject-invalid-values-in-custom-contro.patch new file mode 100644 index 00000000000..fbee921351a --- /dev/null +++ b/queue-5.17/asoc-max98090-reject-invalid-values-in-custom-contro.patch @@ -0,0 +1,40 @@ +From a9ece649893a994e24b3befe92b26a6b209fa852 Mon Sep 17 00:00:00 2001 +From: Sasha Levin 
+Date: Wed, 20 Apr 2022 20:34:53 +0100 +Subject: ASoC: max98090: Reject invalid values in custom control put() + +From: Mark Brown + +[ Upstream commit 2fbe467bcbfc760a08f08475eea6bbd4c2874319 ] + +The max98090 driver has a custom put function for some controls which can +only be updated in certain circumstances which makes no effort to validate +that input is suitable for the control, allowing out of spec values to be +written to the hardware and presented to userspace. Fix this by returning +an error when invalid values are written. + +Signed-off-by: Mark Brown +Link: https://lore.kernel.org/r/20220420193454.2647908-1-broonie@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/max98090.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c +index b45ec35cd63c..6d9261346842 100644 +--- a/sound/soc/codecs/max98090.c ++++ b/sound/soc/codecs/max98090.c +@@ -413,6 +413,9 @@ static int max98090_put_enab_tlv(struct snd_kcontrol *kcontrol, + + val = (val >> mc->shift) & mask; + ++ if (sel < 0 || sel > mc->max) ++ return -EINVAL; ++ + *select = sel; + + /* Setting a volume is only valid if it is already On */ +-- +2.35.1 + diff --git a/queue-5.17/asoc-ops-validate-input-values-in-snd_soc_put_volsw_.patch b/queue-5.17/asoc-ops-validate-input-values-in-snd_soc_put_volsw_.patch new file mode 100644 index 00000000000..37c0e3380d2 --- /dev/null +++ b/queue-5.17/asoc-ops-validate-input-values-in-snd_soc_put_volsw_.patch @@ -0,0 +1,60 @@ +From 6e74f07a6430b7d510748409b5cb34ec2b9aaaec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Apr 2022 14:12:39 +0100 +Subject: ASoC: ops: Validate input values in snd_soc_put_volsw_range() + +From: Mark Brown + +[ Upstream commit aa22125c57f9e577f0a667e4fa07fc3fa8ca1e60 ] + +Check that values written via snd_soc_put_volsw_range() are +within the range advertised by the control, ensuring that we +don't write out of spec values to the 
hardware. + +Signed-off-by: Mark Brown +Link: https://lore.kernel.org/r/20220423131239.3375261-1-broonie@kernel.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/soc-ops.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c +index 58347eadd219..e693070f51fe 100644 +--- a/sound/soc/soc-ops.c ++++ b/sound/soc/soc-ops.c +@@ -519,7 +519,15 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, + unsigned int mask = (1 << fls(max)) - 1; + unsigned int invert = mc->invert; + unsigned int val, val_mask; +- int err, ret; ++ int err, ret, tmp; ++ ++ tmp = ucontrol->value.integer.value[0]; ++ if (tmp < 0) ++ return -EINVAL; ++ if (mc->platform_max && tmp > mc->platform_max) ++ return -EINVAL; ++ if (tmp > mc->max - mc->min + 1) ++ return -EINVAL; + + if (invert) + val = (max - ucontrol->value.integer.value[0]) & mask; +@@ -534,6 +542,14 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, + ret = err; + + if (snd_soc_volsw_is_stereo(mc)) { ++ tmp = ucontrol->value.integer.value[1]; ++ if (tmp < 0) ++ return -EINVAL; ++ if (mc->platform_max && tmp > mc->platform_max) ++ return -EINVAL; ++ if (tmp > mc->max - mc->min + 1) ++ return -EINVAL; ++ + if (invert) + val = (max - ucontrol->value.integer.value[1]) & mask; + else +-- +2.35.1 + diff --git a/queue-5.17/asoc-sof-fix-null-pointer-exception-in-sof_pci_probe.patch b/queue-5.17/asoc-sof-fix-null-pointer-exception-in-sof_pci_probe.patch new file mode 100644 index 00000000000..a062ebadac5 --- /dev/null +++ b/queue-5.17/asoc-sof-fix-null-pointer-exception-in-sof_pci_probe.patch @@ -0,0 +1,51 @@ +From 37a9bbfd32e574c47263a7101cd98e86fc52f128 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Apr 2022 13:33:57 -0500 +Subject: ASoC: SOF: Fix NULL pointer exception in sof_pci_probe callback + +From: Ajit Kumar Pandey + +[ Upstream commit c61711c1c95791850be48dd65a1d72eb34ba719f ] + +We are accessing 
"desc->ops" in sof_pci_probe without checking "desc" +pointer. This results in NULL pointer exception if pci_id->driver_data +i.e desc pointer isn't defined in sof device probe: + +BUG: kernel NULL pointer dereference, address: 0000000000000060 +PGD 0 P4D 0 +Oops: 0000 [#1] PREEMPT SMP NOPTI +RIP: 0010:sof_pci_probe+0x1e/0x17f [snd_sof_pci] +Code: Unable to access opcode bytes at RIP 0xffffffffc043dff4. +RSP: 0018:ffffac4b03b9b8d8 EFLAGS: 00010246 + +Add NULL pointer check for sof_dev_desc pointer to avoid such exception. + +Reviewed-by: Ranjani Sridharan +Signed-off-by: Ajit Kumar Pandey +Signed-off-by: Pierre-Louis Bossart +Link: https://lore.kernel.org/r/20220426183357.102155-1-pierre-louis.bossart@linux.intel.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/sof/sof-pci-dev.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c +index 20c6ca37dbc4..53e97abbe6e3 100644 +--- a/sound/soc/sof/sof-pci-dev.c ++++ b/sound/soc/sof/sof-pci-dev.c +@@ -130,6 +130,11 @@ int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) + + dev_dbg(&pci->dev, "PCI DSP detected"); + ++ if (!desc) { ++ dev_err(dev, "error: no matching PCI descriptor\n"); ++ return -ENODEV; ++ } ++ + if (!desc->ops) { + dev_err(dev, "error: no matching PCI descriptor ops\n"); + return -ENODEV; +-- +2.35.1 + diff --git a/queue-5.17/batman-adv-don-t-skb_split-skbuffs-with-frag_list.patch b/queue-5.17/batman-adv-don-t-skb_split-skbuffs-with-frag_list.patch new file mode 100644 index 00000000000..aa34967a83e --- /dev/null +++ b/queue-5.17/batman-adv-don-t-skb_split-skbuffs-with-frag_list.patch @@ -0,0 +1,60 @@ +From 642e927e19162a886d77bde65f8f555d0e896a71 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 16 Apr 2022 13:51:10 +0200 +Subject: batman-adv: Don't skb_split skbuffs with frag_list + +From: Sven Eckelmann + +[ Upstream commit a063f2fba3fa633a599253b62561051ac185fa99 ] + +The receiving 
interface might have used GRO to receive more fragments than +MAX_SKB_FRAGS fragments. In this case, these will not be stored in +skb_shinfo(skb)->frags but merged into the frag list. + +batman-adv relies on the function skb_split to split packets up into +multiple smaller packets which are not larger than the MTU on the outgoing +interface. But this function cannot handle frag_list entries and is only +operating on skb_shinfo(skb)->frags. If it is still trying to split such an +skb and xmit'ing it on an interface without support for NETIF_F_FRAGLIST, +then validate_xmit_skb() will try to linearize it. But this fails due to +inconsistent information. And __pskb_pull_tail will trigger a BUG_ON after +skb_copy_bits() returns an error. + +In case of entries in frag_list, just linearize the skb before operating on +it with skb_split(). + +Reported-by: Felix Kaechele +Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") +Signed-off-by: Sven Eckelmann +Tested-by: Felix Kaechele +Signed-off-by: Simon Wunderlich +Signed-off-by: Sasha Levin +--- + net/batman-adv/fragmentation.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c +index 0899a729a23f..c120c7c6d25f 100644 +--- a/net/batman-adv/fragmentation.c ++++ b/net/batman-adv/fragmentation.c +@@ -475,6 +475,17 @@ int batadv_frag_send_packet(struct sk_buff *skb, + goto free_skb; + } + ++ /* GRO might have added fragments to the fragment list instead of ++ * frags[]. 
But this is not handled by skb_split and must be ++ * linearized to avoid incorrect length information after all ++ * batman-adv fragments were created and submitted to the ++ * hard-interface ++ */ ++ if (skb_has_frag_list(skb) && __skb_linearize(skb)) { ++ ret = -ENOMEM; ++ goto free_skb; ++ } ++ + /* Create one header to be copied to all fragments */ + frag_header.packet_type = BATADV_UNICAST_FRAG; + frag_header.version = BATADV_COMPAT_VERSION; +-- +2.35.1 + diff --git a/queue-5.17/block-do-not-call-folio_next-on-an-unreferenced-foli.patch b/queue-5.17/block-do-not-call-folio_next-on-an-unreferenced-foli.patch new file mode 100644 index 00000000000..1ad98e2c2df --- /dev/null +++ b/queue-5.17/block-do-not-call-folio_next-on-an-unreferenced-foli.patch @@ -0,0 +1,68 @@ +From 59346e63b22f01c38a9f00fae20344aaeffdb8a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 May 2022 00:09:31 -0400 +Subject: block: Do not call folio_next() on an unreferenced folio + +From: Matthew Wilcox (Oracle) + +[ Upstream commit 170f37d6aa6ad4582eefd7459015de79e244536e ] + +It is unsafe to call folio_next() on a folio unless you hold a reference +on it that prevents it from being split or freed. After returning +from the iterator, iomap calls folio_end_writeback() which may drop +the last reference to the page, or allow the page to be split. If that +happens, the iterator will not advance far enough through the bio_vec, +leading to assertion failures like the BUG() in folio_end_writeback() +that checks we're not trying to end writeback on a page not currently +under writeback. Other assertion failures were also seen, but they're +all explained by this one bug. + +Fix the bug by remembering where the next folio starts before returning +from the iterator. There are other ways of fixing this bug, but this +seems the simplest. + +Reported-by: Darrick J. Wong +Tested-by: Darrick J. 
Wong +Reported-by: Brian Foster +Tested-by: Brian Foster +Signed-off-by: Matthew Wilcox (Oracle) +Signed-off-by: Sasha Levin +--- + include/linux/bio.h | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/include/linux/bio.h b/include/linux/bio.h +index 117d7f248ac9..2ca54c084d5a 100644 +--- a/include/linux/bio.h ++++ b/include/linux/bio.h +@@ -272,6 +272,7 @@ struct folio_iter { + size_t offset; + size_t length; + /* private: for use by the iterator */ ++ struct folio *_next; + size_t _seg_count; + int _i; + }; +@@ -286,6 +287,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, + PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + fi->_seg_count = bvec->bv_len; + fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); ++ fi->_next = folio_next(fi->folio); + fi->_i = i; + } + +@@ -293,9 +295,10 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) + { + fi->_seg_count -= fi->length; + if (fi->_seg_count) { +- fi->folio = folio_next(fi->folio); ++ fi->folio = fi->_next; + fi->offset = 0; + fi->length = min(folio_size(fi->folio), fi->_seg_count); ++ fi->_next = folio_next(fi->folio); + } else if (fi->_i + 1 < bio->bi_vcnt) { + bio_first_folio(fi, bio, fi->_i + 1); + } else { +-- +2.35.1 + diff --git a/queue-5.17/dim-initialize-all-struct-fields.patch b/queue-5.17/dim-initialize-all-struct-fields.patch new file mode 100644 index 00000000000..d34627cf179 --- /dev/null +++ b/queue-5.17/dim-initialize-all-struct-fields.patch @@ -0,0 +1,119 @@ +From de1ad87750a6b7ef8ae754fd70d8d1c355b66733 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 May 2022 18:10:38 -0700 +Subject: dim: initialize all struct fields +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jesse Brandeburg + +[ Upstream commit ee1444b5e1df4155b591d0d9b1e72853a99ea861 ] + +The W=2 build pointed out that the code wasn't initializing all the +variables in the 
dim_cq_moder declarations with the struct initializers. +The net change here is zero since these structs were already static +const globals and were initialized with zeros by the compiler, but +removing compiler warnings has value in and of itself. + +lib/dim/net_dim.c: At top level: +lib/dim/net_dim.c:54:9: warning: missing initializer for field ‘comps’ of ‘const struct dim_cq_moder’ [-Wmissing-field-initializers] + 54 | NET_DIM_RX_EQE_PROFILES, + | ^~~~~~~~~~~~~~~~~~~~~~~ +In file included from lib/dim/net_dim.c:6: +./include/linux/dim.h:45:13: note: ‘comps’ declared here + 45 | u16 comps; + | ^~~~~ + +and repeats for the tx struct, and once you fix the comps entry then +the cq_period_mode field needs the same treatment. + +Use the commonly accepted style to indicate to the compiler that we +know what we're doing, and add a comma at the end of each struct +initializer to clean up the issue, and use explicit initializers +for the fields we are initializing which makes the compiler happy. + +While here and fixing these lines, clean up the code slightly with +a fix for the super long lines by removing the word "_MODERATION" from a +couple defines only used in this file. 
+ +Fixes: f8be17b81d44 ("lib/dim: Fix -Wunused-const-variable warnings") +Signed-off-by: Jesse Brandeburg +Link: https://lore.kernel.org/r/20220507011038.14568-1-jesse.brandeburg@intel.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + lib/dim/net_dim.c | 44 ++++++++++++++++++++++---------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c +index 06811d866775..53f6b9c6e936 100644 +--- a/lib/dim/net_dim.c ++++ b/lib/dim/net_dim.c +@@ -12,41 +12,41 @@ + * Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES + */ + #define NET_DIM_PARAMS_NUM_PROFILES 5 +-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128 ++#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 ++#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 + #define NET_DIM_DEF_PROFILE_CQE 1 + #define NET_DIM_DEF_PROFILE_EQE 1 + + #define NET_DIM_RX_EQE_PROFILES { \ +- {1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ ++ {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \ + } + + #define NET_DIM_RX_CQE_PROFILES { \ +- {2, 256}, \ +- {8, 128}, \ +- {16, 64}, \ +- {32, 64}, \ +- {64, 64} \ ++ {.usec = 2, .pkts = 256,}, \ ++ {.usec = 8, .pkts = 128,}, \ ++ {.usec = 16, .pkts = 64,}, \ ++ {.usec = 32, .pkts = 64,}, \ ++ {.usec = 64, .pkts = 64,} \ + } + + #define NET_DIM_TX_EQE_PROFILES { \ +- {1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {8, 
NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \ +- {128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \ ++ {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \ ++ {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \ + } + + #define NET_DIM_TX_CQE_PROFILES { \ +- {5, 128}, \ +- {8, 64}, \ +- {16, 32}, \ +- {32, 32}, \ +- {64, 32} \ ++ {.usec = 5, .pkts = 128,}, \ ++ {.usec = 8, .pkts = 64,}, \ ++ {.usec = 16, .pkts = 32,}, \ ++ {.usec = 32, .pkts = 32,}, \ ++ {.usec = 64, .pkts = 32,} \ + } + + static const struct dim_cq_moder +-- +2.35.1 + diff --git a/queue-5.17/drm-nouveau-fix-a-potential-theorical-leak-in-nouvea.patch b/queue-5.17/drm-nouveau-fix-a-potential-theorical-leak-in-nouvea.patch new file mode 100644 index 00000000000..59efdc04b65 --- /dev/null +++ b/queue-5.17/drm-nouveau-fix-a-potential-theorical-leak-in-nouvea.patch @@ -0,0 +1,72 @@ +From d3063e139d8abd68c2a2eaa52df947bfe475de8f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Feb 2022 07:03:11 +0100 +Subject: drm/nouveau: Fix a potential theorical leak in + nouveau_get_backlight_name() + +From: Christophe JAILLET + +[ Upstream commit ab244be47a8f111bc82496a8a20c907236e37f95 ] + +If successful ida_simple_get() calls are not undone when needed, some +additional memory may be allocated and wasted. + +Here, an ID between 0 and MAX_INT is required. If this ID is >=100, it is +not taken into account and is wasted. It should be released. + +Instead of calling ida_simple_remove(), take advantage of the 'max' +parameter to require the ID not to be too big. Should it be too big, it +is not allocated and don't need to be freed. 
+ +While at it, use ida_alloc_xxx()/ida_free() instead of +ida_simple_get()/ida_simple_remove(). +The latter is deprecated and more verbose. + +Fixes: db1a0ae21461 ("drm/nouveau/bl: Assign different names to interfaces") +Signed-off-by: Christophe JAILLET +Reviewed-by: Lyude Paul +[Fixed formatting warning from checkpatch] +Signed-off-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/9ba85bca59df6813dc029e743a836451d5173221.1644386541.git.christophe.jaillet@wanadoo.fr +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/nouveau_backlight.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c +index daf9f87477ba..a2141d3d9b1d 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c ++++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c +@@ -46,8 +46,9 @@ static bool + nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], + struct nouveau_backlight *bl) + { +- const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL); +- if (nb < 0 || nb >= 100) ++ const int nb = ida_alloc_max(&bl_ida, 99, GFP_KERNEL); ++ ++ if (nb < 0) + return false; + if (nb > 0) + snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); +@@ -414,7 +415,7 @@ nouveau_backlight_init(struct drm_connector *connector) + nv_encoder, ops, &props); + if (IS_ERR(bl->dev)) { + if (bl->id >= 0) +- ida_simple_remove(&bl_ida, bl->id); ++ ida_free(&bl_ida, bl->id); + ret = PTR_ERR(bl->dev); + goto fail_alloc; + } +@@ -442,7 +443,7 @@ nouveau_backlight_fini(struct drm_connector *connector) + return; + + if (bl->id >= 0) +- ida_simple_remove(&bl_ida, bl->id); ++ ida_free(&bl_ida, bl->id); + + backlight_device_unregister(bl->dev); + nv_conn->backlight = NULL; +-- +2.35.1 + diff --git a/queue-5.17/drm-vc4-hdmi-fix-build-error-for-implicit-function-d.patch b/queue-5.17/drm-vc4-hdmi-fix-build-error-for-implicit-function-d.patch new file mode 100644 index 
00000000000..89ce40d4693 --- /dev/null +++ b/queue-5.17/drm-vc4-hdmi-fix-build-error-for-implicit-function-d.patch @@ -0,0 +1,57 @@ +From e1293d515bf3615119e083da84101ff7abacddbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 21:51:48 +0800 +Subject: drm/vc4: hdmi: Fix build error for implicit function declaration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Hui Tang + +[ Upstream commit 6fed53de560768bde6d701a7c79c253b45b259e3 ] + +drivers/gpu/drm/vc4/vc4_hdmi.c: In function ‘vc4_hdmi_connector_detect’: +drivers/gpu/drm/vc4/vc4_hdmi.c:228:7: error: implicit declaration of function ‘gpiod_get_value_cansleep’; did you mean ‘gpio_get_value_cansleep’? [-Werror=implicit-function-declaration] + if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) + ^~~~~~~~~~~~~~~~~~~~~~~~ + gpio_get_value_cansleep + CC [M] drivers/gpu/drm/vc4/vc4_validate.o + CC [M] drivers/gpu/drm/vc4/vc4_v3d.o + CC [M] drivers/gpu/drm/vc4/vc4_validate_shaders.o + CC [M] drivers/gpu/drm/vc4/vc4_debugfs.o +drivers/gpu/drm/vc4/vc4_hdmi.c: In function ‘vc4_hdmi_bind’: +drivers/gpu/drm/vc4/vc4_hdmi.c:2883:23: error: implicit declaration of function ‘devm_gpiod_get_optional’; did you mean ‘devm_clk_get_optional’? [-Werror=implicit-function-declaration] + vc4_hdmi->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); + ^~~~~~~~~~~~~~~~~~~~~~~ + devm_clk_get_optional +drivers/gpu/drm/vc4/vc4_hdmi.c:2883:59: error: ‘GPIOD_IN’ undeclared (first use in this function); did you mean ‘GPIOF_IN’? 
+ vc4_hdmi->hpd_gpio = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN); + ^~~~~~~~ + GPIOF_IN +drivers/gpu/drm/vc4/vc4_hdmi.c:2883:59: note: each undeclared identifier is reported only once for each function it appears in +cc1: all warnings being treated as errors + +Fixes: 6800234ceee0 ("drm/vc4: hdmi: Convert to gpiod") +Signed-off-by: Hui Tang +Signed-off-by: Maxime Ripard +Link: https://patchwork.freedesktop.org/patch/msgid/20220510135148.247719-1-tanghui20@huawei.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vc4/vc4_hdmi.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c +index 3a1626f261e5..3f651364ed7a 100644 +--- a/drivers/gpu/drm/vc4/vc4_hdmi.c ++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +-- +2.35.1 + diff --git a/queue-5.17/drm-vmwgfx-fix-fencing-on-svgav3.patch b/queue-5.17/drm-vmwgfx-fix-fencing-on-svgav3.patch new file mode 100644 index 00000000000..ce21384f069 --- /dev/null +++ b/queue-5.17/drm-vmwgfx-fix-fencing-on-svgav3.patch @@ -0,0 +1,253 @@ +From d783f324437bc54b25a5e08fb6bd819b9467b504 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Mar 2022 10:24:22 -0500 +Subject: drm/vmwgfx: Fix fencing on SVGAv3 + +From: Zack Rusin + +[ Upstream commit 1d6595b4cd47acfd824550f48f10b54a6f0e93ee ] + +Port of the vmwgfx to SVGAv3 lacked support for fencing. SVGAv3 removed +FIFO's and replaced them with command buffers and extra registers. +The initial version of SVGAv3 lacked support for most advanced features +(e.g. 3D) which made fences unnecessary. That is no longer the case, +especially as 3D support is being turned on. + +Switch from FIFO commands and capabilities to command buffers and extra +registers to enable fences on SVGAv3. 
+ +Fixes: 2cd80dbd3551 ("drm/vmwgfx: Add basic support for SVGA3") +Signed-off-by: Zack Rusin +Reviewed-by: Martin Krastev +Reviewed-by: Maaz Mombasawala +Link: https://patchwork.freedesktop.org/patch/msgid/20220302152426.885214-5-zack@kde.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c | 2 +- + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 8 ++++++++ + drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 28 ++++++++++++++++++++------- + drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 26 +++++++++++++++++-------- + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 +++++--- + 5 files changed, 53 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +index a3bfbb6c3e14..bf1b394753da 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmd.c +@@ -528,7 +528,7 @@ int vmw_cmd_send_fence(struct vmw_private *dev_priv, uint32_t *seqno) + *seqno = atomic_add_return(1, &dev_priv->marker_seq); + } while (*seqno == 0); + +- if (!(vmw_fifo_caps(dev_priv) & SVGA_FIFO_CAP_FENCE)) { ++ if (!vmw_has_fences(dev_priv)) { + + /* + * Don't request hardware to send a fence. 
The +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +index ea3ecdda561d..6de0b9ef5c77 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +@@ -1679,4 +1679,12 @@ static inline void vmw_irq_status_write(struct vmw_private *vmw, + outl(status, vmw->io_start + SVGA_IRQSTATUS_PORT); + } + ++static inline bool vmw_has_fences(struct vmw_private *vmw) ++{ ++ if ((vmw->capabilities & (SVGA_CAP_COMMAND_BUFFERS | ++ SVGA_CAP_CMD_BUFFERS_2)) != 0) ++ return true; ++ return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0; ++} ++ + #endif +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +index 5001b87aebe8..a16b854ca18a 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +@@ -82,6 +82,22 @@ fman_from_fence(struct vmw_fence_obj *fence) + return container_of(fence->base.lock, struct vmw_fence_manager, lock); + } + ++static u32 vmw_fence_goal_read(struct vmw_private *vmw) ++{ ++ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) ++ return vmw_read(vmw, SVGA_REG_FENCE_GOAL); ++ else ++ return vmw_fifo_mem_read(vmw, SVGA_FIFO_FENCE_GOAL); ++} ++ ++static void vmw_fence_goal_write(struct vmw_private *vmw, u32 value) ++{ ++ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) ++ vmw_write(vmw, SVGA_REG_FENCE_GOAL, value); ++ else ++ vmw_fifo_mem_write(vmw, SVGA_FIFO_FENCE_GOAL, value); ++} ++ + /* + * Note on fencing subsystem usage of irqs: + * Typically the vmw_fences_update function is called +@@ -392,7 +408,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, + if (likely(!fman->seqno_valid)) + return false; + +- goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL); ++ goal_seqno = vmw_fence_goal_read(fman->dev_priv); + if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP)) + return false; + +@@ -400,9 +416,8 @@ static bool vmw_fence_goal_new_locked(struct 
vmw_fence_manager *fman, + list_for_each_entry(fence, &fman->fence_list, head) { + if (!list_empty(&fence->seq_passed_actions)) { + fman->seqno_valid = true; +- vmw_fifo_mem_write(fman->dev_priv, +- SVGA_FIFO_FENCE_GOAL, +- fence->base.seqno); ++ vmw_fence_goal_write(fman->dev_priv, ++ fence->base.seqno); + break; + } + } +@@ -434,13 +449,12 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence) + if (dma_fence_is_signaled_locked(&fence->base)) + return false; + +- goal_seqno = vmw_fifo_mem_read(fman->dev_priv, SVGA_FIFO_FENCE_GOAL); ++ goal_seqno = vmw_fence_goal_read(fman->dev_priv); + if (likely(fman->seqno_valid && + goal_seqno - fence->base.seqno < VMW_FENCE_WRAP)) + return false; + +- vmw_fifo_mem_write(fman->dev_priv, SVGA_FIFO_FENCE_GOAL, +- fence->base.seqno); ++ vmw_fence_goal_write(fman->dev_priv, fence->base.seqno); + fman->seqno_valid = true; + + return true; +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +index c5191de365ca..fe4732bf2c9d 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c +@@ -32,6 +32,14 @@ + + #define VMW_FENCE_WRAP (1 << 24) + ++static u32 vmw_irqflag_fence_goal(struct vmw_private *vmw) ++{ ++ if ((vmw->capabilities2 & SVGA_CAP2_EXTRA_REGS) != 0) ++ return SVGA_IRQFLAG_REG_FENCE_GOAL; ++ else ++ return SVGA_IRQFLAG_FENCE_GOAL; ++} ++ + /** + * vmw_thread_fn - Deferred (process context) irq handler + * +@@ -96,7 +104,7 @@ static irqreturn_t vmw_irq_handler(int irq, void *arg) + wake_up_all(&dev_priv->fifo_queue); + + if ((masked_status & (SVGA_IRQFLAG_ANY_FENCE | +- SVGA_IRQFLAG_FENCE_GOAL)) && ++ vmw_irqflag_fence_goal(dev_priv))) && + !test_and_set_bit(VMW_IRQTHREAD_FENCE, dev_priv->irqthread_pending)) + ret = IRQ_WAKE_THREAD; + +@@ -137,8 +145,7 @@ bool vmw_seqno_passed(struct vmw_private *dev_priv, + if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP)) + return true; + +- if (!(vmw_fifo_caps(dev_priv) & 
SVGA_FIFO_CAP_FENCE) && +- vmw_fifo_idle(dev_priv, seqno)) ++ if (!vmw_has_fences(dev_priv) && vmw_fifo_idle(dev_priv, seqno)) + return true; + + /** +@@ -160,6 +167,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, + unsigned long timeout) + { + struct vmw_fifo_state *fifo_state = dev_priv->fifo; ++ bool fifo_down = false; + + uint32_t count = 0; + uint32_t signal_seq; +@@ -176,12 +184,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, + */ + + if (fifo_idle) { +- down_read(&fifo_state->rwsem); + if (dev_priv->cman) { + ret = vmw_cmdbuf_idle(dev_priv->cman, interruptible, + 10*HZ); + if (ret) + goto out_err; ++ } else if (fifo_state) { ++ down_read(&fifo_state->rwsem); ++ fifo_down = true; + } + } + +@@ -218,12 +228,12 @@ int vmw_fallback_wait(struct vmw_private *dev_priv, + } + } + finish_wait(&dev_priv->fence_queue, &__wait); +- if (ret == 0 && fifo_idle) ++ if (ret == 0 && fifo_idle && fifo_state) + vmw_fence_write(dev_priv, signal_seq); + + wake_up_all(&dev_priv->fence_queue); + out_err: +- if (fifo_idle) ++ if (fifo_down) + up_read(&fifo_state->rwsem); + + return ret; +@@ -266,13 +276,13 @@ void vmw_seqno_waiter_remove(struct vmw_private *dev_priv) + + void vmw_goal_waiter_add(struct vmw_private *dev_priv) + { +- vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, ++ vmw_generic_waiter_add(dev_priv, vmw_irqflag_fence_goal(dev_priv), + &dev_priv->goal_queue_waiters); + } + + void vmw_goal_waiter_remove(struct vmw_private *dev_priv) + { +- vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL, ++ vmw_generic_waiter_remove(dev_priv, vmw_irqflag_fence_goal(dev_priv), + &dev_priv->goal_queue_waiters); + } + +diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +index bbd2f4ec08ec..93431e8f6606 100644 +--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c ++++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +@@ -1344,7 +1344,6 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, + ret = 
vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, + mode_cmd, + is_bo_proxy); +- + /* + * vmw_create_bo_proxy() adds a reference that is no longer + * needed +@@ -1385,13 +1384,16 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, + ret = vmw_user_lookup_handle(dev_priv, file_priv, + mode_cmd->handles[0], + &surface, &bo); +- if (ret) ++ if (ret) { ++ DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", ++ mode_cmd->handles[0], mode_cmd->handles[0]); + goto err_out; ++ } + + + if (!bo && + !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { +- DRM_ERROR("Surface size cannot exceed %dx%d", ++ DRM_ERROR("Surface size cannot exceed %dx%d\n", + dev_priv->texture_max_width, + dev_priv->texture_max_height); + goto err_out; +-- +2.35.1 + diff --git a/queue-5.17/fanotify-do-not-allow-setting-dirent-events-in-mask-.patch b/queue-5.17/fanotify-do-not-allow-setting-dirent-events-in-mask-.patch new file mode 100644 index 00000000000..da3a1d2de40 --- /dev/null +++ b/queue-5.17/fanotify-do-not-allow-setting-dirent-events-in-mask-.patch @@ -0,0 +1,71 @@ +From b7a7484f8f4738c1e747cd707c1dd85e8af01695 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 7 May 2022 11:00:28 +0300 +Subject: fanotify: do not allow setting dirent events in mask of non-dir + +From: Amir Goldstein + +[ Upstream commit ceaf69f8eadcafb323392be88e7a5248c415d423 ] + +Dirent events (create/delete/move) are only reported on watched +directory inodes, but in fanotify as well as in legacy inotify, it was +always allowed to set them on non-dir inode, which does not result in +any meaningful outcome. + +Until kernel v5.17, dirent events in fanotify also differed from events +"on child" (e.g. FAN_OPEN) in the information provided in the event. +For example, FAN_OPEN could be set in the mask of a non-dir or the mask +of its parent and event would report the fid of the child regardless of +the marked object. 
+By contrast, FAN_DELETE is not reported if the child is marked and the +child fid was not reported in the events. + +Since kernel v5.17, with fanotify group flag FAN_REPORT_TARGET_FID, the +fid of the child is reported with dirent events, like events "on child", +which may create confusion for users expecting the same behavior as +events "on child" when setting events in the mask on a child. + +The desired semantics of setting dirent events in the mask of a child +are not clear, so for now, deny this action for a group initialized +with flag FAN_REPORT_TARGET_FID and for the new event FAN_RENAME. +We may relax this restriction in the future if we decide on the +semantics and implement them. + +Fixes: d61fd650e9d2 ("fanotify: introduce group flag FAN_REPORT_TARGET_FID") +Fixes: 8cc3b1ccd930 ("fanotify: wire up FAN_RENAME event") +Link: https://lore.kernel.org/linux-fsdevel/20220505133057.zm5t6vumc4xdcnsg@quack3.lan/ +Signed-off-by: Amir Goldstein +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220507080028.219826-1-amir73il@gmail.com +Signed-off-by: Sasha Levin +--- + fs/notify/fanotify/fanotify_user.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c +index 2ff6bd85ba8f..f2a1947ec5ee 100644 +--- a/fs/notify/fanotify/fanotify_user.c ++++ b/fs/notify/fanotify/fanotify_user.c +@@ -1638,6 +1638,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, + else + mnt = path.mnt; + ++ /* ++ * FAN_RENAME is not allowed on non-dir (for now). ++ * We shouldn't have allowed setting any dirent events in mask of ++ * non-dir, but because we always allowed it, error only if group ++ * was initialized with the new flag FAN_REPORT_TARGET_FID. 
++ */ ++ ret = -ENOTDIR; ++ if (inode && !S_ISDIR(inode->i_mode) && ++ ((mask & FAN_RENAME) || ++ ((mask & FANOTIFY_DIRENT_EVENTS) && ++ FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID)))) ++ goto path_put_and_out; ++ + /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ + if (mnt || !S_ISDIR(inode->i_mode)) { + mask &= ~FAN_EVENT_ON_CHILD; +-- +2.35.1 + diff --git a/queue-5.17/fbdev-efifb-cleanup-fb_info-in-.fb_destroy-rather-th.patch b/queue-5.17/fbdev-efifb-cleanup-fb_info-in-.fb_destroy-rather-th.patch new file mode 100644 index 00000000000..deba7cd85f1 --- /dev/null +++ b/queue-5.17/fbdev-efifb-cleanup-fb_info-in-.fb_destroy-rather-th.patch @@ -0,0 +1,75 @@ +From 7a873d64b4f936b37e5127250cdf08dbcd923a45 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 May 2022 00:05:40 +0200 +Subject: fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove + +From: Javier Martinez Canillas + +[ Upstream commit d258d00fb9c7c0cdf9d10c1ded84f10339d2d349 ] + +The driver is calling framebuffer_release() in its .remove callback, but +this will cause the struct fb_info to be freed too early. Since it could +be that a reference is still hold to it if user-space opened the fbdev. + +This would lead to a use-after-free error if the framebuffer device was +unregistered but later a user-space process tries to close the fbdev fd. + +To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy +instead of doing it in the driver's .remove callback. + +Strictly speaking, the code flow in the driver is still wrong because all +the hardware cleanupd (i.e: iounmap) should be done in .remove while the +software cleanup (i.e: releasing the framebuffer) should be done in the +.fb_destroy handler. But this at least makes to match the behavior before +commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). 
+ +Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") +Suggested-by: Daniel Vetter +Signed-off-by: Javier Martinez Canillas +Reviewed-by: Thomas Zimmermann +Reviewed-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20220505220540.366218-1-javierm@redhat.com +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/efifb.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c +index ea42ba6445b2..cfa3dc0b4eee 100644 +--- a/drivers/video/fbdev/efifb.c ++++ b/drivers/video/fbdev/efifb.c +@@ -243,6 +243,10 @@ static void efifb_show_boot_graphics(struct fb_info *info) + static inline void efifb_show_boot_graphics(struct fb_info *info) {} + #endif + ++/* ++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end ++ * of unregister_framebuffer() or fb_release(). Do any cleanup here. ++ */ + static void efifb_destroy(struct fb_info *info) + { + if (efifb_pci_dev) +@@ -254,6 +258,9 @@ static void efifb_destroy(struct fb_info *info) + else + memunmap(info->screen_base); + } ++ ++ framebuffer_release(info); ++ + if (request_mem_succeeded) + release_mem_region(info->apertures->ranges[0].base, + info->apertures->ranges[0].size); +@@ -620,9 +627,9 @@ static int efifb_remove(struct platform_device *pdev) + { + struct fb_info *info = platform_get_drvdata(pdev); + ++ /* efifb_destroy takes care of info cleanup */ + unregister_framebuffer(info); + sysfs_remove_groups(&pdev->dev.kobj, efifb_groups); +- framebuffer_release(info); + + return 0; + } +-- +2.35.1 + diff --git a/queue-5.17/fbdev-efifb-fix-a-use-after-free-due-early-fb_info-c.patch b/queue-5.17/fbdev-efifb-fix-a-use-after-free-due-early-fb_info-c.patch new file mode 100644 index 00000000000..dd0d265eb78 --- /dev/null +++ b/queue-5.17/fbdev-efifb-fix-a-use-after-free-due-early-fb_info-c.patch @@ -0,0 +1,57 @@ +From ddaf9d0f7c917059f4c91e92ea88b404f2e8277d Mon Sep 17 00:00:00 2001 
+From: Sasha Levin +Date: Fri, 6 May 2022 15:22:25 +0200 +Subject: fbdev: efifb: Fix a use-after-free due early fb_info cleanup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Javier Martinez Canillas + +[ Upstream commit 1b5853dfab7fdde450f00f145327342238135c8a ] + +Commit d258d00fb9c7 ("fbdev: efifb: Cleanup fb_info in .fb_destroy rather +than .remove") attempted to fix a use-after-free error due driver freeing +the fb_info in the .remove handler instead of doing it in .fb_destroy. + +But ironically that change introduced yet another use-after-free since the +fb_info was still used after the free. + +This should fix for good by freeing the fb_info at the end of the handler. + +Fixes: d258d00fb9c7 ("fbdev: efifb: Cleanup fb_info in .fb_destroy rather than .remove") +Reported-by: Ville Syrjälä +Reported-by: Andrzej Hajda +Signed-off-by: Javier Martinez Canillas +Reviewed-by: Andi Shyti +Reviewed-by: Andrzej Hajda +Reviewed-by: Thomas Zimmermann +Signed-off-by: Lucas De Marchi +Link: https://patchwork.freedesktop.org/patch/msgid/20220506132225.588379-1-javierm@redhat.com +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/efifb.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c +index cfa3dc0b4eee..b3d5f884c544 100644 +--- a/drivers/video/fbdev/efifb.c ++++ b/drivers/video/fbdev/efifb.c +@@ -259,12 +259,12 @@ static void efifb_destroy(struct fb_info *info) + memunmap(info->screen_base); + } + +- framebuffer_release(info); +- + if (request_mem_succeeded) + release_mem_region(info->apertures->ranges[0].base, + info->apertures->ranges[0].size); + fb_dealloc_cmap(&info->cmap); ++ ++ framebuffer_release(info); + } + + static const struct fb_ops efifb_ops = { +-- +2.35.1 + diff --git a/queue-5.17/fbdev-simplefb-cleanup-fb_info-in-.fb_destroy-rather.patch b/queue-5.17/fbdev-simplefb-cleanup-fb_info-in-.fb_destroy-rather.patch new file 
mode 100644 index 00000000000..4b470a53e58 --- /dev/null +++ b/queue-5.17/fbdev-simplefb-cleanup-fb_info-in-.fb_destroy-rather.patch @@ -0,0 +1,72 @@ +From 570bd3dcbe207a0ef53ee85c98256006f3ba38bb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 May 2022 00:04:56 +0200 +Subject: fbdev: simplefb: Cleanup fb_info in .fb_destroy rather than .remove + +From: Javier Martinez Canillas + +[ Upstream commit 666b90b3ce9e4aac1e1deba266c3a230fb3913b0 ] + +The driver is calling framebuffer_release() in its .remove callback, but +this will cause the struct fb_info to be freed too early. Since it could +be that a reference is still hold to it if user-space opened the fbdev. + +This would lead to a use-after-free error if the framebuffer device was +unregistered but later a user-space process tries to close the fbdev fd. + +To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy +instead of doing it in the driver's .remove callback. + +Strictly speaking, the code flow in the driver is still wrong because all +the hardware cleanupd (i.e: iounmap) should be done in .remove while the +software cleanup (i.e: releasing the framebuffer) should be done in the +.fb_destroy handler. But this at least makes to match the behavior before +commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). 
+ +Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") +Suggested-by: Daniel Vetter +Signed-off-by: Javier Martinez Canillas +Reviewed-by: Thomas Zimmermann +Reviewed-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20220505220456.366090-1-javierm@redhat.com +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/simplefb.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/simplefb.c b/drivers/video/fbdev/simplefb.c +index 57541887188b..efce6ef8532d 100644 +--- a/drivers/video/fbdev/simplefb.c ++++ b/drivers/video/fbdev/simplefb.c +@@ -70,12 +70,18 @@ struct simplefb_par; + static void simplefb_clocks_destroy(struct simplefb_par *par); + static void simplefb_regulators_destroy(struct simplefb_par *par); + ++/* ++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end ++ * of unregister_framebuffer() or fb_release(). Do any cleanup here. ++ */ + static void simplefb_destroy(struct fb_info *info) + { + simplefb_regulators_destroy(info->par); + simplefb_clocks_destroy(info->par); + if (info->screen_base) + iounmap(info->screen_base); ++ ++ framebuffer_release(info); + } + + static const struct fb_ops simplefb_ops = { +@@ -520,8 +526,8 @@ static int simplefb_remove(struct platform_device *pdev) + { + struct fb_info *info = platform_get_drvdata(pdev); + ++ /* simplefb_destroy takes care of info cleanup */ + unregister_framebuffer(info); +- framebuffer_release(info); + + return 0; + } +-- +2.35.1 + diff --git a/queue-5.17/fbdev-vesafb-cleanup-fb_info-in-.fb_destroy-rather-t.patch b/queue-5.17/fbdev-vesafb-cleanup-fb_info-in-.fb_destroy-rather-t.patch new file mode 100644 index 00000000000..274b91ac1da --- /dev/null +++ b/queue-5.17/fbdev-vesafb-cleanup-fb_info-in-.fb_destroy-rather-t.patch @@ -0,0 +1,75 @@ +From 9a87e3740e787d8f01a125a05d8d50c566d97746 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 May 2022 00:06:31 +0200 +Subject: fbdev: vesafb: 
Cleanup fb_info in .fb_destroy rather than .remove + +From: Javier Martinez Canillas + +[ Upstream commit b3c9a924aab61adbc29df110006aa03afe1a78ba ] + +The driver is calling framebuffer_release() in its .remove callback, but +this will cause the struct fb_info to be freed too early. Since it could +be that a reference is still hold to it if user-space opened the fbdev. + +This would lead to a use-after-free error if the framebuffer device was +unregistered but later a user-space process tries to close the fbdev fd. + +To prevent this, move the framebuffer_release() call to fb_ops.fb_destroy +instead of doing it in the driver's .remove callback. + +Strictly speaking, the code flow in the driver is still wrong because all +the hardware cleanupd (i.e: iounmap) should be done in .remove while the +software cleanup (i.e: releasing the framebuffer) should be done in the +.fb_destroy handler. But this at least makes to match the behavior before +commit 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal"). + +Fixes: 27599aacbaef ("fbdev: Hot-unplug firmware fb devices on forced removal") +Suggested-by: Daniel Vetter +Signed-off-by: Javier Martinez Canillas +Reviewed-by: Thomas Zimmermann +Reviewed-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20220505220631.366371-1-javierm@redhat.com +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/vesafb.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c +index df6de5a9dd4c..e25e8de5ff67 100644 +--- a/drivers/video/fbdev/vesafb.c ++++ b/drivers/video/fbdev/vesafb.c +@@ -179,6 +179,10 @@ static int vesafb_setcolreg(unsigned regno, unsigned red, unsigned green, + return err; + } + ++/* ++ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end ++ * of unregister_framebuffer() or fb_release(). Do any cleanup here. 
++ */ + static void vesafb_destroy(struct fb_info *info) + { + struct vesafb_par *par = info->par; +@@ -188,6 +192,8 @@ static void vesafb_destroy(struct fb_info *info) + if (info->screen_base) + iounmap(info->screen_base); + release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size); ++ ++ framebuffer_release(info); + } + + static struct fb_ops vesafb_ops = { +@@ -484,10 +490,10 @@ static int vesafb_remove(struct platform_device *pdev) + { + struct fb_info *info = platform_get_drvdata(pdev); + ++ /* vesafb_destroy takes care of info cleanup */ + unregister_framebuffer(info); + if (((struct vesafb_par *)(info->par))->region) + release_region(0x3c0, 32); +- framebuffer_release(info); + + return 0; + } +-- +2.35.1 + diff --git a/queue-5.17/gfs2-fix-filesystem-block-deallocation-for-short-wri.patch b/queue-5.17/gfs2-fix-filesystem-block-deallocation-for-short-wri.patch new file mode 100644 index 00000000000..294205ee00a --- /dev/null +++ b/queue-5.17/gfs2-fix-filesystem-block-deallocation-for-short-wri.patch @@ -0,0 +1,53 @@ +From 7528166bf4f4f6153af58ee157d58a3908ee84d3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Apr 2022 17:52:39 +0200 +Subject: gfs2: Fix filesystem block deallocation for short writes + +From: Andreas Gruenbacher + +[ Upstream commit d031a8866e709c9d1ee5537a321b6192b4d2dc5b ] + +When a write cannot be carried out in full, gfs2_iomap_end() releases +blocks that have been allocated for this write but haven't been used. + +To compute the end of the allocation, gfs2_iomap_end() incorrectly +rounded the end of the attempted write down to the next block boundary +to arrive at the end of the allocation. It would have to round up, but +the end of the allocation is also available as iomap->offset + +iomap->length, so just use that instead. + +In addition, use round_up() for computing the start of the unused range. 
+ +Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support") +Signed-off-by: Andreas Gruenbacher +Signed-off-by: Sasha Levin +--- + fs/gfs2/bmap.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c +index fbdb7a30470a..f785af2aa23c 100644 +--- a/fs/gfs2/bmap.c ++++ b/fs/gfs2/bmap.c +@@ -1154,13 +1154,12 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, + + if (length != written && (iomap->flags & IOMAP_F_NEW)) { + /* Deallocate blocks that were just allocated. */ +- loff_t blockmask = i_blocksize(inode) - 1; +- loff_t end = (pos + length) & ~blockmask; ++ loff_t hstart = round_up(pos + written, i_blocksize(inode)); ++ loff_t hend = iomap->offset + iomap->length; + +- pos = (pos + written + blockmask) & ~blockmask; +- if (pos < end) { +- truncate_pagecache_range(inode, pos, end - 1); +- punch_hole(ip, pos, end - pos); ++ if (hstart < hend) { ++ truncate_pagecache_range(inode, hstart, hend - 1); ++ punch_hole(ip, hstart, hend - hstart); + } + } + +-- +2.35.1 + diff --git a/queue-5.17/hwmon-asus_wmi_sensors-fix-crosshair-vi-hero-name.patch b/queue-5.17/hwmon-asus_wmi_sensors-fix-crosshair-vi-hero-name.patch new file mode 100644 index 00000000000..fe36841e086 --- /dev/null +++ b/queue-5.17/hwmon-asus_wmi_sensors-fix-crosshair-vi-hero-name.patch @@ -0,0 +1,36 @@ +From 9fa37f0b3103c1d9177071faaa4e076c748887af Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 3 Apr 2022 22:34:54 +0300 +Subject: hwmon: (asus_wmi_sensors) Fix CROSSHAIR VI HERO name + +From: Denis Pauk + +[ Upstream commit 4fd45cc8568e6086272d3036f2c29d61e9b776a1 ] + +CROSSHAIR VI HERO motherboard is incorrectly named as +ROG CROSSHAIR VI HERO. 
+ +Signed-off-by: Denis Pauk +Link: https://lore.kernel.org/r/20220403193455.1363-1-pauk.denis@gmail.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/asus_wmi_sensors.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hwmon/asus_wmi_sensors.c b/drivers/hwmon/asus_wmi_sensors.c +index c80eee874b6c..49784a6ea23a 100644 +--- a/drivers/hwmon/asus_wmi_sensors.c ++++ b/drivers/hwmon/asus_wmi_sensors.c +@@ -71,7 +71,7 @@ static const struct dmi_system_id asus_wmi_dmi_table[] = { + DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X399-A"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI EXTREME"), +- DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO"), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("CROSSHAIR VI HERO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VI HERO (WI-FI AC)"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO"), + DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VII HERO (WI-FI)"), +-- +2.35.1 + diff --git a/queue-5.17/hwmon-f71882fg-fix-negative-temperature.patch b/queue-5.17/hwmon-f71882fg-fix-negative-temperature.patch new file mode 100644 index 00000000000..8d0d4318490 --- /dev/null +++ b/queue-5.17/hwmon-f71882fg-fix-negative-temperature.patch @@ -0,0 +1,46 @@ +From 41d6fc0600070d54e1fab20e978aebcc6b1f174d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Apr 2022 17:07:06 +0800 +Subject: hwmon: (f71882fg) Fix negative temperature + +From: Ji-Ze Hong (Peter Hong) + +[ Upstream commit 4aaaaf0f279836f06d3b9d0ffeec7a1e1a04ceef ] + +All temperature of Fintek superio hwmonitor that using 1-byte reg will use +2's complement. + +In show_temp() + temp = data->temp[nr] * 1000; + +When data->temp[nr] read as 255, it indicate -1C, but this code will report +255C to userspace. 
It'll be ok when change to: + temp = ((s8)data->temp[nr]) * 1000; + +Signed-off-by: Ji-Ze Hong (Peter Hong) +Link: https://lore.kernel.org/r/20220418090706.6339-1-hpeter+linux_kernel@gmail.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/f71882fg.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c +index 938a8b9ec70d..6830e029995d 100644 +--- a/drivers/hwmon/f71882fg.c ++++ b/drivers/hwmon/f71882fg.c +@@ -1578,8 +1578,9 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, + temp *= 125; + if (sign) + temp -= 128000; +- } else +- temp = data->temp[nr] * 1000; ++ } else { ++ temp = ((s8)data->temp[nr]) * 1000; ++ } + + return sprintf(buf, "%d\n", temp); + } +-- +2.35.1 + diff --git a/queue-5.17/hwmon-ltq-cputemp-restrict-it-to-soc_xway.patch b/queue-5.17/hwmon-ltq-cputemp-restrict-it-to-soc_xway.patch new file mode 100644 index 00000000000..10e711474e6 --- /dev/null +++ b/queue-5.17/hwmon-ltq-cputemp-restrict-it-to-soc_xway.patch @@ -0,0 +1,56 @@ +From 64a2bb9c32a164846aa865e31ec5139fc51232ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 May 2022 16:47:40 -0700 +Subject: hwmon: (ltq-cputemp) restrict it to SOC_XWAY + +From: Randy Dunlap + +[ Upstream commit 151d6dcbed836270c6c240932da66f147950cbdb ] + +Building with SENSORS_LTQ_CPUTEMP=y with SOC_FALCON=y causes build +errors since FALCON does not support the same features as XWAY. + +Change this symbol to depend on SOC_XWAY since that provides the +necessary interfaces. + +Repairs these build errors: + +../drivers/hwmon/ltq-cputemp.c: In function 'ltq_cputemp_enable': +../drivers/hwmon/ltq-cputemp.c:23:9: error: implicit declaration of function 'ltq_cgu_w32'; did you mean 'ltq_ebu_w32'? 
[-Werror=implicit-function-declaration] + 23 | ltq_cgu_w32(ltq_cgu_r32(CGU_GPHY1_CR) | CGU_TEMP_PD, CGU_GPHY1_CR); +../drivers/hwmon/ltq-cputemp.c:23:21: error: implicit declaration of function 'ltq_cgu_r32'; did you mean 'ltq_ebu_r32'? [-Werror=implicit-function-declaration] + 23 | ltq_cgu_w32(ltq_cgu_r32(CGU_GPHY1_CR) | CGU_TEMP_PD, CGU_GPHY1_CR); +../drivers/hwmon/ltq-cputemp.c: In function 'ltq_cputemp_probe': +../drivers/hwmon/ltq-cputemp.c:92:31: error: 'SOC_TYPE_VR9_2' undeclared (first use in this function) + 92 | if (ltq_soc_type() != SOC_TYPE_VR9_2) + +Fixes: 7074d0a92758 ("hwmon: (ltq-cputemp) add cpu temp sensor driver") +Signed-off-by: Randy Dunlap +Reported-by: kernel test robot +Cc: Florian Eckert +Cc: Guenter Roeck +Cc: Jean Delvare +Cc: linux-hwmon@vger.kernel.org +Link: https://lore.kernel.org/r/20220509234740.26841-1-rdunlap@infradead.org +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig +index 8df25f1079ba..d958d87b7edc 100644 +--- a/drivers/hwmon/Kconfig ++++ b/drivers/hwmon/Kconfig +@@ -944,7 +944,7 @@ config SENSORS_LTC4261 + + config SENSORS_LTQ_CPUTEMP + bool "Lantiq cpu temperature sensor driver" +- depends on LANTIQ ++ depends on SOC_XWAY + help + If you say yes here you get support for the temperature + sensor inside your CPU. 
+-- +2.35.1 + diff --git a/queue-5.17/hwmon-tmp401-add-of-device-id-table.patch b/queue-5.17/hwmon-tmp401-add-of-device-id-table.patch new file mode 100644 index 00000000000..f78c255be6c --- /dev/null +++ b/queue-5.17/hwmon-tmp401-add-of-device-id-table.patch @@ -0,0 +1,70 @@ +From d493cd6732d247f85c91ff8bb2bb6082b9b470a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 May 2022 13:43:33 +0200 +Subject: hwmon: (tmp401) Add OF device ID table + +From: Camel Guo + +[ Upstream commit 3481551f035725fdc46885425eac3ef9b58ae7b7 ] + +This driver doesn't have of_match_table. This makes the kernel module +tmp401.ko lack alias patterns (e.g: of:N*T*Cti,tmp411) to match DT node +of the supported devices hence this kernel module will not be +automatically loaded. + +After adding of_match_table to this driver, the following alias will be +added into tmp401.ko. +$ modinfo drivers/hwmon/tmp401.ko +filename: drivers/hwmon/tmp401.ko +...... +author: Hans de Goede +alias: of:N*T*Cti,tmp435C* +alias: of:N*T*Cti,tmp435 +alias: of:N*T*Cti,tmp432C* +alias: of:N*T*Cti,tmp432 +alias: of:N*T*Cti,tmp431C* +alias: of:N*T*Cti,tmp431 +alias: of:N*T*Cti,tmp411C* +alias: of:N*T*Cti,tmp411 +alias: of:N*T*Cti,tmp401C* +alias: of:N*T*Cti,tmp401 +...... 
+ +Fixes: af503716ac14 ("i2c: core: report OF style module alias for devices registered via OF") +Signed-off-by: Camel Guo +Link: https://lore.kernel.org/r/20220503114333.456476-1-camel.guo@axis.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/tmp401.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c +index b86d9df7105d..52c9e7d3f2ae 100644 +--- a/drivers/hwmon/tmp401.c ++++ b/drivers/hwmon/tmp401.c +@@ -708,10 +708,21 @@ static int tmp401_probe(struct i2c_client *client) + return 0; + } + ++static const struct of_device_id __maybe_unused tmp4xx_of_match[] = { ++ { .compatible = "ti,tmp401", }, ++ { .compatible = "ti,tmp411", }, ++ { .compatible = "ti,tmp431", }, ++ { .compatible = "ti,tmp432", }, ++ { .compatible = "ti,tmp435", }, ++ { }, ++}; ++MODULE_DEVICE_TABLE(of, tmp4xx_of_match); ++ + static struct i2c_driver tmp401_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "tmp401", ++ .of_match_table = of_match_ptr(tmp4xx_of_match), + }, + .probe_new = tmp401_probe, + .id_table = tmp401_id, +-- +2.35.1 + diff --git a/queue-5.17/ice-clear-stale-tx-queue-settings-before-configuring.patch b/queue-5.17/ice-clear-stale-tx-queue-settings-before-configuring.patch new file mode 100644 index 00000000000..ba2900bfdd2 --- /dev/null +++ b/queue-5.17/ice-clear-stale-tx-queue-settings-before-configuring.patch @@ -0,0 +1,160 @@ +From 481f471fb7e2a80e78fd10e66ba9c6fafe8fcf48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Apr 2022 12:01:00 +0000 +Subject: ice: clear stale Tx queue settings before configuring + +From: Anatolii Gerasymenko + +[ Upstream commit 6096dae926a22e2892ef9169f582589c16d39639 ] + +The iAVF driver uses 3 virtchnl op codes to communicate with the PF +regarding the VF Tx queues: + +* VIRTCHNL_OP_CONFIG_VSI_QUEUES configures the hardware and firmware +logic for the Tx queues + +* VIRTCHNL_OP_ENABLE_QUEUES configures the queue interrupts + 
+* VIRTCHNL_OP_DISABLE_QUEUES disables the queue interrupts and Tx rings. + +There is a bug in the iAVF driver due to the race condition between VF +reset request and shutdown being executed in parallel. This leads to a +break in logic and VIRTCHNL_OP_DISABLE_QUEUES is not being sent. + +If this occurs, the PF driver never cleans up the Tx queues. This results +in leaving behind stale Tx queue settings in the hardware and firmware. + +The most obvious outcome is that upon the next +VIRTCHNL_OP_CONFIG_VSI_QUEUES, the PF will fail to program the Tx +scheduler node due to a lack of space. + +We need to protect ICE driver against such situation. + +To fix this, make sure we clear existing stale settings out when +handling VIRTCHNL_OP_CONFIG_VSI_QUEUES. This ensures we remove the +previous settings. + +Calling ice_vf_vsi_dis_single_txq should be safe as it will do nothing if +the queue is not configured. The function already handles the case when the +Tx queue is not currently configured and exits with a 0 return in that +case. + +Fixes: 7ad15440acf8 ("ice: Refactor VIRTCHNL_OP_CONFIG_VSI_QUEUES handling") +Signed-off-by: Jacob Keller +Signed-off-by: Anatolii Gerasymenko +Tested-by: Konrad Jankowski +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 68 ++++++++++++++----- + 1 file changed, 50 insertions(+), 18 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +index 2bee8f10ad89..0cc8b7e06b72 100644 +--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +@@ -3300,13 +3300,52 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) + NULL, 0); + } + ++/** ++ * ice_vf_vsi_dis_single_txq - disable a single Tx queue ++ * @vf: VF to disable queue for ++ * @vsi: VSI for the VF ++ * @q_id: VF relative (0-based) queue ID ++ * ++ * Attempt to disable the Tx queue passed in. 
If the Tx queue was successfully ++ * disabled then clear q_id bit in the enabled queues bitmap and return ++ * success. Otherwise return error. ++ */ ++static int ++ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) ++{ ++ struct ice_txq_meta txq_meta = { 0 }; ++ struct ice_tx_ring *ring; ++ int err; ++ ++ if (!test_bit(q_id, vf->txq_ena)) ++ dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", ++ q_id, vsi->vsi_num); ++ ++ ring = vsi->tx_rings[q_id]; ++ if (!ring) ++ return -EINVAL; ++ ++ ice_fill_txq_meta(vsi, ring, &txq_meta); ++ ++ err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); ++ if (err) { ++ dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", ++ q_id, vsi->vsi_num); ++ return err; ++ } ++ ++ /* Clear enabled queues flag */ ++ clear_bit(q_id, vf->txq_ena); ++ ++ return 0; ++} ++ + /** + * ice_vc_dis_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * +- * called from the VF to disable all or specific +- * queue(s) ++ * called from the VF to disable all or specific queue(s) + */ + static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) + { +@@ -3343,30 +3382,15 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) + q_map = vqs->tx_queues; + + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { +- struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id]; +- struct ice_txq_meta txq_meta = { 0 }; +- + if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + +- if (!test_bit(vf_q_id, vf->txq_ena)) +- dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", +- vf_q_id, vsi->vsi_num); +- +- ice_fill_txq_meta(vsi, ring, &txq_meta); +- +- if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, +- ring, &txq_meta)) { +- dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", +- vf_q_id, 
vsi->vsi_num); ++ if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } +- +- /* Clear enabled queues flag */ +- clear_bit(vf_q_id, vf->txq_ena); + } + } + +@@ -3615,6 +3639,14 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) + if (qpi->txq.ring_len > 0) { + vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr; + vsi->tx_rings[i]->count = qpi->txq.ring_len; ++ ++ /* Disable any existing queue first */ ++ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) { ++ v_ret = VIRTCHNL_STATUS_ERR_PARAM; ++ goto error_param; ++ } ++ ++ /* Configure a queue with the requested settings */ + if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; +-- +2.35.1 + diff --git a/queue-5.17/ice-fix-ptp-stale-tx-timestamps-cleanup.patch b/queue-5.17/ice-fix-ptp-stale-tx-timestamps-cleanup.patch new file mode 100644 index 00000000000..ea6e0b2d925 --- /dev/null +++ b/queue-5.17/ice-fix-ptp-stale-tx-timestamps-cleanup.patch @@ -0,0 +1,78 @@ +From e5ff0d5291c592a0d77b8a72b86329ff13105224 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 20 Apr 2022 14:23:02 +0200 +Subject: ice: fix PTP stale Tx timestamps cleanup + +From: Michal Michalik + +[ Upstream commit a11b6c1a383ff092f432e040c20e032503785d47 ] + +Read stale PTP Tx timestamps from PHY on cleanup. + +After running out of Tx timestamps request handlers, hardware (HW) stops +reporting finished requests. Function ice_ptp_tx_tstamp_cleanup() used +to only clean up stale handlers in driver and was leaving the hardware +registers not read. Not reading stale PTP Tx timestamps prevents next +interrupts from arriving and makes timestamping unusable. 
+ +Fixes: ea9b847cda64 ("ice: enable transmit timestamps for E810 devices") +Signed-off-by: Michal Michalik +Reviewed-by: Jacob Keller +Reviewed-by: Paul Menzel +Tested-by: Gurucharan (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice_ptp.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c +index 000c39d163a2..45ae97b8b97d 100644 +--- a/drivers/net/ethernet/intel/ice/ice_ptp.c ++++ b/drivers/net/ethernet/intel/ice/ice_ptp.c +@@ -2279,6 +2279,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) + + /** + * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped ++ * @hw: pointer to the hw struct + * @tx: PTP Tx tracker to clean up + * + * Loop through the Tx timestamp requests and see if any of them have been +@@ -2287,7 +2288,7 @@ ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) + * timestamp will never be captured. This might happen if the packet gets + * discarded before it reaches the PHY timestamping block. 
+ */ +-static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) ++static void ice_ptp_tx_tstamp_cleanup(struct ice_hw *hw, struct ice_ptp_tx *tx) + { + u8 idx; + +@@ -2296,11 +2297,16 @@ static void ice_ptp_tx_tstamp_cleanup(struct ice_ptp_tx *tx) + + for_each_set_bit(idx, tx->in_use, tx->len) { + struct sk_buff *skb; ++ u64 raw_tstamp; + + /* Check if this SKB has been waiting for too long */ + if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) + continue; + ++ /* Read tstamp to be able to use this register again */ ++ ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, ++ &raw_tstamp); ++ + spin_lock(&tx->lock); + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; +@@ -2322,7 +2328,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work) + + ice_ptp_update_cached_phctime(pf); + +- ice_ptp_tx_tstamp_cleanup(&pf->ptp.port.tx); ++ ice_ptp_tx_tstamp_cleanup(&pf->hw, &pf->ptp.port.tx); + + /* Run twice a second */ + kthread_queue_delayed_work(ptp->kworker, &ptp->work, +-- +2.35.1 + diff --git a/queue-5.17/ice-fix-race-during-aux-device-un-plugging.patch b/queue-5.17/ice-fix-race-during-aux-device-un-plugging.patch new file mode 100644 index 00000000000..66106d5e383 --- /dev/null +++ b/queue-5.17/ice-fix-race-during-aux-device-un-plugging.patch @@ -0,0 +1,234 @@ +From 38f739f36434544e08b69c204572bcd6b5390c5e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 23 Apr 2022 12:20:21 +0200 +Subject: ice: Fix race during aux device (un)plugging + +From: Ivan Vecera + +[ Upstream commit 486b9eee57ddca5c9a2d59fc41153f36002e0a00 ] + +Function ice_plug_aux_dev() assigns the pf->adev field too early, before +aux device initialization, and on the other side ice_unplug_aux_dev() +starts aux device deinit and only at the end assigns NULL to pf->adev. +This is wrong because pf->adev should be non-NULL only when the +aux device is fully initialized and ready.
This wrong order causes +a crash when ice_send_event_to_aux() call occurs because that function +depends on non-NULL value of pf->adev and does not assume that +aux device is half-initialized or half-destroyed. +After order correction the race window is tiny but it is still there, +as Leon mentioned and manipulation with pf->adev needs to be protected +by mutex. + +Fix (un-)plugging functions so pf->adev field is set after aux device +init and prior aux device destroy and protect pf->adev assignment by +new mutex. This mutex is also held during ice_send_event_to_aux() +call to ensure that aux device is valid during that call. +Note that device lock used ice_send_event_to_aux() needs to be kept +to avoid race with aux drv unload. + +Reproducer: +cycle=1 +while :;do + echo "#### Cycle: $cycle" + + ip link set ens7f0 mtu 9000 + ip link add bond0 type bond mode 1 miimon 100 + ip link set bond0 up + ifenslave bond0 ens7f0 + ip link set bond0 mtu 9000 + ethtool -L ens7f0 combined 1 + ip link del bond0 + ip link set ens7f0 mtu 1500 + sleep 1 + + let cycle++ +done + +In short when the device is added/removed to/from bond the aux device +is unplugged/plugged. When MTU of the device is changed an event is +sent to aux device asynchronously. This can race with (un)plugging +operation and because pf->adev is set too early (plug) or too late +(unplug) the function ice_send_event_to_aux() can touch uninitialized +or destroyed fields. In the case of crash below pf->adev->dev.mutex. + +Crash: +[ 53.372066] bond0: (slave ens7f0): making interface the new active one +[ 53.378622] bond0: (slave ens7f0): Enslaving as an active interface with an u +p link +[ 53.386294] IPv6: ADDRCONF(NETDEV_CHANGE): bond0: link becomes ready +[ 53.549104] bond0: (slave ens7f1): Enslaving as a backup interface with an up + link +[ 54.118906] ice 0000:ca:00.0 ens7f0: Number of in use tx queues changed inval +idating tc mappings. Priority traffic classification disabled! 
+[ 54.233374] ice 0000:ca:00.1 ens7f1: Number of in use tx queues changed inval +idating tc mappings. Priority traffic classification disabled! +[ 54.248204] bond0: (slave ens7f0): Releasing backup interface +[ 54.253955] bond0: (slave ens7f1): making interface the new active one +[ 54.274875] bond0: (slave ens7f1): Releasing backup interface +[ 54.289153] bond0 (unregistering): Released all slaves +[ 55.383179] MII link monitoring set to 100 ms +[ 55.398696] bond0: (slave ens7f0): making interface the new active one +[ 55.405241] BUG: kernel NULL pointer dereference, address: 0000000000000080 +[ 55.405289] bond0: (slave ens7f0): Enslaving as an active interface with an u +p link +[ 55.412198] #PF: supervisor write access in kernel mode +[ 55.412200] #PF: error_code(0x0002) - not-present page +[ 55.412201] PGD 25d2ad067 P4D 0 +[ 55.412204] Oops: 0002 [#1] PREEMPT SMP NOPTI +[ 55.412207] CPU: 0 PID: 403 Comm: kworker/0:2 Kdump: loaded Tainted: G S + 5.17.0-13579-g57f2d6540f03 #1 +[ 55.429094] bond0: (slave ens7f1): Enslaving as a backup interface with an up + link +[ 55.430224] Hardware name: Dell Inc. 
PowerEdge R750/06V45N, BIOS 1.4.4 10/07/ +2021 +[ 55.430226] Workqueue: ice ice_service_task [ice] +[ 55.468169] RIP: 0010:mutex_unlock+0x10/0x20 +[ 55.472439] Code: 0f b1 13 74 96 eb e0 4c 89 ee eb d8 e8 79 54 ff ff 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 65 48 8b 04 25 40 ef 01 00 31 d2 48 0f b1 17 75 01 c3 e9 e3 fe ff ff 0f 1f 00 0f 1f 44 00 00 48 +[ 55.491186] RSP: 0018:ff4454230d7d7e28 EFLAGS: 00010246 +[ 55.496413] RAX: ff1a79b208b08000 RBX: ff1a79b2182e8880 RCX: 0000000000000001 +[ 55.503545] RDX: 0000000000000000 RSI: ff4454230d7d7db0 RDI: 0000000000000080 +[ 55.510678] RBP: ff1a79d1c7e48b68 R08: ff4454230d7d7db0 R09: 0000000000000041 +[ 55.517812] R10: 00000000000000a5 R11: 00000000000006e6 R12: ff1a79d1c7e48bc0 +[ 55.524945] R13: 0000000000000000 R14: ff1a79d0ffc305c0 R15: 0000000000000000 +[ 55.532076] FS: 0000000000000000(0000) GS:ff1a79d0ffc00000(0000) knlGS:0000000000000000 +[ 55.540163] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 55.545908] CR2: 0000000000000080 CR3: 00000003487ae003 CR4: 0000000000771ef0 +[ 55.553041] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 55.560173] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 55.567305] PKRU: 55555554 +[ 55.570018] Call Trace: +[ 55.572474] +[ 55.574579] ice_service_task+0xaab/0xef0 [ice] +[ 55.579130] process_one_work+0x1c5/0x390 +[ 55.583141] ? process_one_work+0x390/0x390 +[ 55.587326] worker_thread+0x30/0x360 +[ 55.590994] ? process_one_work+0x390/0x390 +[ 55.595180] kthread+0xe6/0x110 +[ 55.598325] ? 
kthread_complete_and_exit+0x20/0x20 +[ 55.603116] ret_from_fork+0x1f/0x30 +[ 55.606698] + +Fixes: f9f5301e7e2d ("ice: Register auxiliary device to provide RDMA") +Reviewed-by: Leon Romanovsky +Signed-off-by: Ivan Vecera +Reviewed-by: Dave Ertman +Tested-by: Gurucharan (A Contingent worker at Intel) +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ice/ice.h | 1 + + drivers/net/ethernet/intel/ice/ice_idc.c | 25 +++++++++++++++-------- + drivers/net/ethernet/intel/ice/ice_main.c | 2 ++ + 3 files changed, 20 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h +index 9c04a71a9fca..e2ffdf2726fd 100644 +--- a/drivers/net/ethernet/intel/ice/ice.h ++++ b/drivers/net/ethernet/intel/ice/ice.h +@@ -546,6 +546,7 @@ struct ice_pf { + struct mutex avail_q_mutex; /* protects access to avail_[rx|tx]qs */ + struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ + struct mutex tc_mutex; /* lock to protect TC changes */ ++ struct mutex adev_mutex; /* lock to protect aux device access */ + u32 msg_enable; + struct ice_ptp ptp; + u16 num_rdma_msix; /* Total MSIX vectors for RDMA driver */ +diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c +index 5559230eff8b..0deed090645f 100644 +--- a/drivers/net/ethernet/intel/ice/ice_idc.c ++++ b/drivers/net/ethernet/intel/ice/ice_idc.c +@@ -37,14 +37,17 @@ void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event) + if (WARN_ON_ONCE(!in_task())) + return; + ++ mutex_lock(&pf->adev_mutex); + if (!pf->adev) +- return; ++ goto finish; + + device_lock(&pf->adev->dev); + iadrv = ice_get_auxiliary_drv(pf); + if (iadrv && iadrv->event_handler) + iadrv->event_handler(pf, event); + device_unlock(&pf->adev->dev); ++finish: ++ mutex_unlock(&pf->adev_mutex); + } + + /** +@@ -285,7 +288,6 @@ int ice_plug_aux_dev(struct ice_pf *pf) + return -ENOMEM; + + adev = &iadev->adev; +- pf->adev 
= adev; + iadev->pf = pf; + + adev->id = pf->aux_idx; +@@ -295,18 +297,20 @@ int ice_plug_aux_dev(struct ice_pf *pf) + + ret = auxiliary_device_init(adev); + if (ret) { +- pf->adev = NULL; + kfree(iadev); + return ret; + } + + ret = auxiliary_device_add(adev); + if (ret) { +- pf->adev = NULL; + auxiliary_device_uninit(adev); + return ret; + } + ++ mutex_lock(&pf->adev_mutex); ++ pf->adev = adev; ++ mutex_unlock(&pf->adev_mutex); ++ + return 0; + } + +@@ -315,12 +319,17 @@ int ice_plug_aux_dev(struct ice_pf *pf) + */ + void ice_unplug_aux_dev(struct ice_pf *pf) + { +- if (!pf->adev) +- return; ++ struct auxiliary_device *adev; + +- auxiliary_device_delete(pf->adev); +- auxiliary_device_uninit(pf->adev); ++ mutex_lock(&pf->adev_mutex); ++ adev = pf->adev; + pf->adev = NULL; ++ mutex_unlock(&pf->adev_mutex); ++ ++ if (adev) { ++ auxiliary_device_delete(adev); ++ auxiliary_device_uninit(adev); ++ } + } + + /** +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index e347030ee2e3..7f6715eb862f 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -3682,6 +3682,7 @@ u16 ice_get_avail_rxq_count(struct ice_pf *pf) + static void ice_deinit_pf(struct ice_pf *pf) + { + ice_service_task_stop(pf); ++ mutex_destroy(&pf->adev_mutex); + mutex_destroy(&pf->sw_mutex); + mutex_destroy(&pf->tc_mutex); + mutex_destroy(&pf->avail_q_mutex); +@@ -3762,6 +3763,7 @@ static int ice_init_pf(struct ice_pf *pf) + + mutex_init(&pf->sw_mutex); + mutex_init(&pf->tc_mutex); ++ mutex_init(&pf->adev_mutex); + + INIT_HLIST_HEAD(&pf->aq_wait_list); + spin_lock_init(&pf->aq_wait_lock); +-- +2.35.1 + diff --git a/queue-5.17/io_uring-assign-non-fixed-early-for-async-work.patch b/queue-5.17/io_uring-assign-non-fixed-early-for-async-work.patch new file mode 100644 index 00000000000..4d42fc4b1f0 --- /dev/null +++ b/queue-5.17/io_uring-assign-non-fixed-early-for-async-work.patch @@ -0,0 +1,44 @@ +From 
82d42b574dbf327a51510f41083043efc6bac99e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 May 2022 21:19:50 -0600 +Subject: io_uring: assign non-fixed early for async work + +From: Jens Axboe + +[ Upstream commit a196c78b5443fc61af2c0490213b9d125482cbd1 ] + +We defer file assignment to ensure that fixed files work with links +between a direct accept/open and the links that follow it. But this has +the side effect that normal file assignment is then not complete by the +time that request submission has been done. + +For deferred execution, if the file is a regular file, assign it when +we do the async prep anyway. + +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + fs/io_uring.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/fs/io_uring.c b/fs/io_uring.c +index 87df37912055..a0680046ff3c 100644 +--- a/fs/io_uring.c ++++ b/fs/io_uring.c +@@ -6572,7 +6572,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + + static int io_req_prep_async(struct io_kiocb *req) + { +- if (!io_op_defs[req->opcode].needs_async_setup) ++ const struct io_op_def *def = &io_op_defs[req->opcode]; ++ ++ /* assign early for deferred execution for non-fixed file */ ++ if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE)) ++ req->file = io_file_get_normal(req, req->fd); ++ if (!def->needs_async_setup) + return 0; + if (WARN_ON_ONCE(req_has_async_data(req))) + return -EFAULT; +-- +2.35.1 + diff --git a/queue-5.17/iommu-arm-smmu-disable-large-page-mappings-for-nvidi.patch b/queue-5.17/iommu-arm-smmu-disable-large-page-mappings-for-nvidi.patch new file mode 100644 index 00000000000..e7cdb995621 --- /dev/null +++ b/queue-5.17/iommu-arm-smmu-disable-large-page-mappings-for-nvidi.patch @@ -0,0 +1,87 @@ +From cce9490a846b254453f07f2b6c7d43707dcba684 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 21 Apr 2022 13:45:04 +0530 +Subject: iommu: arm-smmu: disable large page mappings for Nvidia arm-smmu + +From: Ashish 
Mhetre + +[ Upstream commit 4a25f2ea0e030b2fc852c4059a50181bfc5b2f57 ] + +Tegra194 and Tegra234 SoCs have the erratum that causes walk cache +entries to not be invalidated correctly. The problem is that the walk +cache index generated for IOVA is not same across translation and +invalidation requests. This is leading to page faults when PMD entry is +released during unmap and populated with new PTE table during subsequent +map request. Disabling large page mappings avoids the release of PMD +entry and avoid translations seeing stale PMD entry in walk cache. +Fix this by limiting the page mappings to PAGE_SIZE for Tegra194 and +Tegra234 devices. This is recommended fix from Tegra hardware design +team. + +Acked-by: Robin Murphy +Reviewed-by: Krishna Reddy +Co-developed-by: Pritesh Raithatha +Signed-off-by: Pritesh Raithatha +Signed-off-by: Ashish Mhetre +Link: https://lore.kernel.org/r/20220421081504.24678-1-amhetre@nvidia.com +Signed-off-by: Will Deacon +Signed-off-by: Sasha Levin +--- + drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 30 ++++++++++++++++++++ + 1 file changed, 30 insertions(+) + +diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +index 01e9b50b10a1..87bf522b9d2e 100644 +--- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c ++++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +@@ -258,6 +258,34 @@ static void nvidia_smmu_probe_finalize(struct arm_smmu_device *smmu, struct devi + dev_name(dev), err); + } + ++static int nvidia_smmu_init_context(struct arm_smmu_domain *smmu_domain, ++ struct io_pgtable_cfg *pgtbl_cfg, ++ struct device *dev) ++{ ++ struct arm_smmu_device *smmu = smmu_domain->smmu; ++ const struct device_node *np = smmu->dev->of_node; ++ ++ /* ++ * Tegra194 and Tegra234 SoCs have the erratum that causes walk cache ++ * entries to not be invalidated correctly. The problem is that the walk ++ * cache index generated for IOVA is not same across translation and ++ * invalidation requests. 
This is leading to page faults when PMD entry ++ * is released during unmap and populated with new PTE table during ++ * subsequent map request. Disabling large page mappings avoids the ++ * release of PMD entry and avoid translations seeing stale PMD entry in ++ * walk cache. ++ * Fix this by limiting the page mappings to PAGE_SIZE on Tegra194 and ++ * Tegra234. ++ */ ++ if (of_device_is_compatible(np, "nvidia,tegra234-smmu") || ++ of_device_is_compatible(np, "nvidia,tegra194-smmu")) { ++ smmu->pgsize_bitmap = PAGE_SIZE; ++ pgtbl_cfg->pgsize_bitmap = smmu->pgsize_bitmap; ++ } ++ ++ return 0; ++} ++ + static const struct arm_smmu_impl nvidia_smmu_impl = { + .read_reg = nvidia_smmu_read_reg, + .write_reg = nvidia_smmu_write_reg, +@@ -268,10 +296,12 @@ static const struct arm_smmu_impl nvidia_smmu_impl = { + .global_fault = nvidia_smmu_global_fault, + .context_fault = nvidia_smmu_context_fault, + .probe_finalize = nvidia_smmu_probe_finalize, ++ .init_context = nvidia_smmu_init_context, + }; + + static const struct arm_smmu_impl nvidia_smmu_single_impl = { + .probe_finalize = nvidia_smmu_probe_finalize, ++ .init_context = nvidia_smmu_init_context, + }; + + struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu) +-- +2.35.1 + diff --git a/queue-5.17/ionic-fix-missing-pci_release_regions-on-error-in-io.patch b/queue-5.17/ionic-fix-missing-pci_release_regions-on-error-in-io.patch new file mode 100644 index 00000000000..b31197a7877 --- /dev/null +++ b/queue-5.17/ionic-fix-missing-pci_release_regions-on-error-in-io.patch @@ -0,0 +1,44 @@ +From 57047fab93cf44de9dfe95aaeb0160f7f612ab3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 May 2022 11:40:40 +0800 +Subject: ionic: fix missing pci_release_regions() on error in ionic_probe() + +From: Yang Yingliang + +[ Upstream commit e4b1045bf9cfec6f70ac6d3783be06c3a88dcb25 ] + +If ionic_map_bars() fails, pci_release_regions() need be called. 
+ +Fixes: fbfb8031533c ("ionic: Add hardware init and device commands") +Signed-off-by: Yang Yingliang +Link: https://lore.kernel.org/r/20220506034040.2614129-1-yangyingliang@huawei.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +index 40fa5bce2ac2..d324c292318b 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +@@ -255,7 +255,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + + err = ionic_map_bars(ionic); + if (err) +- goto err_out_pci_disable_device; ++ goto err_out_pci_release_regions; + + /* Configure the device */ + err = ionic_setup(ionic); +@@ -359,6 +359,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + + err_out_unmap_bars: + ionic_unmap_bars(ionic); ++err_out_pci_release_regions: + pci_release_regions(pdev); + err_out_pci_disable_device: + pci_disable_device(pdev); +-- +2.35.1 + diff --git a/queue-5.17/ipv4-drop-dst-in-multicast-routing-path.patch b/queue-5.17/ipv4-drop-dst-in-multicast-routing-path.patch new file mode 100644 index 00000000000..3c6ab47f842 --- /dev/null +++ b/queue-5.17/ipv4-drop-dst-in-multicast-routing-path.patch @@ -0,0 +1,67 @@ +From 5e669e979230cc8abeb5879a33be38a08a969e85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 14:00:17 +1200 +Subject: ipv4: drop dst in multicast routing path + +From: Lokesh Dhoundiyal + +[ Upstream commit 9e6c6d17d1d6a3f1515ce399f9a011629ec79aa0 ] + +kmemleak reports the following when routing multicast traffic over an +ipsec tunnel. 
+ +Kmemleak output: +unreferenced object 0x8000000044bebb00 (size 256): + comm "softirq", pid 0, jiffies 4294985356 (age 126.810s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 80 00 00 00 05 13 74 80 ..............t. + 80 00 00 00 04 9b bf f9 00 00 00 00 00 00 00 00 ................ + backtrace: + [<00000000f83947e0>] __kmalloc+0x1e8/0x300 + [<00000000b7ed8dca>] metadata_dst_alloc+0x24/0x58 + [<0000000081d32c20>] __ipgre_rcv+0x100/0x2b8 + [<00000000824f6cf1>] gre_rcv+0x178/0x540 + [<00000000ccd4e162>] gre_rcv+0x7c/0xd8 + [<00000000c024b148>] ip_protocol_deliver_rcu+0x124/0x350 + [<000000006a483377>] ip_local_deliver_finish+0x54/0x68 + [<00000000d9271b3a>] ip_local_deliver+0x128/0x168 + [<00000000bd4968ae>] xfrm_trans_reinject+0xb8/0xf8 + [<0000000071672a19>] tasklet_action_common.isra.16+0xc4/0x1b0 + [<0000000062e9c336>] __do_softirq+0x1fc/0x3e0 + [<00000000013d7914>] irq_exit+0xc4/0xe0 + [<00000000a4d73e90>] plat_irq_dispatch+0x7c/0x108 + [<000000000751eb8e>] handle_int+0x16c/0x178 + [<000000001668023b>] _raw_spin_unlock_irqrestore+0x1c/0x28 + +The metadata dst is leaked when ip_route_input_mc() updates the dst for +the skb. Commit f38a9eb1f77b ("dst: Metadata destinations") correctly +handled dropping the dst in ip_route_input_slow() but missed the +multicast case which is handled by ip_route_input_mc(). Drop the dst in +ip_route_input_mc() avoiding the leak. 
+ +Fixes: f38a9eb1f77b ("dst: Metadata destinations") +Signed-off-by: Lokesh Dhoundiyal +Signed-off-by: Chris Packham +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/20220505020017.3111846-1-chris.packham@alliedtelesis.co.nz +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/route.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index d5d058de3664..eef07b62b2d8 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1748,6 +1748,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + #endif + RT_CACHE_STAT_INC(in_slow_mc); + ++ skb_dst_drop(skb); + skb_dst_set(skb, &rth->dst); + return 0; + } +-- +2.35.1 + diff --git a/queue-5.17/iwlwifi-iwl-dbg-use-del_timer_sync-before-freeing.patch b/queue-5.17/iwlwifi-iwl-dbg-use-del_timer_sync-before-freeing.patch new file mode 100644 index 00000000000..4df5a2e06ba --- /dev/null +++ b/queue-5.17/iwlwifi-iwl-dbg-use-del_timer_sync-before-freeing.patch @@ -0,0 +1,49 @@ +From 5fcfcb3e905b3d3c86dd004c16762e4729b95408 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Apr 2022 08:42:10 -0700 +Subject: iwlwifi: iwl-dbg: Use del_timer_sync() before freeing + +From: Guenter Roeck + +[ Upstream commit 7635a1ad8d92dcc8247b53f949e37795154b5b6f ] + +In Chrome OS, a large number of crashes is observed due to corrupted timer +lists. Steven Rostedt pointed out that this usually happens when a timer +is freed while still active, and that the problem is often triggered +by code calling del_timer() instead of del_timer_sync() just before +freeing. + +Steven also identified the iwlwifi driver as one of the possible culprits +since it does exactly that. 
+ +Reported-by: Steven Rostedt +Cc: Steven Rostedt +Cc: Johannes Berg +Cc: Gregory Greenman +Fixes: 60e8abd9d3e91 ("iwlwifi: dbg_ini: add periodic trigger new API support") +Signed-off-by: Guenter Roeck +Acked-by: Gregory Greenman +Tested-by: Sedat Dilek # Linux v5.17.3-rc1 and Debian LLVM-14 +Signed-off-by: Kalle Valo +Link: https://lore.kernel.org/r/20220411154210.1870008-1-linux@roeck-us.net +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +index 42f6f8bb83be..901600ca6f0e 100644 +--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c ++++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +@@ -362,7 +362,7 @@ void iwl_dbg_tlv_del_timers(struct iwl_trans *trans) + struct iwl_dbg_tlv_timer_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, timer_list, list) { +- del_timer(&node->timer); ++ del_timer_sync(&node->timer); + list_del(&node->list); + kfree(node); + } +-- +2.35.1 + diff --git a/queue-5.17/mac80211-reset-mbssid-parameters-upon-connection.patch b/queue-5.17/mac80211-reset-mbssid-parameters-upon-connection.patch new file mode 100644 index 00000000000..da548cf1900 --- /dev/null +++ b/queue-5.17/mac80211-reset-mbssid-parameters-upon-connection.patch @@ -0,0 +1,60 @@ +From 902ed482f016214b9ab1c6947f8b2c10bdfeb2b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 28 Apr 2022 10:57:44 +0530 +Subject: mac80211: Reset MBSSID parameters upon connection + +From: Manikanta Pubbisetty + +[ Upstream commit 86af062f40a73bf63321694e6bf637144f0383fe ] + +Currently MBSSID parameters in struct ieee80211_bss_conf +are not reset upon connection. This could be problematic +with some drivers in a scenario where the device first +connects to a non-transmit BSS and then connects to a +transmit BSS of a Multi BSS AP. 
The MBSSID parameters +which are set after connecting to a non-transmit BSS will +not be reset and the same parameters will be passed on to +the driver during the subsequent connection to a transmit +BSS of a Multi BSS AP. + +For example, firmware running on the ath11k device uses the +Multi BSS data for tracking the beacon of a non-transmit BSS +and reports the driver when there is a beacon miss. If we do +not reset the MBSSID parameters during the subsequent +connection to a transmit BSS, then the driver would have +wrong MBSSID data and FW would be looking for an incorrect +BSSID in the MBSSID beacon of a Multi BSS AP and reports +beacon loss leading to an unstable connection. + +Reset the MBSSID parameters upon every connection to solve this +problem. + +Fixes: 78ac51f81532 ("mac80211: support multi-bssid") +Signed-off-by: Manikanta Pubbisetty +Link: https://lore.kernel.org/r/20220428052744.27040-1-quic_mpubbise@quicinc.com +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/mlme.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c +index c4d3e2da73f2..b5ac06b96329 100644 +--- a/net/mac80211/mlme.c ++++ b/net/mac80211/mlme.c +@@ -3574,6 +3574,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, + cbss->transmitted_bss->bssid); + bss_conf->bssid_indicator = cbss->max_bssid_indicator; + bss_conf->bssid_index = cbss->bssid_index; ++ } else { ++ bss_conf->nontransmitted = false; ++ memset(bss_conf->transmitter_bssid, 0, ++ sizeof(bss_conf->transmitter_bssid)); ++ bss_conf->bssid_indicator = 0; ++ bss_conf->bssid_index = 0; + } + + /* +-- +2.35.1 + diff --git a/queue-5.17/mac80211_hwsim-call-ieee80211_tx_prepare_skb-under-r.patch b/queue-5.17/mac80211_hwsim-call-ieee80211_tx_prepare_skb-under-r.patch new file mode 100644 index 00000000000..de659f17816 --- /dev/null +++ b/queue-5.17/mac80211_hwsim-call-ieee80211_tx_prepare_skb-under-r.patch @@ -0,0 +1,52 @@ +From 
dcba501efe0023b7f1c221c0abe460a318027789 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 23:04:22 +0200 +Subject: mac80211_hwsim: call ieee80211_tx_prepare_skb under RCU protection + +From: Johannes Berg + +[ Upstream commit 9e2db50f1ef2238fc2f71c5de1c0418b7a5b0ea2 ] + +This is needed since it might use (and pass out) pointers to +e.g. keys protected by RCU. Can't really happen here as the +frames aren't encrypted, but we need to still adhere to the +rules. + +Fixes: cacfddf82baf ("mac80211_hwsim: initialize ieee80211_tx_info at hw_scan_work") +Signed-off-by: Johannes Berg +Link: https://lore.kernel.org/r/20220505230421.5f139f9de173.I77ae111a28f7c0e9fd1ebcee7f39dbec5c606770@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/mac80211_hwsim.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c +index fc5725f6daee..4a91d5cb75c3 100644 +--- a/drivers/net/wireless/mac80211_hwsim.c ++++ b/drivers/net/wireless/mac80211_hwsim.c +@@ -2336,11 +2336,13 @@ static void hw_scan_work(struct work_struct *work) + if (req->ie_len) + skb_put_data(probe, req->ie, req->ie_len); + ++ rcu_read_lock(); + if (!ieee80211_tx_prepare_skb(hwsim->hw, + hwsim->hw_scan_vif, + probe, + hwsim->tmp_chan->band, + NULL)) { ++ rcu_read_unlock(); + kfree_skb(probe); + continue; + } +@@ -2348,6 +2350,7 @@ static void hw_scan_work(struct work_struct *work) + local_bh_disable(); + mac80211_hwsim_tx_frame(hwsim->hw, probe, + hwsim->tmp_chan); ++ rcu_read_unlock(); + local_bh_enable(); + } + } +-- +2.35.1 + diff --git a/queue-5.17/mlxsw-avoid-warning-during-ip6gre-device-removal.patch b/queue-5.17/mlxsw-avoid-warning-during-ip6gre-device-removal.patch new file mode 100644 index 00000000000..c1305182ce3 --- /dev/null +++ b/queue-5.17/mlxsw-avoid-warning-during-ip6gre-device-removal.patch @@ -0,0 +1,105 @@ +From b32cb7009973163d8b3131de5cf152761c35e97a Mon Sep 17 
00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 May 2022 14:57:47 +0300 +Subject: mlxsw: Avoid warning during ip6gre device removal + +From: Amit Cohen + +[ Upstream commit 810c2f0a3f86158c1e02e74947b66d811473434a ] + +IPv6 addresses which are used for tunnels are stored in a hash table +with reference counting. When a new GRE tunnel is configured, the driver +is notified and configures it in hardware. + +Currently, any change in the tunnel is not applied in the driver. It +means that if the remote address is changed, the driver is not aware of +this change and the first address will be used. + +This behavior results in a warning [1] in scenarios such as the +following: + + # ip link add name gre1 type ip6gre local 2000::3 remote 2000::fffe tos inherit ttl inherit + # ip link set name gre1 type ip6gre local 2000::3 remote 2000::ffff ttl inherit + # ip link delete gre1 + +The change of the address is not applied in the driver. Currently, the +driver uses the remote address which is stored in the 'parms' of the +overlay device. When the tunnel is removed, the new IPv6 address is +used, the driver tries to release it, but as it is not aware of the +change, this address is not configured and it warns about releasing non +existing IPv6 address. + +Fix it by using the IPv6 address which is cached in the IPIP entry, this +address is the last one that the driver used, so even in cases such the +above, the first address will be released, without any warning. + +[1]: + +WARNING: CPU: 1 PID: 2197 at drivers/net/ethernet/mellanox/mlxsw/spectrum.c:2920 mlxsw_sp_ipv6_addr_put+0x146/0x220 [mlxsw_spectrum] +... +CPU: 1 PID: 2197 Comm: ip Not tainted 5.17.0-rc8-custom-95062-gc1e5ded51a9a #84 +Hardware name: Mellanox Technologies Ltd. MSN4700/VMOD0010, BIOS 5.11 07/12/2021 +RIP: 0010:mlxsw_sp_ipv6_addr_put+0x146/0x220 [mlxsw_spectrum] +... 
+Call Trace: + + mlxsw_sp2_ipip_rem_addr_unset_gre6+0xf1/0x120 [mlxsw_spectrum] + mlxsw_sp_netdevice_ipip_ol_event+0xdb/0x640 [mlxsw_spectrum] + mlxsw_sp_netdevice_event+0xc4/0x850 [mlxsw_spectrum] + raw_notifier_call_chain+0x3c/0x50 + call_netdevice_notifiers_info+0x2f/0x80 + unregister_netdevice_many+0x311/0x6d0 + rtnl_dellink+0x136/0x360 + rtnetlink_rcv_msg+0x12f/0x380 + netlink_rcv_skb+0x49/0xf0 + netlink_unicast+0x233/0x340 + netlink_sendmsg+0x202/0x440 + ____sys_sendmsg+0x1f3/0x220 + ___sys_sendmsg+0x70/0xb0 + __sys_sendmsg+0x54/0xa0 + do_syscall_64+0x35/0x80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +Fixes: e846efe2737b ("mlxsw: spectrum: Add hash table for IPv6 address mapping") +Reported-by: Maksym Yaremchuk +Signed-off-by: Amit Cohen +Signed-off-by: Ido Schimmel +Link: https://lore.kernel.org/r/20220511115747.238602-1-idosch@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +index 01cf5a6a26bd..a2ee695a3f17 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +@@ -568,10 +568,8 @@ static int + mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) + { +- struct __ip6_tnl_parm parms6; +- +- parms6 = mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); +- return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &parms6.raddr, ++ return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, ++ &ipip_entry->parms.daddr.addr6, + &ipip_entry->dip_kvdl_index); + } + +@@ -579,10 +577,7 @@ static void + mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry) + { +- struct __ip6_tnl_parm parms6; +- +- parms6 = 
mlxsw_sp_ipip_netdev_parms6(ipip_entry->ol_dev); +- mlxsw_sp_ipv6_addr_put(mlxsw_sp, &parms6.raddr); ++ mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6); + } + + static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = { +-- +2.35.1 + diff --git a/queue-5.17/net-bcmgenet-check-for-wake-on-lan-interrupt-probe-d.patch b/queue-5.17/net-bcmgenet-check-for-wake-on-lan-interrupt-probe-d.patch new file mode 100644 index 00000000000..94b5d596a3d --- /dev/null +++ b/queue-5.17/net-bcmgenet-check-for-wake-on-lan-interrupt-probe-d.patch @@ -0,0 +1,44 @@ +From d5587bd4bcdbd28b74f12017ef829b53e19e4aba Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 20:17:51 -0700 +Subject: net: bcmgenet: Check for Wake-on-LAN interrupt probe deferral + +From: Florian Fainelli + +[ Upstream commit 6b77c06655b8a749c1a3d9ebc51e9717003f7e5a ] + +The interrupt controller supplying the Wake-on-LAN interrupt line may be +modular on some platforms (irq-bcm7038-l1.c) and might be probed at a +later time than the GENET driver. We need to specifically check for +-EPROBE_DEFER and propagate that error to ensure that we eventually +fetch the interrupt descriptor. 
+ +Fixes: 9deb48b53e7f ("bcmgenet: add WOL IRQ check") +Fixes: 5b1f0e62941b ("net: bcmgenet: Avoid touching non-existent interrupt") +Signed-off-by: Florian Fainelli +Reviewed-by: Stefan Wahren +Link: https://lore.kernel.org/r/20220511031752.2245566-1-f.fainelli@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +index c2bfb25e087c..64bf31ceb6d9 100644 +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -3999,6 +3999,10 @@ static int bcmgenet_probe(struct platform_device *pdev) + goto err; + } + priv->wol_irq = platform_get_irq_optional(pdev, 2); ++ if (priv->wol_irq == -EPROBE_DEFER) { ++ err = priv->wol_irq; ++ goto err; ++ } + + priv->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->base)) { +-- +2.35.1 + diff --git a/queue-5.17/net-chelsio-cxgb4-avoid-potential-negative-array-off.patch b/queue-5.17/net-chelsio-cxgb4-avoid-potential-negative-array-off.patch new file mode 100644 index 00000000000..bf12a861e39 --- /dev/null +++ b/queue-5.17/net-chelsio-cxgb4-avoid-potential-negative-array-off.patch @@ -0,0 +1,96 @@ +From 1f594fb5b6560de3b608d5b032d66cf6e7cac0a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 16:31:01 -0700 +Subject: net: chelsio: cxgb4: Avoid potential negative array offset + +From: Kees Cook + +[ Upstream commit 1c7ab9cd98b78bef1657a5db7204d8d437e24c94 ] + +Using min_t(int, ...) as a potential array index implies to the compiler +that negative offsets should be allowed. This is not the case, though. +Replace "int" with "unsigned int". 
Fixes the following warning exposed +under future CONFIG_FORTIFY_SOURCE improvements: + +In file included from include/linux/string.h:253, + from include/linux/bitmap.h:11, + from include/linux/cpumask.h:12, + from include/linux/smp.h:13, + from include/linux/lockdep.h:14, + from include/linux/rcupdate.h:29, + from include/linux/rculist.h:11, + from include/linux/pid.h:5, + from include/linux/sched.h:14, + from include/linux/delay.h:23, + from drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:35: +drivers/net/ethernet/chelsio/cxgb4/t4_hw.c: In function 't4_get_raw_vpd_params': +include/linux/fortify-string.h:46:33: warning: '__builtin_memcpy' pointer overflow between offset 29 and size [2147483648, 4294967295] [-Warray-bounds] + 46 | #define __underlying_memcpy __builtin_memcpy + | ^ +include/linux/fortify-string.h:388:9: note: in expansion of macro '__underlying_memcpy' + 388 | __underlying_##op(p, q, __fortify_size); \ + | ^~~~~~~~~~~~~ +include/linux/fortify-string.h:433:26: note: in expansion of macro '__fortify_memcpy_chk' + 433 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ + | ^~~~~~~~~~~~~~~~~~~~ +drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:2796:9: note: in expansion of macro 'memcpy' + 2796 | memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN)); + | ^~~~~~ +include/linux/fortify-string.h:46:33: warning: '__builtin_memcpy' pointer overflow between offset 0 and size [2147483648, 4294967295] [-Warray-bounds] + 46 | #define __underlying_memcpy __builtin_memcpy + | ^ +include/linux/fortify-string.h:388:9: note: in expansion of macro '__underlying_memcpy' + 388 | __underlying_##op(p, q, __fortify_size); \ + | ^~~~~~~~~~~~~ +include/linux/fortify-string.h:433:26: note: in expansion of macro '__fortify_memcpy_chk' + 433 | #define memcpy(p, q, s) __fortify_memcpy_chk(p, q, s, \ + | ^~~~~~~~~~~~~~~~~~~~ +drivers/net/ethernet/chelsio/cxgb4/t4_hw.c:2798:9: note: in expansion of macro 'memcpy' + 2798 | memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN)); + | 
^~~~~~ + +Additionally remove needless cast from u8[] to char * in last strim() +call. + +Reported-by: kernel test robot +Link: https://lore.kernel.org/lkml/202205031926.FVP7epJM-lkp@intel.com +Fixes: fc9279298e3a ("cxgb4: Search VPD with pci_vpd_find_ro_info_keyword()") +Fixes: 24c521f81c30 ("cxgb4: Use pci_vpd_find_id_string() to find VPD ID string") +Cc: Raju Rangoju +Cc: Eric Dumazet +Cc: Paolo Abeni +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20220505233101.1224230-1-keescook@chromium.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +index e7b4e3ed056c..8d719f82854a 100644 +--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c ++++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +@@ -2793,14 +2793,14 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) + goto out; + na = ret; + +- memcpy(p->id, vpd + id, min_t(int, id_len, ID_LEN)); ++ memcpy(p->id, vpd + id, min_t(unsigned int, id_len, ID_LEN)); + strim(p->id); +- memcpy(p->sn, vpd + sn, min_t(int, sn_len, SERNUM_LEN)); ++ memcpy(p->sn, vpd + sn, min_t(unsigned int, sn_len, SERNUM_LEN)); + strim(p->sn); +- memcpy(p->pn, vpd + pn, min_t(int, pn_len, PN_LEN)); ++ memcpy(p->pn, vpd + pn, min_t(unsigned int, pn_len, PN_LEN)); + strim(p->pn); +- memcpy(p->na, vpd + na, min_t(int, na_len, MACADDR_LEN)); +- strim((char *)p->na); ++ memcpy(p->na, vpd + na, min_t(unsigned int, na_len, MACADDR_LEN)); ++ strim(p->na); + + out: + vfree(vpd); +-- +2.35.1 + diff --git a/queue-5.17/net-dsa-bcm_sf2-fix-wake-on-lan-with-mac_link_down.patch b/queue-5.17/net-dsa-bcm_sf2-fix-wake-on-lan-with-mac_link_down.patch new file mode 100644 index 00000000000..33644ab882a --- /dev/null +++ b/queue-5.17/net-dsa-bcm_sf2-fix-wake-on-lan-with-mac_link_down.patch @@ -0,0 +1,43 @@ 
+From 17dea91233fb33692711d29ef39b6010ffc8fcda Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 May 2022 19:17:31 -0700 +Subject: net: dsa: bcm_sf2: Fix Wake-on-LAN with mac_link_down() + +From: Florian Fainelli + +[ Upstream commit b7be130c5d52e5224ac7d89568737b37b4c4b785 ] + +After commit 2d1f90f9ba83 ("net: dsa/bcm_sf2: fix incorrect usage of +state->link") the interface suspend path would call our mac_link_down() +call back which would forcibly set the link down, thus preventing +Wake-on-LAN packets from reaching our management port. + +Fix this by looking at whether the port is enabled for Wake-on-LAN and +not clearing the link status in that case to let packets go through. + +Fixes: 2d1f90f9ba83 ("net: dsa/bcm_sf2: fix incorrect usage of state->link") +Signed-off-by: Florian Fainelli +Link: https://lore.kernel.org/r/20220512021731.2494261-1-f.fainelli@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/bcm_sf2.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c +index 6afb5db8244c..6d15a743219f 100644 +--- a/drivers/net/dsa/bcm_sf2.c ++++ b/drivers/net/dsa/bcm_sf2.c +@@ -833,6 +833,9 @@ static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port, + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + u32 reg, offset; + ++ if (priv->wol_ports_mask & BIT(port)) ++ return; ++ + if (port != core_readl(priv, CORE_IMP0_PRT_ID)) { + if (priv->type == BCM4908_DEVICE_ID || + priv->type == BCM7445_DEVICE_ID) +-- +2.35.1 + diff --git a/queue-5.17/net-dsa-flush-switchdev-workqueue-on-bridge-join-err.patch b/queue-5.17/net-dsa-flush-switchdev-workqueue-on-bridge-join-err.patch new file mode 100644 index 00000000000..cc36bfe554d --- /dev/null +++ b/queue-5.17/net-dsa-flush-switchdev-workqueue-on-bridge-join-err.patch @@ -0,0 +1,66 @@ +From 4152906281e141b289026c68da7c59ed0c4da2c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 7 May 2022 16:45:50 
+0300 +Subject: net: dsa: flush switchdev workqueue on bridge join error path + +From: Vladimir Oltean + +[ Upstream commit 630fd4822af2374cd75c682b7665dcb367613765 ] + +There is a race between switchdev_bridge_port_offload() and the +dsa_port_switchdev_sync_attrs() call right below it. + +When switchdev_bridge_port_offload() finishes, FDB entries have been +replayed by the bridge, but are scheduled for deferred execution later. + +However dsa_port_switchdev_sync_attrs -> dsa_port_can_apply_vlan_filtering() +may impose restrictions on the vlan_filtering attribute and refuse +offloading. + +When this happens, the delayed FDB entries will dereference dp->bridge, +which is a NULL pointer because we have stopped the process of +offloading this bridge. + +Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 +Workqueue: dsa_ordered dsa_slave_switchdev_event_work +pc : dsa_port_bridge_host_fdb_del+0x64/0x100 +lr : dsa_slave_switchdev_event_work+0x130/0x1bc +Call trace: + dsa_port_bridge_host_fdb_del+0x64/0x100 + dsa_slave_switchdev_event_work+0x130/0x1bc + process_one_work+0x294/0x670 + worker_thread+0x80/0x460 +---[ end trace 0000000000000000 ]--- +Error: dsa_core: Must first remove VLAN uppers having VIDs also present in bridge. + +Fix the bug by doing what we do on the normal bridge leave path as well, +which is to wait until the deferred FDB entries complete executing, then +exit. + +The placement of dsa_flush_workqueue() after switchdev_bridge_port_unoffload() +guarantees that both the FDB additions and deletions on rollback are waited for. 
+ +Fixes: d7d0d423dbaa ("net: dsa: flush switchdev workqueue when leaving the bridge") +Signed-off-by: Vladimir Oltean +Link: https://lore.kernel.org/r/20220507134550.1849834-1-vladimir.oltean@nxp.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/dsa/port.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/dsa/port.c b/net/dsa/port.c +index 4368fd32c4a5..f4bd063f8315 100644 +--- a/net/dsa/port.c ++++ b/net/dsa/port.c +@@ -367,6 +367,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, + switchdev_bridge_port_unoffload(brport_dev, dp, + &dsa_slave_switchdev_notifier, + &dsa_slave_switchdev_blocking_notifier); ++ dsa_flush_workqueue(); + out_rollback_unbridge: + dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); + out_rollback: +-- +2.35.1 + diff --git a/queue-5.17/net-emaclite-don-t-advertise-1000base-t-and-do-auto-.patch b/queue-5.17/net-emaclite-don-t-advertise-1000base-t-and-do-auto-.patch new file mode 100644 index 00000000000..fb5667789d2 --- /dev/null +++ b/queue-5.17/net-emaclite-don-t-advertise-1000base-t-and-do-auto-.patch @@ -0,0 +1,62 @@ +From dbae02c390f06658242904d56ad82cd1a3dbea52 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 12:57:49 +0530 +Subject: net: emaclite: Don't advertise 1000BASE-T and do auto negotiation + +From: Shravya Kumbham + +[ Upstream commit b800528b97d0adc3a5ba42d78a8b0d3f07a31f44 ] + +In xemaclite_open() function we are setting the max speed of +emaclite to 100Mb using phy_set_max_speed() function so, +there is no need to write the advertising registers to stop +giga-bit speed and the phy_start() function starts the +auto-negotiation so, there is no need to handle it separately +using advertising registers. Remove the phy_read and phy_write +of advertising registers in xemaclite_open() function. 
+ +Signed-off-by: Shravya Kumbham +Signed-off-by: Radhey Shyam Pandey +Reviewed-by: Andrew Lunn +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/xilinx/xilinx_emaclite.c | 15 --------------- + 1 file changed, 15 deletions(-) + +diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c +index 08a670bf2cd1..c2b142cf75eb 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c ++++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c +@@ -935,8 +935,6 @@ static int xemaclite_open(struct net_device *dev) + xemaclite_disable_interrupts(lp); + + if (lp->phy_node) { +- u32 bmcr; +- + lp->phy_dev = of_phy_connect(lp->ndev, lp->phy_node, + xemaclite_adjust_link, 0, + PHY_INTERFACE_MODE_MII); +@@ -947,19 +945,6 @@ static int xemaclite_open(struct net_device *dev) + + /* EmacLite doesn't support giga-bit speeds */ + phy_set_max_speed(lp->phy_dev, SPEED_100); +- +- /* Don't advertise 1000BASE-T Full/Half duplex speeds */ +- phy_write(lp->phy_dev, MII_CTRL1000, 0); +- +- /* Advertise only 10 and 100mbps full/half duplex speeds */ +- phy_write(lp->phy_dev, MII_ADVERTISE, ADVERTISE_ALL | +- ADVERTISE_CSMA); +- +- /* Restart auto negotiation */ +- bmcr = phy_read(lp->phy_dev, MII_BMCR); +- bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); +- phy_write(lp->phy_dev, MII_BMCR, bmcr); +- + phy_start(lp->phy_dev); + } + +-- +2.35.1 + diff --git a/queue-5.17/net-ethernet-mediatek-ppe-fix-wrong-size-passed-to-m.patch b/queue-5.17/net-ethernet-mediatek-ppe-fix-wrong-size-passed-to-m.patch new file mode 100644 index 00000000000..12f7d66d0e3 --- /dev/null +++ b/queue-5.17/net-ethernet-mediatek-ppe-fix-wrong-size-passed-to-m.patch @@ -0,0 +1,38 @@ +From ec7559d7e7f901546ea91db57951941024428661 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 May 2022 11:08:29 +0800 +Subject: net: ethernet: mediatek: ppe: fix wrong size passed to memset() + +From: Yang Yingliang + +[ Upstream commit 
00832b1d1a393dfb1b9491d085e5b27e8c25d103 ] + +'foe_table' is a pointer, the real size of struct mtk_foe_entry +should be passed to memset(). + +Fixes: ba37b7caf1ed ("net: ethernet: mtk_eth_soc: add support for initializing the PPE") +Signed-off-by: Yang Yingliang +Acked-by: Felix Fietkau +Link: https://lore.kernel.org/r/20220511030829.3308094-1-yangyingliang@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_ppe.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c +index 3ad10c793308..66298e2235c9 100644 +--- a/drivers/net/ethernet/mediatek/mtk_ppe.c ++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c +@@ -395,7 +395,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe) + static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 }; + int i, k; + +- memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(ppe->foe_table)); ++ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table)); + + if (!IS_ENABLED(CONFIG_SOC_MT7621)) + return; +-- +2.35.1 + diff --git a/queue-5.17/net-fix-features-skip-in-for_each_netdev_feature.patch b/queue-5.17/net-fix-features-skip-in-for_each_netdev_feature.patch new file mode 100644 index 00000000000..149cb358277 --- /dev/null +++ b/queue-5.17/net-fix-features-skip-in-for_each_netdev_feature.patch @@ -0,0 +1,49 @@ +From 3d0afc3d00b49318035c6c4ba8e49bc48771b9c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 May 2022 11:09:14 +0300 +Subject: net: Fix features skip in for_each_netdev_feature() + +From: Tariq Toukan + +[ Upstream commit 85db6352fc8a158a893151baa1716463d34a20d0 ] + +The find_next_netdev_feature() macro gets the "remaining length", +not bit index. +Passing "bit - 1" for the following iteration is wrong as it skips +the adjacent bit. Pass "bit" instead. 
+ +Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian") +Signed-off-by: Tariq Toukan +Reviewed-by: Gal Pressman +Link: https://lore.kernel.org/r/20220504080914.1918-1-tariqt@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/linux/netdev_features.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h +index 2c6b9e416225..7c2d77d75a88 100644 +--- a/include/linux/netdev_features.h ++++ b/include/linux/netdev_features.h +@@ -169,7 +169,7 @@ enum { + #define NETIF_F_HW_HSR_FWD __NETIF_F(HW_HSR_FWD) + #define NETIF_F_HW_HSR_DUP __NETIF_F(HW_HSR_DUP) + +-/* Finds the next feature with the highest number of the range of start till 0. ++/* Finds the next feature with the highest number of the range of start-1 till 0. + */ + static inline int find_next_netdev_feature(u64 feature, unsigned long start) + { +@@ -188,7 +188,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) + for ((bit) = find_next_netdev_feature((mask_addr), \ + NETDEV_FEATURE_COUNT); \ + (bit) >= 0; \ +- (bit) = find_next_netdev_feature((mask_addr), (bit) - 1)) ++ (bit) = find_next_netdev_feature((mask_addr), (bit))) + + /* Features valid for ethtool to change */ + /* = all defined minus driver/device-class-related */ +-- +2.35.1 + diff --git a/queue-5.17/net-mscc-ocelot-avoid-corrupting-hardware-counters-w.patch b/queue-5.17/net-mscc-ocelot-avoid-corrupting-hardware-counters-w.patch new file mode 100644 index 00000000000..5f367f6d78a --- /dev/null +++ b/queue-5.17/net-mscc-ocelot-avoid-corrupting-hardware-counters-w.patch @@ -0,0 +1,115 @@ +From 0dc6d1dc4deb5b3e2b67c495de4369de31606d01 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 02:55:03 +0300 +Subject: net: mscc: ocelot: avoid corrupting hardware counters when moving + VCAP filters + +From: Vladimir Oltean + +[ Upstream commit 93a8417088ea570b5721d2b526337a2d3aed9fa3 
] + +Given the following order of operations: + +(1) we add filter A using tc-flower +(2) we send a packet that matches it +(3) we read the filter's statistics to find a hit count of 1 +(4) we add a second filter B with a higher preference than A, and A + moves one position to the right to make room in the TCAM for it +(5) we send another packet, and this matches the second filter B +(6) we read the filter statistics again. + +When this happens, the hit count of filter A is 2 and of filter B is 1, +despite a single packet having matched each filter. + +Furthermore, in an alternate history, reading the filter stats a second +time between steps (3) and (4) makes the hit count of filter A remain at +1 after step (6), as expected. + +The reason why this happens has to do with the filter->stats.pkts field, +which is written to hardware through the call path below: + + vcap_entry_set + / | \ + / | \ + / | \ + / | \ +es0_entry_set is1_entry_set is2_entry_set + \ | / + \ | / + \ | / + vcap_data_set(data.counter, ...) + +The primary role of filter->stats.pkts is to transport the filter hit +counters from the last readout all the way from vcap_entry_get() -> +ocelot_vcap_filter_stats_update() -> ocelot_cls_flower_stats(). +The reason why vcap_entry_set() writes it to hardware is so that the +counters (saturating and having a limited bit width) are cleared +after each user space readout. + +The writing of filter->stats.pkts to hardware during the TCAM entry +movement procedure is an unintentional consequence of the code design, +because the hit count isn't up to date at this point. + +So at step (4), when filter A is moved by ocelot_vcap_filter_add() to +make room for filter B, the hardware hit count is 0 (no packet matched +on it in the meantime), but filter->stats.pkts is 1, because the last +readout saw the earlier packet. 
The movement procedure programs the old +hit count back to hardware, so this creates the impression to user space +that more packets have been matched than they really were. + +The bug can be seen when running the gact_drop_and_ok_test() from the +tc_actions.sh selftest. + +Fix the issue by reading back the hit count to tmp->stats.pkts before +migrating the VCAP filter. Sure, this is a best-effort technique, since +the packets that hit the rule between vcap_entry_get() and +vcap_entry_set() won't be counted, but at least it allows the counters +to be reliably used for selftests where the traffic is under control. + +The vcap_entry_get() name is a bit unintuitive, but it only reads back +the counter portion of the TCAM entry, not the entire entry. + +The index from which we retrieve the counter is also a bit unintuitive +(i - 1 during add, i + 1 during del), but this is the way in which TCAM +entry movement works. The "entry index" isn't a stored integer for a +TCAM filter, instead it is dynamically computed by +ocelot_vcap_block_get_filter_index() based on the entry's position in +the &block->rules list. That position (as well as block->count) is +automatically updated by ocelot_vcap_filter_add_to_block() on add, and +by ocelot_vcap_block_remove_filter() on del. So "i" is the new filter +index, and "i - 1" or "i + 1" respectively are the old addresses of that +TCAM entry (we only support installing/deleting one filter at a time). 
+ +Fixes: b596229448dd ("net: mscc: ocelot: Add support for tcam") +Signed-off-by: Vladimir Oltean +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c +index 6c643936c675..f159726788ba 100644 +--- a/drivers/net/ethernet/mscc/ocelot_vcap.c ++++ b/drivers/net/ethernet/mscc/ocelot_vcap.c +@@ -1181,6 +1181,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, + struct ocelot_vcap_filter *tmp; + + tmp = ocelot_vcap_block_find_filter_by_index(block, i); ++ /* Read back the filter's counters before moving it */ ++ vcap_entry_get(ocelot, i - 1, tmp); + vcap_entry_set(ocelot, i, tmp); + } + +@@ -1239,6 +1241,8 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, + struct ocelot_vcap_filter *tmp; + + tmp = ocelot_vcap_block_find_filter_by_index(block, i); ++ /* Read back the filter's counters before moving it */ ++ vcap_entry_get(ocelot, i + 1, tmp); + vcap_entry_set(ocelot, i, tmp); + } + +-- +2.35.1 + diff --git a/queue-5.17/net-mscc-ocelot-fix-last-vcap-is1-is2-filter-persist.patch b/queue-5.17/net-mscc-ocelot-fix-last-vcap-is1-is2-filter-persist.patch new file mode 100644 index 00000000000..047b102b865 --- /dev/null +++ b/queue-5.17/net-mscc-ocelot-fix-last-vcap-is1-is2-filter-persist.patch @@ -0,0 +1,56 @@ +From 7a489850ff3e0ef995fd02bb731f945e9fbee7ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 02:55:00 +0300 +Subject: net: mscc: ocelot: fix last VCAP IS1/IS2 filter persisting in + hardware when deleted + +From: Vladimir Oltean + +[ Upstream commit 16bbebd35629c93a8c68c6d8d28557e100bcee73 ] + +ocelot_vcap_filter_del() works by moving the next filters over the +current one, and then deleting the last filter by calling vcap_entry_set() +with a del_filter which was specially created by memsetting its memory +to zeroes. 
vcap_entry_set() then programs this to the TCAM and action +RAM via the cache registers. + +The problem is that vcap_entry_set() is a dispatch function which looks +at del_filter->block_id. But since del_filter is zeroized memory, the +block_id is 0, or otherwise said, VCAP_ES0. So practically, what we do +is delete the entry at the same TCAM index from VCAP ES0 instead of IS1 +or IS2. + +The code was not always like this. vcap_entry_set() used to simply be +is2_entry_set(), and then, the logic used to work. + +Restore the functionality by populating the block_id of the del_filter +based on the VCAP block of the filter that we're deleting. This makes +vcap_entry_set() know what to do. + +Fixes: 1397a2eb52e2 ("net: mscc: ocelot: create TCAM skeleton from tc filter chains") +Signed-off-by: Vladimir Oltean +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_vcap.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c +index d3544413a8a4..e650afef12af 100644 +--- a/drivers/net/ethernet/mscc/ocelot_vcap.c ++++ b/drivers/net/ethernet/mscc/ocelot_vcap.c +@@ -1221,7 +1221,11 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, + struct ocelot_vcap_filter del_filter; + int i, index; + ++ /* Need to inherit the block_id so that vcap_entry_set() ++ * does not get confused and knows where to install it. 
++ */ + memset(&del_filter, 0, sizeof(del_filter)); ++ del_filter.block_id = filter->block_id; + + /* Gets index of the filter */ + index = ocelot_vcap_block_get_filter_index(block, filter); +-- +2.35.1 + diff --git a/queue-5.17/net-mscc-ocelot-fix-vcap-is2-filters-matching-on-bot.patch b/queue-5.17/net-mscc-ocelot-fix-vcap-is2-filters-matching-on-bot.patch new file mode 100644 index 00000000000..9cf03127f2a --- /dev/null +++ b/queue-5.17/net-mscc-ocelot-fix-vcap-is2-filters-matching-on-bot.patch @@ -0,0 +1,69 @@ +From 59896bbc9dadb53fe8e641cd8c231f293078fb95 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 02:55:01 +0300 +Subject: net: mscc: ocelot: fix VCAP IS2 filters matching on both lookups + +From: Vladimir Oltean + +[ Upstream commit 6741e11880003e35802d78cc58035057934f4dab ] + +The VCAP IS2 TCAM is looked up twice per packet, and each filter can be +configured to only match during the first, second lookup, or both, or +none. + +The blamed commit wrote the code for making VCAP IS2 filters match only +on the given lookup. But right below that code, there was another line +that explicitly made the lookup a "don't care", and this is overwriting +the lookup we've selected. So the code had no effect. + +Some of the more noticeable effects of having filters match on both +lookups: + +- in "tc -s filter show dev swp0 ingress", we see each packet matching a + VCAP IS2 filter counted twice. This throws off scripts such as + tools/testing/selftests/net/forwarding/tc_actions.sh and makes them + fail. + +- a "tc-drop" action offloaded to VCAP IS2 needs a policer as well, + because once the CPU port becomes a member of the destination port + mask of a packet, nothing removes it, not even a PERMIT/DENY mask mode + with a port mask of 0. But VCAP IS2 rules with the POLICE_ENA bit in + the action vector can only appear in the first lookup. 
What happens + when a filter matches both lookups is that the action vector is + combined, and this makes the POLICE_ENA bit ineffective, since the + last lookup in which it has appeared is the second one. In other + words, "tc-drop" actions do not drop packets for the CPU port, dropped + packets are still seen by software unless there was an FDB entry that + directed those packets to some other place different from the CPU. + +The last bit used to work, because in the initial commit b596229448dd +("net: mscc: ocelot: Add support for tcam"), we were writing the FIRST +field of the VCAP IS2 half key with a 1, not with a "don't care". +The change to "don't care" was made inadvertently by me in commit +c1c3993edb7c ("net: mscc: ocelot: generalize existing code for VCAP"), +which I just realized, and which needs a separate fix from this one, +for "stable" kernels that lack the commit blamed below. + +Fixes: 226e9cd82a96 ("net: mscc: ocelot: only install TCAM entries into a specific lookup and PAG") +Signed-off-by: Vladimir Oltean +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_vcap.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c +index e650afef12af..6c643936c675 100644 +--- a/drivers/net/ethernet/mscc/ocelot_vcap.c ++++ b/drivers/net/ethernet/mscc/ocelot_vcap.c +@@ -373,7 +373,6 @@ static void is2_entry_set(struct ocelot *ocelot, int ix, + OCELOT_VCAP_BIT_0); + vcap_key_set(vcap, &data, VCAP_IS2_HK_IGR_PORT_MASK, 0, + ~filter->ingress_port_mask); +- vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_FIRST, OCELOT_VCAP_BIT_ANY); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_HOST_MATCH, + OCELOT_VCAP_BIT_ANY); + vcap_key_bit_set(vcap, &data, VCAP_IS2_HK_L2_MC, filter->dmac_mc); +-- +2.35.1 + diff --git a/queue-5.17/net-mscc-ocelot-restrict-tc-trap-actions-to-vcap-is2.patch b/queue-5.17/net-mscc-ocelot-restrict-tc-trap-actions-to-vcap-is2.patch 
new file mode 100644 index 00000000000..841bfc05a31 --- /dev/null +++ b/queue-5.17/net-mscc-ocelot-restrict-tc-trap-actions-to-vcap-is2.patch @@ -0,0 +1,52 @@ +From 808828f67f29dc73ad2261c4ea5a928eb8dafd19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 02:55:02 +0300 +Subject: net: mscc: ocelot: restrict tc-trap actions to VCAP IS2 lookup 0 + +From: Vladimir Oltean + +[ Upstream commit 477d2b91623e682e9a8126ea92acb8f684969cc7 ] + +Once the CPU port was added to the destination port mask of a packet, it +can never be cleared, so even packets marked as dropped by the MASK_MODE +of a VCAP IS2 filter will still reach it. This is why we need the +OCELOT_POLICER_DISCARD to "kill dropped packets dead" and make software +stop seeing them. + +We disallow policer rules from being put on any other chain than the one +for the first lookup, but we don't do this for "drop" rules, although we +should. This change is merely ascertaining that the rules don't +(completely) work and letting the user know. + +The blamed commit is the one that introduced the multi-chain architecture +in ocelot. Prior to that, we should have always offloaded the filters to +VCAP IS2 lookup 0, where they did work. 
+ +Fixes: 1397a2eb52e2 ("net: mscc: ocelot: create TCAM skeleton from tc filter chains") +Signed-off-by: Vladimir Oltean +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mscc/ocelot_flower.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c +index fdb4d7e7296c..cb602a226149 100644 +--- a/drivers/net/ethernet/mscc/ocelot_flower.c ++++ b/drivers/net/ethernet/mscc/ocelot_flower.c +@@ -278,9 +278,10 @@ static int ocelot_flower_parse_action(struct ocelot *ocelot, int port, + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + break; + case FLOW_ACTION_TRAP: +- if (filter->block_id != VCAP_IS2) { ++ if (filter->block_id != VCAP_IS2 || ++ filter->lookup != 0) { + NL_SET_ERR_MSG_MOD(extack, +- "Trap action can only be offloaded to VCAP IS2"); ++ "Trap action can only be offloaded to VCAP IS2 lookup 0"); + return -EOPNOTSUPP; + } + if (filter->goto_target != -1) { +-- +2.35.1 + diff --git a/queue-5.17/net-rds-use-maybe_get_net-when-acquiring-refcount-on.patch b/queue-5.17/net-rds-use-maybe_get_net-when-acquiring-refcount-on.patch new file mode 100644 index 00000000000..b50905e456a --- /dev/null +++ b/queue-5.17/net-rds-use-maybe_get_net-when-acquiring-refcount-on.patch @@ -0,0 +1,129 @@ +From 09514b0a317774596b800ba63c93d7add4c3c8e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 10:53:53 +0900 +Subject: net: rds: use maybe_get_net() when acquiring refcount on TCP sockets + +From: Tetsuo Handa + +[ Upstream commit 6997fbd7a3dafa754f81d541498ace35b43246d8 ] + +Eric Dumazet is reporting addition on 0 problem at rds_tcp_tune(), for +delayed works queued in rds_wq might be invoked after a net namespace's +refcount already reached 0. 
+ +Since rds_tcp_exit_net() from cleanup_net() calls flush_workqueue(rds_wq), +it is guaranteed that we can instead use maybe_get_net() from delayed work +functions until rds_tcp_exit_net() returns. + +Note that I'm not convinced that all works which might access a net +namespace are already queued in rds_wq by the moment rds_tcp_exit_net() +calls flush_workqueue(rds_wq). If some race is there, rds_tcp_exit_net() +will fail to wait for work functions, and kmem_cache_free() could be +called from net_free() before maybe_get_net() is called from +rds_tcp_tune(). + +Reported-by: Eric Dumazet +Fixes: 3a58f13a881ed351 ("net: rds: acquire refcount on TCP sockets") +Signed-off-by: Tetsuo Handa +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/41d09faf-bc78-1a87-dfd1-c6d1b5984b61@I-love.SAKURA.ne.jp +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/rds/tcp.c | 12 +++++++++--- + net/rds/tcp.h | 2 +- + net/rds/tcp_connect.c | 5 ++++- + net/rds/tcp_listen.c | 5 ++++- + 4 files changed, 18 insertions(+), 6 deletions(-) + +diff --git a/net/rds/tcp.c b/net/rds/tcp.c +index 2f638f8b7b1e..73ee2771093d 100644 +--- a/net/rds/tcp.c ++++ b/net/rds/tcp.c +@@ -487,11 +487,11 @@ struct rds_tcp_net { + /* All module specific customizations to the RDS-TCP socket should be done in + * rds_tcp_tune() and applied after socket creation. + */ +-void rds_tcp_tune(struct socket *sock) ++bool rds_tcp_tune(struct socket *sock) + { + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); +- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); ++ struct rds_tcp_net *rtn; + + tcp_sock_set_nodelay(sock->sk); + lock_sock(sk); +@@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock) + * a process which created this net namespace terminated. 
+ */ + if (!sk->sk_net_refcnt) { ++ if (!maybe_get_net(net)) { ++ release_sock(sk); ++ return false; ++ } + sk->sk_net_refcnt = 1; +- get_net_track(net, &sk->ns_tracker, GFP_KERNEL); ++ netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); + } ++ rtn = net_generic(net, rds_tcp_netid); + if (rtn->sndbuf_size > 0) { + sk->sk_sndbuf = rtn->sndbuf_size; + sk->sk_userlocks |= SOCK_SNDBUF_LOCK; +@@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock) + sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } + release_sock(sk); ++ return true; + } + + static void rds_tcp_accept_worker(struct work_struct *work) +diff --git a/net/rds/tcp.h b/net/rds/tcp.h +index dc8d745d6857..f8b5930d7b34 100644 +--- a/net/rds/tcp.h ++++ b/net/rds/tcp.h +@@ -49,7 +49,7 @@ struct rds_tcp_statistics { + }; + + /* tcp.c */ +-void rds_tcp_tune(struct socket *sock); ++bool rds_tcp_tune(struct socket *sock); + void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); + void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); + void rds_tcp_restore_callbacks(struct socket *sock, +diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c +index 5461d77fff4f..f0c477c5d1db 100644 +--- a/net/rds/tcp_connect.c ++++ b/net/rds/tcp_connect.c +@@ -124,7 +124,10 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) + if (ret < 0) + goto out; + +- rds_tcp_tune(sock); ++ if (!rds_tcp_tune(sock)) { ++ ret = -EINVAL; ++ goto out; ++ } + + if (isv6) { + sin6.sin6_family = AF_INET6; +diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c +index 09cadd556d1e..7edf2e69d3fe 100644 +--- a/net/rds/tcp_listen.c ++++ b/net/rds/tcp_listen.c +@@ -133,7 +133,10 @@ int rds_tcp_accept_one(struct socket *sock) + __module_get(new_sock->ops->owner); + + rds_tcp_keepalive(new_sock); +- rds_tcp_tune(new_sock); ++ if (!rds_tcp_tune(new_sock)) { ++ ret = -EINVAL; ++ goto out; ++ } + + inet = inet_sk(new_sock->sk); + +-- +2.35.1 + diff --git 
a/queue-5.17/net-sched-act_pedit-really-ensure-the-skb-is-writabl.patch b/queue-5.17/net-sched-act_pedit-really-ensure-the-skb-is-writabl.patch new file mode 100644 index 00000000000..93b3b5418d8 --- /dev/null +++ b/queue-5.17/net-sched-act_pedit-really-ensure-the-skb-is-writabl.patch @@ -0,0 +1,123 @@ +From 7fd20c1d43911dc5e230a13bde37ff8116ae5202 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 16:57:34 +0200 +Subject: net/sched: act_pedit: really ensure the skb is writable + +From: Paolo Abeni + +[ Upstream commit 8b796475fd7882663a870456466a4fb315cc1bd6 ] + +Currently pedit tries to ensure that the accessed skb offset +is writable via skb_unclone(). The action potentially allows +touching any skb bytes, so it may end-up modifying shared data. + +The above causes some sporadic MPTCP self-test failures, due to +this code: + + tc -n $ns2 filter add dev ns2eth$i egress \ + protocol ip prio 1000 \ + handle 42 fw \ + action pedit munge offset 148 u8 invert \ + pipe csum tcp \ + index 100 + +The above modifies a data byte outside the skb head and the skb is +a cloned one, carrying a TCP output packet. + +This change addresses the issue by keeping track of a rough +over-estimate highest skb offset accessed by the action and ensuring +such offset is really writable. + +Note that this may cause performance regressions in some scenarios, +but hopefully pedit is not in the critical path. 
+ +Fixes: db2c24175d14 ("act_pedit: access skb->data safely") +Acked-by: Mat Martineau +Tested-by: Geliang Tang +Signed-off-by: Paolo Abeni +Acked-by: Jamal Hadi Salim +Link: https://lore.kernel.org/r/1fcf78e6679d0a287dd61bb0f04730ce33b3255d.1652194627.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/tc_act/tc_pedit.h | 1 + + net/sched/act_pedit.c | 26 ++++++++++++++++++++++---- + 2 files changed, 23 insertions(+), 4 deletions(-) + +diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h +index 748cf87a4d7e..3e02709a1df6 100644 +--- a/include/net/tc_act/tc_pedit.h ++++ b/include/net/tc_act/tc_pedit.h +@@ -14,6 +14,7 @@ struct tcf_pedit { + struct tc_action common; + unsigned char tcfp_nkeys; + unsigned char tcfp_flags; ++ u32 tcfp_off_max_hint; + struct tc_pedit_key *tcfp_keys; + struct tcf_pedit_key_ex *tcfp_keys_ex; + }; +diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c +index 31fcd279c177..0eaaf1f45de1 100644 +--- a/net/sched/act_pedit.c ++++ b/net/sched/act_pedit.c +@@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, + struct nlattr *pattr; + struct tcf_pedit *p; + int ret = 0, err; +- int ksize; ++ int i, ksize; + u32 index; + + if (!nla) { +@@ -228,6 +228,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, + p->tcfp_nkeys = parm->nkeys; + } + memcpy(p->tcfp_keys, parm->keys, ksize); ++ p->tcfp_off_max_hint = 0; ++ for (i = 0; i < p->tcfp_nkeys; ++i) { ++ u32 cur = p->tcfp_keys[i].off; ++ ++ /* The AT option can read a single byte, we can bound the actual ++ * value with uchar max. 
++ */ ++ cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; ++ ++ /* Each key touches 4 bytes starting from the computed offset */ ++ p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); ++ } + + p->tcfp_flags = parm->flags; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); +@@ -308,13 +320,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) + { + struct tcf_pedit *p = to_pedit(a); ++ u32 max_offset; + int i; + +- if (skb_unclone(skb, GFP_ATOMIC)) +- return p->tcf_action; +- + spin_lock(&p->tcf_lock); + ++ max_offset = (skb_transport_header_was_set(skb) ? ++ skb_transport_offset(skb) : ++ skb_network_offset(skb)) + ++ p->tcfp_off_max_hint; ++ if (skb_ensure_writable(skb, min(skb->len, max_offset))) ++ goto unlock; ++ + tcf_lastuse_update(&p->tcf_tm); + + if (p->tcfp_nkeys > 0) { +@@ -403,6 +420,7 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, + p->tcf_qstats.overlimits++; + done: + bstats_update(&p->tcf_bstats, skb); ++unlock: + spin_unlock(&p->tcf_lock); + return p->tcf_action; + } +-- +2.35.1 + diff --git a/queue-5.17/net-sfc-ef10-fix-memory-leak-in-efx_ef10_mtd_probe.patch b/queue-5.17/net-sfc-ef10-fix-memory-leak-in-efx_ef10_mtd_probe.patch new file mode 100644 index 00000000000..9bc403075e1 --- /dev/null +++ b/queue-5.17/net-sfc-ef10-fix-memory-leak-in-efx_ef10_mtd_probe.patch @@ -0,0 +1,72 @@ +From 39cfdc158b3a5c4814feda4f51ad0d2e6ca37e45 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 May 2022 05:47:09 +0000 +Subject: net: sfc: ef10: fix memory leak in efx_ef10_mtd_probe() + +From: Taehee Yoo + +[ Upstream commit 1fa89ffbc04545b7582518e57f4b63e2a062870f ] + +In the NIC ->probe() callback, ->mtd_probe() callback is called. +If NIC has 2 ports, ->probe() is called twice and ->mtd_probe() too. +In the ->mtd_probe(), which is efx_ef10_mtd_probe() it allocates and +initializes mtd partiion. +But mtd partition for sfc is shared data. 
+So that allocated mtd partition data from last called +efx_ef10_mtd_probe() will not be used. +Therefore it must be freed. +But it doesn't free a not used mtd partition data in efx_ef10_mtd_probe(). + +kmemleak reports: +unreferenced object 0xffff88811ddb0000 (size 63168): + comm "systemd-udevd", pid 265, jiffies 4294681048 (age 348.586s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [] kmalloc_order_trace+0x19/0x120 + [] __kmalloc+0x20e/0x250 + [] efx_ef10_mtd_probe+0x11f/0x270 [sfc] + [] efx_pci_probe.cold.17+0x3df/0x53d [sfc] + [] local_pci_probe+0xdc/0x170 + [] pci_device_probe+0x235/0x680 + [] really_probe+0x1c2/0x8f0 + [] __driver_probe_device+0x2ab/0x460 + [] driver_probe_device+0x4a/0x120 + [] __driver_attach+0x16e/0x320 + [] bus_for_each_dev+0x110/0x190 + [] bus_add_driver+0x39e/0x560 + [] driver_register+0x18e/0x310 + [] 0xffffffffc02e2055 + [] do_one_initcall+0xc3/0x450 + [] do_init_module+0x1b4/0x700 + +Acked-by: Martin Habets +Fixes: 8127d661e77f ("sfc: Add support for Solarflare SFC9100 family") +Signed-off-by: Taehee Yoo +Link: https://lore.kernel.org/r/20220512054709.12513-1-ap420073@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/ef10.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c +index cf366ed2557c..1ab725d554a5 100644 +--- a/drivers/net/ethernet/sfc/ef10.c ++++ b/drivers/net/ethernet/sfc/ef10.c +@@ -3579,6 +3579,11 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) + n_parts++; + } + ++ if (!n_parts) { ++ kfree(parts); ++ return 0; ++ } ++ + rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); + fail: + if (rc) +-- +2.35.1 + diff --git a/queue-5.17/net-sfc-fix-memory-leak-due-to-ptp-channel.patch b/queue-5.17/net-sfc-fix-memory-leak-due-to-ptp-channel.patch new file 
mode 100644 index 00000000000..ae339244ff0 --- /dev/null +++ b/queue-5.17/net-sfc-fix-memory-leak-due-to-ptp-channel.patch @@ -0,0 +1,183 @@ +From 877f7b629fb6fcb6fd5905fd8d03783d2f6fc9e0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 May 2022 12:32:27 +0000 +Subject: net: sfc: fix memory leak due to ptp channel + +From: Taehee Yoo + +[ Upstream commit 49e6123c65dac6393b04f39ceabf79c44f66b8be ] + +It fixes memory leak in ring buffer change logic. + +When ring buffer size is changed(ethtool -G eth0 rx 4096), sfc driver +works like below. +1. stop all channels and remove ring buffers. +2. allocates new buffer array. +3. allocates rx buffers. +4. start channels. + +While the above steps are working, it skips some steps if the channel +doesn't have a ->copy callback function. +Due to ptp channel doesn't have ->copy callback, these above steps are +skipped for ptp channel. +It eventually makes some problems. +a. ptp channel's ring buffer size is not changed, it works only + 1024(default). +b. memory leak. + +The reason for memory leak is to use the wrong ring buffer values. +There are some values, which is related to ring buffer size. +a. efx->rxq_entries + - This is global value of rx queue size. +b. rx_queue->ptr_mask + - used for access ring buffer as circular ring. + - roundup_pow_of_two(efx->rxq_entries) - 1 +c. rx_queue->max_fill + - efx->rxq_entries - EFX_RXD_HEAD_ROOM + +These all values should be based on ring buffer size consistently. +But ptp channel's values are not. +a. efx->rxq_entries + - This is global(for sfc) value, always new ring buffer size. +b. rx_queue->ptr_mask + - This is always 1023(default). +c. rx_queue->max_fill + - This is new ring buffer size - EFX_RXD_HEAD_ROOM. + +Let's assume we set 4096 for rx ring buffer, + + normal channel ptp channel +efx->rxq_entries 4096 4096 +rx_queue->ptr_mask 4095 1023 +rx_queue->max_fill 4086 4086 + +sfc driver allocates rx ring buffers based on these values. 
+When it allocates ptp channel's ring buffer, 4086 ring buffers are +allocated then, these buffers are attached to the allocated array. +But ptp channel's ring buffer array size is still 1024(default) +and ptr_mask is still 1023 too. +So, 3062 ring buffers will be overwritten to the array. +This is the reason for memory leak. + +Test commands: + ethtool -G rx 4096 + while : + do + ip link set up + ip link set down + done + +In order to avoid this problem, it adds ->copy callback to ptp channel +type. +So that rx_queue->ptr_mask value will be updated correctly. + +Fixes: 7c236c43b838 ("sfc: Add support for IEEE-1588 PTP") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/sfc/efx_channels.c | 7 ++++++- + drivers/net/ethernet/sfc/ptp.c | 14 +++++++++++++- + drivers/net/ethernet/sfc/ptp.h | 1 + + 3 files changed, 20 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c +index 40bfd0ad7d05..eec0db76d888 100644 +--- a/drivers/net/ethernet/sfc/efx_channels.c ++++ b/drivers/net/ethernet/sfc/efx_channels.c +@@ -845,7 +845,9 @@ static void efx_set_xdp_channels(struct efx_nic *efx) + + int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) + { +- struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; ++ struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel, ++ *ptp_channel = efx_ptp_channel(efx); ++ struct efx_ptp_data *ptp_data = efx->ptp_data; + unsigned int i, next_buffer_table = 0; + u32 old_rxq_entries, old_txq_entries; + int rc, rc2; +@@ -916,6 +918,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) + + efx_set_xdp_channels(efx); + out: ++ efx->ptp_data = NULL; + /* Destroy unused channel structures */ + for (i = 0; i < efx->n_channels; i++) { + channel = other_channel[i]; +@@ -926,6 +929,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 
txq_entries) + } + } + ++ efx->ptp_data = ptp_data; + rc2 = efx_soft_enable_interrupts(efx); + if (rc2) { + rc = rc ? rc : rc2; +@@ -944,6 +948,7 @@ int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) + efx->txq_entries = old_txq_entries; + for (i = 0; i < efx->n_channels; i++) + swap(efx->channel[i], other_channel[i]); ++ efx_ptp_update_channel(efx, ptp_channel); + goto out; + } + +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c +index f0ef515e2ade..4625f85acab2 100644 +--- a/drivers/net/ethernet/sfc/ptp.c ++++ b/drivers/net/ethernet/sfc/ptp.c +@@ -45,6 +45,7 @@ + #include "farch_regs.h" + #include "tx.h" + #include "nic.h" /* indirectly includes ptp.h */ ++#include "efx_channels.h" + + /* Maximum number of events expected to make up a PTP event */ + #define MAX_EVENT_FRAGS 3 +@@ -541,6 +542,12 @@ struct efx_channel *efx_ptp_channel(struct efx_nic *efx) + return efx->ptp_data ? efx->ptp_data->channel : NULL; + } + ++void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel) ++{ ++ if (efx->ptp_data) ++ efx->ptp_data->channel = channel; ++} ++ + static u32 last_sync_timestamp_major(struct efx_nic *efx) + { + struct efx_channel *channel = efx_ptp_channel(efx); +@@ -1443,6 +1450,11 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) + int rc = 0; + unsigned int pos; + ++ if (efx->ptp_data) { ++ efx->ptp_data->channel = channel; ++ return 0; ++ } ++ + ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL); + efx->ptp_data = ptp; + if (!efx->ptp_data) +@@ -2176,7 +2188,7 @@ static const struct efx_channel_type efx_ptp_channel_type = { + .pre_probe = efx_ptp_probe_channel, + .post_remove = efx_ptp_remove_channel, + .get_name = efx_ptp_get_channel_name, +- /* no copy operation; there is no need to reallocate this channel */ ++ .copy = efx_copy_channel, + .receive_skb = efx_ptp_rx, + .want_txqs = efx_ptp_want_txqs, + .keep_eventq = false, +diff --git 
a/drivers/net/ethernet/sfc/ptp.h b/drivers/net/ethernet/sfc/ptp.h +index 9855e8c9e544..7b1ef7002b3f 100644 +--- a/drivers/net/ethernet/sfc/ptp.h ++++ b/drivers/net/ethernet/sfc/ptp.h +@@ -16,6 +16,7 @@ struct ethtool_ts_info; + int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel); + void efx_ptp_defer_probe_with_channel(struct efx_nic *efx); + struct efx_channel *efx_ptp_channel(struct efx_nic *efx); ++void efx_ptp_update_channel(struct efx_nic *efx, struct efx_channel *channel); + void efx_ptp_remove(struct efx_nic *efx); + int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr); + int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr); +-- +2.35.1 + diff --git a/queue-5.17/net-sfp-add-tx-fault-workaround-for-huawei-ma5671a-s.patch b/queue-5.17/net-sfp-add-tx-fault-workaround-for-huawei-ma5671a-s.patch new file mode 100644 index 00000000000..2b7318fe6ca --- /dev/null +++ b/queue-5.17/net-sfp-add-tx-fault-workaround-for-huawei-ma5671a-s.patch @@ -0,0 +1,68 @@ +From 553de5d6dabbdecb7fec727514d41804465be9ec Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 23:33:15 +0100 +Subject: net: sfp: Add tx-fault workaround for Huawei MA5671A SFP ONT + +From: Matthew Hagan + +[ Upstream commit 2069624dac19d62c558bb6468fe03678553ab01d ] + +As noted elsewhere, various GPON SFP modules exhibit non-standard +TX-fault behaviour. In the tested case, the Huawei MA5671A, when used +in combination with a Marvell mv88e6085 switch, was found to +persistently assert TX-fault, resulting in the module being disabled. + +This patch adds a quirk to ignore the SFP_F_TX_FAULT state, allowing the +module to function. 
+ +Change from v1: removal of erroneous return statment (Andrew Lunn) + +Signed-off-by: Matthew Hagan +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20220502223315.1973376-1-mnhagan88@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/sfp.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c +index 4720b24ca51b..90dfefc1f5f8 100644 +--- a/drivers/net/phy/sfp.c ++++ b/drivers/net/phy/sfp.c +@@ -250,6 +250,7 @@ struct sfp { + struct sfp_eeprom_id id; + unsigned int module_power_mW; + unsigned int module_t_start_up; ++ bool tx_fault_ignore; + + #if IS_ENABLED(CONFIG_HWMON) + struct sfp_diag diag; +@@ -1945,6 +1946,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) + else + sfp->module_t_start_up = T_START_UP; + ++ if (!memcmp(id.base.vendor_name, "HUAWEI ", 16) && ++ !memcmp(id.base.vendor_pn, "MA5671A ", 16)) ++ sfp->tx_fault_ignore = true; ++ else ++ sfp->tx_fault_ignore = false; ++ + return 0; + } + +@@ -2397,7 +2404,10 @@ static void sfp_check_state(struct sfp *sfp) + mutex_lock(&sfp->st_mutex); + state = sfp_get_state(sfp); + changed = state ^ sfp->state; +- changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; ++ if (sfp->tx_fault_ignore) ++ changed &= SFP_F_PRESENT | SFP_F_LOS; ++ else ++ changed &= SFP_F_PRESENT | SFP_F_LOS | SFP_F_TX_FAULT; + + for (i = 0; i < GPIO_MAX; i++) + if (changed & BIT(i)) +-- +2.35.1 + diff --git a/queue-5.17/net-smc-non-blocking-recvmsg-return-eagain-when-no-d.patch b/queue-5.17/net-smc-non-blocking-recvmsg-return-eagain-when-no-d.patch new file mode 100644 index 00000000000..0ddd5504ea1 --- /dev/null +++ b/queue-5.17/net-smc-non-blocking-recvmsg-return-eagain-when-no-d.patch @@ -0,0 +1,49 @@ +From d1aa34533c0e4375afeeb5a263be51b6525f5b85 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 May 2022 11:08:20 +0800 +Subject: net/smc: non blocking recvmsg() return -EAGAIN when no data 
and + signal_pending + +From: Guangguan Wang + +[ Upstream commit f3c46e41b32b6266cf60b0985c61748f53bf1c61 ] + +Non blocking sendmsg will return -EAGAIN when any signal is pending +and no send space is left, while non blocking recvmsg returns -EINTR +when a signal is pending and no data was received. This is confusing, +as TCP returns -EAGAIN in the conditions described above. Align the +behavior of smc with TCP. + +Fixes: 846e344eb722 ("net/smc: add receive timeout check") +Signed-off-by: Guangguan Wang +Reviewed-by: Tony Lu +Acked-by: Karsten Graul +Link: https://lore.kernel.org/r/20220512030820.73848-1-guangguan.wang@linux.alibaba.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/smc/smc_rx.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c +index 51e8eb2933ff..338b9ef806e8 100644 +--- a/net/smc/smc_rx.c ++++ b/net/smc/smc_rx.c +@@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, + } + break; + } ++ if (!timeo) ++ return -EAGAIN; + if (signal_pending(current)) { + read_done = sock_intr_errno(timeo); + break; + } +- if (!timeo) +- return -EAGAIN; + } + + if (!smc_rx_data_available(conn)) { +-- +2.35.1 + diff --git a/queue-5.17/netlink-do-not-reset-transport-header-in-netlink_rec.patch b/queue-5.17/netlink-do-not-reset-transport-header-in-netlink_rec.patch new file mode 100644 index 00000000000..c645b76617a --- /dev/null +++ b/queue-5.17/netlink-do-not-reset-transport-header-in-netlink_rec.patch @@ -0,0 +1,76 @@ +From 18cf33c6e0178d24e7b2ae419de57ae843c25e38 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 5 May 2022 09:19:46 -0700 +Subject: netlink: do not reset transport header in netlink_recvmsg() + +From: Eric Dumazet + +[ Upstream commit d5076fe4049cadef1f040eda4aaa001bb5424225 ] + +netlink_recvmsg() does not need to change transport header. 
+ +If transport header was needed, it should have been reset +by the producer (netlink_dump()), not the consumer(s). + +The following trace probably happened when multiple threads +were using MSG_PEEK. + +BUG: KCSAN: data-race in netlink_recvmsg / netlink_recvmsg + +write to 0xffff88811e9f15b2 of 2 bytes by task 32012 on cpu 1: + skb_reset_transport_header include/linux/skbuff.h:2760 [inline] + netlink_recvmsg+0x1de/0x790 net/netlink/af_netlink.c:1978 + sock_recvmsg_nosec net/socket.c:948 [inline] + sock_recvmsg net/socket.c:966 [inline] + __sys_recvfrom+0x204/0x2c0 net/socket.c:2097 + __do_sys_recvfrom net/socket.c:2115 [inline] + __se_sys_recvfrom net/socket.c:2111 [inline] + __x64_sys_recvfrom+0x74/0x90 net/socket.c:2111 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +write to 0xffff88811e9f15b2 of 2 bytes by task 32005 on cpu 0: + skb_reset_transport_header include/linux/skbuff.h:2760 [inline] + netlink_recvmsg+0x1de/0x790 net/netlink/af_netlink.c:1978 + ____sys_recvmsg+0x162/0x2f0 + ___sys_recvmsg net/socket.c:2674 [inline] + __sys_recvmsg+0x209/0x3f0 net/socket.c:2704 + __do_sys_recvmsg net/socket.c:2714 [inline] + __se_sys_recvmsg net/socket.c:2711 [inline] + __x64_sys_recvmsg+0x42/0x50 net/socket.c:2711 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +value changed: 0xffff -> 0x0000 + +Reported by Kernel Concurrency Sanitizer on: +CPU: 0 PID: 32005 Comm: syz-executor.4 Not tainted 5.18.0-rc1-syzkaller-00328-ge1f700ebd6be-dirty #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Link: https://lore.kernel.org/r/20220505161946.2867638-1-eric.dumazet@gmail.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- 
+ net/netlink/af_netlink.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c +index 05a3795eac8e..73e9c0a9c187 100644 +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -1975,7 +1975,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + copied = len; + } + +- skb_reset_transport_header(data_skb); + err = skb_copy_datagram_msg(data_skb, 0, msg, copied); + + if (msg->msg_name) { +-- +2.35.1 + diff --git a/queue-5.17/nfs-fix-broken-handling-of-the-softreval-mount-optio.patch b/queue-5.17/nfs-fix-broken-handling-of-the-softreval-mount-optio.patch new file mode 100644 index 00000000000..e52352c3205 --- /dev/null +++ b/queue-5.17/nfs-fix-broken-handling-of-the-softreval-mount-optio.patch @@ -0,0 +1,37 @@ +From c0121d9451116460139f6ac1a73c87523f5175ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 8 May 2022 15:54:50 +0300 +Subject: nfs: fix broken handling of the softreval mount option + +From: Dan Aloni + +[ Upstream commit 085d16d5f949b64713d5e960d6c9bbf51bc1d511 ] + +Turns out that ever since this mount option was added, passing +`softreval` in NFS mount options cancelled all other flags while not +affecting the underlying flag `NFS_MOUNT_SOFTREVAL`. 
+ +Fixes: c74dfe97c104 ("NFS: Add mount option 'softreval'") +Signed-off-by: Dan Aloni +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + fs/nfs/fs_context.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c +index ea17fa1f31ec..d20891162145 100644 +--- a/fs/nfs/fs_context.c ++++ b/fs/nfs/fs_context.c +@@ -515,7 +515,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, + if (result.negated) + ctx->flags &= ~NFS_MOUNT_SOFTREVAL; + else +- ctx->flags &= NFS_MOUNT_SOFTREVAL; ++ ctx->flags |= NFS_MOUNT_SOFTREVAL; + break; + case Opt_posix: + if (result.negated) +-- +2.35.1 + diff --git a/queue-5.17/platform-surface-aggregator-fix-initialization-order.patch b/queue-5.17/platform-surface-aggregator-fix-initialization-order.patch new file mode 100644 index 00000000000..f945692713d --- /dev/null +++ b/queue-5.17/platform-surface-aggregator-fix-initialization-order.patch @@ -0,0 +1,56 @@ +From 3b456ed48c34ad63fcffc33562a6be241a0c32f1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 29 Apr 2022 21:57:38 +0200 +Subject: platform/surface: aggregator: Fix initialization order when compiling + as builtin module +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Maximilian Luz + +[ Upstream commit 44acfc22c7d055d9c4f8f0974ee28422405b971a ] + +When building the Surface Aggregator Module (SAM) core, registry, and +other SAM client drivers as builtin modules (=y), proper initialization +order is not guaranteed. Due to this, client driver registration +(triggered by device registration in the registry) races against bus +initialization in the core. + +If any attempt is made at registering the device driver before the bus +has been initialized (i.e. 
if bus initialization fails this race) driver +registration will fail with a message similar to: + + Driver surface_battery was unable to register with bus_type surface_aggregator because the bus was not initialized + +Switch from module_init() to subsys_initcall() to resolve this issue. +Note that the serdev subsystem uses postcore_initcall() so we are still +able to safely register the serdev device driver for the core. + +Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") +Reported-by: Blaž Hrastnik +Signed-off-by: Maximilian Luz +Link: https://lore.kernel.org/r/20220429195738.535751-1-luzmaximilian@gmail.com +Reviewed-by: Hans de Goede +Signed-off-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/surface/aggregator/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/platform/surface/aggregator/core.c b/drivers/platform/surface/aggregator/core.c +index d384d36098c2..a62c5dfe42d6 100644 +--- a/drivers/platform/surface/aggregator/core.c ++++ b/drivers/platform/surface/aggregator/core.c +@@ -817,7 +817,7 @@ static int __init ssam_core_init(void) + err_bus: + return status; + } +-module_init(ssam_core_init); ++subsys_initcall(ssam_core_init); + + static void __exit ssam_core_exit(void) + { +-- +2.35.1 + diff --git a/queue-5.17/procfs-prevent-unprivileged-processes-accessing-fdin.patch b/queue-5.17/procfs-prevent-unprivileged-processes-accessing-fdin.patch new file mode 100644 index 00000000000..d4689cddec8 --- /dev/null +++ b/queue-5.17/procfs-prevent-unprivileged-processes-accessing-fdin.patch @@ -0,0 +1,91 @@ +From 81cddf60f8ac33bdbfe21d5cddca0eb05d2c096e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 May 2022 17:34:28 -0700 +Subject: procfs: prevent unprivileged processes accessing fdinfo dir + +From: Kalesh Singh + +[ Upstream commit 1927e498aee1757b3df755a194cbfc5cc0f2b663 ] + +The file permissions on the fdinfo dir from were changed from +S_IRUSR|S_IXUSR to 
S_IRUGO|S_IXUGO, and a PTRACE_MODE_READ check was added +for opening the fdinfo files [1]. However, the ptrace permission check +was not added to the directory, allowing anyone to get the open FD numbers +by reading the fdinfo directory. + +Add the missing ptrace permission check for opening the fdinfo directory. + +[1] https://lkml.kernel.org/r/20210308170651.919148-1-kaleshsingh@google.com + +Link: https://lkml.kernel.org/r/20210713162008.1056986-1-kaleshsingh@google.com +Fixes: 7bc3fa0172a4 ("procfs: allow reading fdinfo with PTRACE_MODE_READ") +Signed-off-by: Kalesh Singh +Cc: Kees Cook +Cc: Eric W. Biederman +Cc: Christian Brauner +Cc: Suren Baghdasaryan +Cc: Hridya Valsaraju +Cc: Jann Horn +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/proc/fd.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/fs/proc/fd.c b/fs/proc/fd.c +index 172c86270b31..913bef0d2a36 100644 +--- a/fs/proc/fd.c ++++ b/fs/proc/fd.c +@@ -72,7 +72,7 @@ static int seq_show(struct seq_file *m, void *v) + return 0; + } + +-static int seq_fdinfo_open(struct inode *inode, struct file *file) ++static int proc_fdinfo_access_allowed(struct inode *inode) + { + bool allowed = false; + struct task_struct *task = get_proc_task(inode); +@@ -86,6 +86,16 @@ static int seq_fdinfo_open(struct inode *inode, struct file *file) + if (!allowed) + return -EACCES; + ++ return 0; ++} ++ ++static int seq_fdinfo_open(struct inode *inode, struct file *file) ++{ ++ int ret = proc_fdinfo_access_allowed(inode); ++ ++ if (ret) ++ return ret; ++ + return single_open(file, seq_show, inode); + } + +@@ -348,12 +358,23 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) + proc_fdinfo_instantiate); + } + ++static int proc_open_fdinfo(struct inode *inode, struct file *file) ++{ ++ int ret = proc_fdinfo_access_allowed(inode); ++ ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ + const struct inode_operations proc_fdinfo_inode_operations = { + 
.lookup = proc_lookupfdinfo, + .setattr = proc_setattr, + }; + + const struct file_operations proc_fdinfo_operations = { ++ .open = proc_open_fdinfo, + .read = generic_read_dir, + .iterate_shared = proc_readfdinfo, + .llseek = generic_file_llseek, +-- +2.35.1 + diff --git a/queue-5.17/rdma-irdma-fix-deadlock-in-irdma_cleanup_cm_core.patch b/queue-5.17/rdma-irdma-fix-deadlock-in-irdma_cleanup_cm_core.patch new file mode 100644 index 00000000000..8c96f177eb5 --- /dev/null +++ b/queue-5.17/rdma-irdma-fix-deadlock-in-irdma_cleanup_cm_core.patch @@ -0,0 +1,62 @@ +From c6acff51476b39523ed0a12f59b79ee4327fb453 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 18 Apr 2022 23:33:22 +0800 +Subject: RDMA/irdma: Fix deadlock in irdma_cleanup_cm_core() + +From: Duoming Zhou + +[ Upstream commit 679ab61bf5f5f519377d812afb4fb93634782c74 ] + +There is a deadlock in irdma_cleanup_cm_core(), which is shown below: + + (Thread 1) | (Thread 2) + | irdma_schedule_cm_timer() +irdma_cleanup_cm_core() | add_timer() + spin_lock_irqsave() //(1) | (wait a time) + ... | irdma_cm_timer_tick() + del_timer_sync() | spin_lock_irqsave() //(2) + (wait timer to stop) | ... + +We hold cm_core->ht_lock in position (1) of thread 1 and use +del_timer_sync() to wait for the timer to stop, but the timer handler also needs +cm_core->ht_lock in position (2) of thread 2. As a result, +irdma_cleanup_cm_core() will block forever. + +This patch removes the check of timer_pending() in +irdma_cleanup_cm_core(), because the del_timer_sync() function will just +return directly if there isn't a pending timer. As a result, the lock is +redundant, because there is no resource it could protect.
+ +Link: https://lore.kernel.org/r/20220418153322.42524-1-duoming@zju.edu.cn +Signed-off-by: Duoming Zhou +Reviewed-by: Shiraz Saleem +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/irdma/cm.c | 7 +------ + 1 file changed, 1 insertion(+), 6 deletions(-) + +diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c +index 082a3ddb0fa3..632f65e53b63 100644 +--- a/drivers/infiniband/hw/irdma/cm.c ++++ b/drivers/infiniband/hw/irdma/cm.c +@@ -3242,15 +3242,10 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, + */ + void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core) + { +- unsigned long flags; +- + if (!cm_core) + return; + +- spin_lock_irqsave(&cm_core->ht_lock, flags); +- if (timer_pending(&cm_core->tcp_timer)) +- del_timer_sync(&cm_core->tcp_timer); +- spin_unlock_irqrestore(&cm_core->ht_lock, flags); ++ del_timer_sync(&cm_core->tcp_timer); + + destroy_workqueue(cm_core->event_wq); + cm_core->dev->ws_reset(&cm_core->iwdev->vsi); +-- +2.35.1 + diff --git a/queue-5.17/s390-ctcm-fix-potential-memory-leak.patch b/queue-5.17/s390-ctcm-fix-potential-memory-leak.patch new file mode 100644 index 00000000000..48de6f9eb4e --- /dev/null +++ b/queue-5.17/s390-ctcm-fix-potential-memory-leak.patch @@ -0,0 +1,67 @@ +From 8bc6df150ddee72dcc713231107ebcc4820ca940 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 09:05:07 +0200 +Subject: s390/ctcm: fix potential memory leak + +From: Alexandra Winter + +[ Upstream commit 0c0b20587b9f25a2ad14db7f80ebe49bdf29920a ] + +smatch complains about +drivers/s390/net/ctcm_mpc.c:1210 ctcmpc_unpack_skb() warn: possible memory leak of 'mpcginfo' + +mpc_action_discontact() did not free mpcginfo. Consolidate the freeing in +ctcmpc_unpack_skb(). + +Fixes: 293d984f0e36 ("ctcm: infrastructure for replaced ctc driver") +Signed-off-by: Alexandra Winter +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ctcm_mpc.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/drivers/s390/net/ctcm_mpc.c b/drivers/s390/net/ctcm_mpc.c +index 88abfb5e8045..8ac213a55141 100644 +--- a/drivers/s390/net/ctcm_mpc.c ++++ b/drivers/s390/net/ctcm_mpc.c +@@ -626,8 +626,6 @@ static void mpc_rcvd_sweep_resp(struct mpcg_info *mpcginfo) + ctcm_clear_busy_do(dev); + } + +- kfree(mpcginfo); +- + return; + + } +@@ -1192,10 +1190,10 @@ static void ctcmpc_unpack_skb(struct channel *ch, struct sk_buff *pskb) + CTCM_FUNTAIL, dev->name); + priv->stats.rx_dropped++; + /* mpcginfo only used for non-data transfers */ +- kfree(mpcginfo); + if (do_debug_data) + ctcmpc_dump_skb(pskb, -8); + } ++ kfree(mpcginfo); + } + done: + +@@ -1977,7 +1975,6 @@ static void mpc_action_rcvd_xid0(fsm_instance *fsm, int event, void *arg) + } + break; + } +- kfree(mpcginfo); + + CTCM_PR_DEBUG("ctcmpc:%s() %s xid2:%i xid7:%i xidt_p2:%i \n", + __func__, ch->id, grp->outstanding_xid2, +@@ -2038,7 +2035,6 @@ static void mpc_action_rcvd_xid7(fsm_instance *fsm, int event, void *arg) + mpc_validate_xid(mpcginfo); + break; + } +- kfree(mpcginfo); + return; + } + +-- +2.35.1 + diff --git a/queue-5.17/s390-ctcm-fix-variable-dereferenced-before-check.patch b/queue-5.17/s390-ctcm-fix-variable-dereferenced-before-check.patch new file mode 100644 index 00000000000..d630b7752ae --- /dev/null +++ b/queue-5.17/s390-ctcm-fix-variable-dereferenced-before-check.patch @@ -0,0 +1,44 @@ +From a3e9d2db9cb6aebd9790cd51e2eb554713b4f891 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 09:05:06 +0200 +Subject: s390/ctcm: fix variable dereferenced before check + +From: Alexandra Winter + +[ Upstream commit 2c50c6867c85afee6f2b3bcbc50fc9d0083d1343 ] + +Found by cppcheck and smatch. 
+smatch complains about +drivers/s390/net/ctcm_sysfs.c:43 ctcm_buffer_write() warn: variable dereferenced before check 'priv' (see line 42) + +Fixes: 3c09e2647b5e ("ctcm: rename READ/WRITE defines to avoid redefinitions") +Reported-by: Colin Ian King +Signed-off-by: Alexandra Winter +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/s390/net/ctcm_sysfs.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/s390/net/ctcm_sysfs.c b/drivers/s390/net/ctcm_sysfs.c +index ded1930a00b2..e3813a7aa5e6 100644 +--- a/drivers/s390/net/ctcm_sysfs.c ++++ b/drivers/s390/net/ctcm_sysfs.c +@@ -39,11 +39,12 @@ static ssize_t ctcm_buffer_write(struct device *dev, + struct ctcm_priv *priv = dev_get_drvdata(dev); + int rc; + +- ndev = priv->channel[CTCM_READ]->netdev; +- if (!(priv && priv->channel[CTCM_READ] && ndev)) { ++ if (!(priv && priv->channel[CTCM_READ] && ++ priv->channel[CTCM_READ]->netdev)) { + CTCM_DBF_TEXT(SETUP, CTC_DBF_ERROR, "bfnondev"); + return -ENODEV; + } ++ ndev = priv->channel[CTCM_READ]->netdev; + + rc = kstrtouint(buf, 0, &bs1); + if (rc) +-- +2.35.1 + diff --git a/queue-5.17/s390-disable-warray-bounds.patch b/queue-5.17/s390-disable-warray-bounds.patch new file mode 100644 index 00000000000..74f377dd32d --- /dev/null +++ b/queue-5.17/s390-disable-warray-bounds.patch @@ -0,0 +1,54 @@ +From 340eb2464b43e84c208de38ac8173f1a5b120dee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Apr 2022 14:17:42 +0200 +Subject: s390: disable -Warray-bounds + +From: Sven Schnelle + +[ Upstream commit 8b202ee218395319aec1ef44f72043e1fbaccdd6 ] + +gcc-12 shows a lot of array bound warnings on s390. This is caused +by the S390_lowcore macro which uses a hardcoded address of 0. + +Wrapping that with absolute_pointer() works, but gcc no longer knows +that a 12 bit displacement is sufficient to access lowcore. So it +emits instructions like 'lghi %r1,0; l %rx,xxx(%r1)' instead of a +single load/store instruction. 
As s390 stores variables often +read/written in lowcore, this is considered problematic. Therefore +disable -Warray-bounds on s390 for gcc-12 for the time being, until +there is a better solution. + +Signed-off-by: Sven Schnelle +Link: https://lore.kernel.org/r/yt9dzgkelelc.fsf@linux.ibm.com +Link: https://lore.kernel.org/r/20220422134308.1613610-1-svens@linux.ibm.com +Link: https://lore.kernel.org/r/20220425121742.3222133-1-svens@linux.ibm.com +Signed-off-by: Heiko Carstens +Signed-off-by: Sasha Levin +--- + arch/s390/Makefile | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/arch/s390/Makefile b/arch/s390/Makefile +index 609e3697324b..6e42252214dd 100644 +--- a/arch/s390/Makefile ++++ b/arch/s390/Makefile +@@ -30,6 +30,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector + KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) + KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) + KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) ++ ++ifdef CONFIG_CC_IS_GCC ++ ifeq ($(call cc-ifversion, -ge, 1200, y), y) ++ ifeq ($(call cc-ifversion, -lt, 1300, y), y) ++ KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) ++ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds) ++ endif ++ endif ++endif ++ + UTS_MACHINE := s390x + STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) + CHECKFLAGS += -D__s390__ -D__s390x__ +-- +2.35.1 + diff --git a/queue-5.17/s390-lcs-fix-variable-dereferenced-before-check.patch b/queue-5.17/s390-lcs-fix-variable-dereferenced-before-check.patch new file mode 100644 index 00000000000..9833234e483 --- /dev/null +++ b/queue-5.17/s390-lcs-fix-variable-dereferenced-before-check.patch @@ -0,0 +1,42 @@ +From ae9cf3f6e8fc01bdc588303c4dc8631f4e375088 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 09:05:08 +0200 +Subject: s390/lcs: fix variable dereferenced before check + +From: Alexandra Winter + +[ Upstream commit 
671bb35c8e746439f0ed70815968f9a4f20a8deb ] + +smatch complains about +drivers/s390/net/lcs.c:1741 lcs_get_control() warn: variable dereferenced before check 'card->dev' (see line 1739) + +Fixes: 27eb5ac8f015 ("[PATCH] s390: lcs driver bug fixes and improvements [1/2]") +Signed-off-by: Alexandra Winter +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/s390/net/lcs.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c +index a61d38a1b4ed..66c2893badad 100644 +--- a/drivers/s390/net/lcs.c ++++ b/drivers/s390/net/lcs.c +@@ -1736,10 +1736,11 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) + lcs_schedule_recovery(card); + break; + case LCS_CMD_STOPLAN: +- pr_warn("Stoplan for %s initiated by LGW\n", +- card->dev->name); +- if (card->dev) ++ if (card->dev) { ++ pr_warn("Stoplan for %s initiated by LGW\n", ++ card->dev->name); + netif_carrier_off(card->dev); ++ } + break; + default: + LCS_DBF_TEXT(5, trace, "noLGWcmd"); +-- +2.35.1 + diff --git a/queue-5.17/secure_seq-use-the-64-bits-of-the-siphash-for-port-o.patch b/queue-5.17/secure_seq-use-the-64-bits-of-the-siphash-for-port-o.patch new file mode 100644 index 00000000000..1f8d7415c9e --- /dev/null +++ b/queue-5.17/secure_seq-use-the-64-bits-of-the-siphash-for-port-o.patch @@ -0,0 +1,151 @@ +From 447f0806a694ea4b5465994770c86d08ee51d37a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:08 +0200 +Subject: secure_seq: use the 64 bits of the siphash for port offset + calculation + +From: Willy Tarreau + +[ Upstream commit b2d057560b8107c633b39aabe517ff9d93f285e3 ] + +SipHash replaced MD5 in secure_ipv{4,6}_port_ephemeral() via commit +7cd23e5300c1 ("secure_seq: use SipHash in place of MD5"), but the output +remained truncated to 32-bit only. In order to exploit more bits from the +hash, let's make the functions return the full 64-bit of siphash_3u32(). 
+We also make sure the port offset calculation in __inet_hash_connect() +remains done on 32-bit to avoid the need for div_u64_rem() and an extra +cost on 32-bit systems. + +Cc: Jason A. Donenfeld +Cc: Moshe Kol +Cc: Yossi Gilad +Cc: Amit Klein +Reviewed-by: Eric Dumazet +Signed-off-by: Willy Tarreau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + include/net/inet_hashtables.h | 2 +- + include/net/secure_seq.h | 4 ++-- + net/core/secure_seq.c | 4 ++-- + net/ipv4/inet_hashtables.c | 10 ++++++---- + net/ipv6/inet6_hashtables.c | 4 ++-- + 5 files changed, 13 insertions(+), 11 deletions(-) + +diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h +index f72ec113ae56..98e1ec1a14f0 100644 +--- a/include/net/inet_hashtables.h ++++ b/include/net/inet_hashtables.h +@@ -425,7 +425,7 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) + } + + int __inet_hash_connect(struct inet_timewait_death_row *death_row, +- struct sock *sk, u32 port_offset, ++ struct sock *sk, u64 port_offset, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, + struct inet_timewait_sock **)); +diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h +index d7d2495f83c2..dac91aa38c5a 100644 +--- a/include/net/secure_seq.h ++++ b/include/net/secure_seq.h +@@ -4,8 +4,8 @@ + + #include + +-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); +-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, ++u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); ++u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport); + u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c +index 9b8443774449..55aa5cc258e3 100644 +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -94,7 +94,7 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 
*daddr, + } + EXPORT_SYMBOL(secure_tcpv6_seq); + +-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, ++u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) + { + const struct { +@@ -142,7 +142,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + } + EXPORT_SYMBOL_GPL(secure_tcp_seq); + +-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) ++u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) + { + net_secret_init(); + return siphash_3u32((__force u32)saddr, (__force u32)daddr, +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 17440840a791..9d24d9319f3d 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -504,7 +504,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, + return -EADDRNOTAVAIL; + } + +-static u32 inet_sk_port_offset(const struct sock *sk) ++static u64 inet_sk_port_offset(const struct sock *sk) + { + const struct inet_sock *inet = inet_sk(sk); + +@@ -734,7 +734,7 @@ EXPORT_SYMBOL_GPL(inet_unhash); + static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; + + int __inet_hash_connect(struct inet_timewait_death_row *death_row, +- struct sock *sk, u32 port_offset, ++ struct sock *sk, u64 port_offset, + int (*check_established)(struct inet_timewait_death_row *, + struct sock *, __u16, struct inet_timewait_sock **)) + { +@@ -777,7 +777,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + net_get_random_once(table_perturb, sizeof(table_perturb)); + index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + +- offset = (READ_ONCE(table_perturb[index]) + port_offset) % remaining; ++ offset = READ_ONCE(table_perturb[index]) + port_offset; ++ offset %= remaining; ++ + /* In first pass we try ports of @low parity. + * inet_csk_get_port() does the opposite choice. 
+ */ +@@ -859,7 +861,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + int inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) + { +- u32 port_offset = 0; ++ u64 port_offset = 0; + + if (!inet_sk(sk)->inet_num) + port_offset = inet_sk_port_offset(sk); +diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c +index 4740afecf7c6..32ccac10bd62 100644 +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -308,7 +308,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, + return -EADDRNOTAVAIL; + } + +-static u32 inet6_sk_port_offset(const struct sock *sk) ++static u64 inet6_sk_port_offset(const struct sock *sk) + { + const struct inet_sock *inet = inet_sk(sk); + +@@ -320,7 +320,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk) + int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) + { +- u32 port_offset = 0; ++ u64 port_offset = 0; + + if (!inet_sk(sk)->inet_num) + port_offset = inet6_sk_port_offset(sk); +-- +2.35.1 + diff --git a/queue-5.17/selftests-vm-makefile-rename-targets-to-vmtargets.patch b/queue-5.17/selftests-vm-makefile-rename-targets-to-vmtargets.patch new file mode 100644 index 00000000000..a1ec5f17363 --- /dev/null +++ b/queue-5.17/selftests-vm-makefile-rename-targets-to-vmtargets.patch @@ -0,0 +1,84 @@ +From 55dabb261f13eaf506419d36ced2ca89d1cdfdad Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 May 2022 17:34:29 -0700 +Subject: selftests: vm: Makefile: rename TARGETS to VMTARGETS + +From: Joel Savitz + +[ Upstream commit 41c240099fe09377b6b9f8272e45d2267c843d3e ] + +The tools/testing/selftests/vm/Makefile uses the variable TARGETS +internally to generate a list of platform-specific binary build targets +suffixed with _{32,64}. 
When building the selftests using its own +Makefile directly, such as via "make -C tools/testing/selftests TARGETS=vm" run in a kernel tree: + +One receives an error such as the following: + +make: Entering directory '/root/linux/tools/testing/selftests' +make --no-builtin-rules ARCH=x86 -C ../../.. headers_install +make[1]: Entering directory '/root/linux' + INSTALL ./usr/include +make[1]: Leaving directory '/root/linux' +make[1]: Entering directory '/root/linux/tools/testing/selftests/vm' +make[1]: *** No rule to make target 'vm.c', needed by '/root/linux/tools/testing/selftests/vm/vm_64'. Stop. +make[1]: Leaving directory '/root/linux/tools/testing/selftests/vm' +make: *** [Makefile:175: all] Error 2 +make: Leaving directory '/root/linux/tools/testing/selftests' + +The TARGETS variable passed to tools/testing/selftests/Makefile collides +with the TARGETS used in tools/testing/selftests/vm/Makefile, so rename +the latter to VMTARGETS, eliminating the collision with no functional +change. + +Link: https://lkml.kernel.org/r/20220504213454.1282532-1-jsavitz@redhat.com +Fixes: f21fda8f6453 ("selftests: vm: pkeys: fix multilib builds for x86") +Signed-off-by: Joel Savitz +Acked-by: Nico Pache +Cc: Joel Savitz +Cc: Shuah Khan +Cc: Sandipan Das +Cc: Dave Hansen +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/vm/Makefile | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile +index 1530c3e0242e..259df83ecd2e 100644 +--- a/tools/testing/selftests/vm/Makefile ++++ b/tools/testing/selftests/vm/Makefile +@@ -55,9 +55,9 @@ CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_prog + CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c) + CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) + +-TARGETS := protection_keys +-BINARIES_32 := $(TARGETS:%=%_32)
+-BINARIES_64 := $(TARGETS:%=%_64) ++VMTARGETS := protection_keys ++BINARIES_32 := $(VMTARGETS:%=%_32) ++BINARIES_64 := $(VMTARGETS:%=%_64) + + ifeq ($(CAN_BUILD_WITH_NOPIE),1) + CFLAGS += -no-pie +@@ -110,7 +110,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave + $(BINARIES_32): LDLIBS += -lrt -ldl -lm + $(BINARIES_32): $(OUTPUT)/%_32: %.c + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ +-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t)))) ++$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t)))) + endif + + ifeq ($(CAN_BUILD_X86_64),1) +@@ -118,7 +118,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave + $(BINARIES_64): LDLIBS += -lrt -ldl + $(BINARIES_64): $(OUTPUT)/%_64: %.c + $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@ +-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t)))) ++$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t)))) + endif + + # x86_64 users should be encouraged to install 32-bit libraries +-- +2.35.1 + diff --git a/queue-5.17/series b/queue-5.17/series new file mode 100644 index 00000000000..acb4d69d2d0 --- /dev/null +++ b/queue-5.17/series @@ -0,0 +1,68 @@ +batman-adv-don-t-skb_split-skbuffs-with-frag_list.patch +iwlwifi-iwl-dbg-use-del_timer_sync-before-freeing.patch +hwmon-tmp401-add-of-device-id-table.patch +mac80211-reset-mbssid-parameters-upon-connection.patch +net-rds-use-maybe_get_net-when-acquiring-refcount-on.patch +net-fix-features-skip-in-for_each_netdev_feature.patch +net-mscc-ocelot-fix-last-vcap-is1-is2-filter-persist.patch +net-mscc-ocelot-fix-vcap-is2-filters-matching-on-bot.patch +net-mscc-ocelot-restrict-tc-trap-actions-to-vcap-is2.patch +net-mscc-ocelot-avoid-corrupting-hardware-counters-w.patch +fbdev-simplefb-cleanup-fb_info-in-.fb_destroy-rather.patch +fbdev-efifb-cleanup-fb_info-in-.fb_destroy-rather-th.patch +fbdev-vesafb-cleanup-fb_info-in-.fb_destroy-rather-t.patch +platform-surface-aggregator-fix-initialization-order.patch 
+ice-fix-race-during-aux-device-un-plugging.patch +ice-clear-stale-tx-queue-settings-before-configuring.patch +ice-fix-ptp-stale-tx-timestamps-cleanup.patch +ipv4-drop-dst-in-multicast-routing-path.patch +drm-nouveau-fix-a-potential-theorical-leak-in-nouvea.patch +netlink-do-not-reset-transport-header-in-netlink_rec.patch +net-chelsio-cxgb4-avoid-potential-negative-array-off.patch +fbdev-efifb-fix-a-use-after-free-due-early-fb_info-c.patch +net-sfc-fix-memory-leak-due-to-ptp-channel.patch +fanotify-do-not-allow-setting-dirent-events-in-mask-.patch +mac80211_hwsim-call-ieee80211_tx_prepare_skb-under-r.patch +nfs-fix-broken-handling-of-the-softreval-mount-optio.patch +ionic-fix-missing-pci_release_regions-on-error-in-io.patch +dim-initialize-all-struct-fields.patch +hwmon-ltq-cputemp-restrict-it-to-soc_xway.patch +procfs-prevent-unprivileged-processes-accessing-fdin.patch +selftests-vm-makefile-rename-targets-to-vmtargets.patch +net-dsa-flush-switchdev-workqueue-on-bridge-join-err.patch +arm64-vdso-fix-makefile-dependency-on-vdso.so.patch +virtio-fix-virtio-transitional-ids.patch +s390-ctcm-fix-variable-dereferenced-before-check.patch +s390-ctcm-fix-potential-memory-leak.patch +s390-lcs-fix-variable-dereferenced-before-check.patch +net-sched-act_pedit-really-ensure-the-skb-is-writabl.patch +net-ethernet-mediatek-ppe-fix-wrong-size-passed-to-m.patch +net-bcmgenet-check-for-wake-on-lan-interrupt-probe-d.patch +drm-vc4-hdmi-fix-build-error-for-implicit-function-d.patch +mlxsw-avoid-warning-during-ip6gre-device-removal.patch +net-dsa-bcm_sf2-fix-wake-on-lan-with-mac_link_down.patch +net-smc-non-blocking-recvmsg-return-eagain-when-no-d.patch +net-sfc-ef10-fix-memory-leak-in-efx_ef10_mtd_probe.patch +tls-fix-context-leak-on-tls_device_down.patch +drm-vmwgfx-fix-fencing-on-svgav3.patch +gfs2-fix-filesystem-block-deallocation-for-short-wri.patch +hwmon-asus_wmi_sensors-fix-crosshair-vi-hero-name.patch +hwmon-f71882fg-fix-negative-temperature.patch 
+rdma-irdma-fix-deadlock-in-irdma_cleanup_cm_core.patch +iommu-arm-smmu-disable-large-page-mappings-for-nvidi.patch +asoc-max98090-reject-invalid-values-in-custom-contro.patch +asoc-max98090-generate-notifications-on-changes-for-.patch +asoc-ops-validate-input-values-in-snd_soc_put_volsw_.patch +s390-disable-warray-bounds.patch +asoc-sof-fix-null-pointer-exception-in-sof_pci_probe.patch +io_uring-assign-non-fixed-early-for-async-work.patch +net-emaclite-don-t-advertise-1000base-t-and-do-auto-.patch +net-sfp-add-tx-fault-workaround-for-huawei-ma5671a-s.patch +secure_seq-use-the-64-bits-of-the-siphash-for-port-o.patch +tcp-use-different-parts-of-the-port_offset-for-index.patch +tcp-resalt-the-secret-every-10-seconds.patch +tcp-add-small-random-increments-to-the-source-port.patch +tcp-dynamically-allocate-the-perturb-table-used-by-s.patch +tcp-increase-source-port-perturb-table-to-2-16.patch +tcp-drop-the-hash_32-part-from-the-index-calculation.patch +block-do-not-call-folio_next-on-an-unreferenced-foli.patch diff --git a/queue-5.17/tcp-add-small-random-increments-to-the-source-port.patch b/queue-5.17/tcp-add-small-random-increments-to-the-source-port.patch new file mode 100644 index 00000000000..6b8c25add87 --- /dev/null +++ b/queue-5.17/tcp-add-small-random-increments-to-the-source-port.patch @@ -0,0 +1,57 @@ +From 897367fefd921985700d0c65342d4c1d621fd3b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:11 +0200 +Subject: tcp: add small random increments to the source port + +From: Willy Tarreau + +[ Upstream commit ca7af0402550f9a0b3316d5f1c30904e42ed257d ] + +Here we're randomly adding between 0 and 7 random increments to the +selected source port in order to add some noise in the source port +selection that will make the next port less predictable. 
+ +With the default port range of 32768-60999 this means a worst case +reuse scenario of 14116/8=1764 connections between two consecutive +uses of the same port, with an average of 14116/4.5=3137. This code +was stressed at more than 800000 connections per second to a fixed +target with all connections closed by the client using RSTs (worst +condition) and only 2 connections failed among 13 billion, despite +the hash being reseeded every 10 seconds, indicating a perfectly +safe situation. + +Cc: Moshe Kol +Cc: Yossi Gilad +Cc: Amit Klein +Reviewed-by: Eric Dumazet +Signed-off-by: Willy Tarreau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 29c701cd8312..63bb4902f018 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -833,11 +833,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + return -EADDRNOTAVAIL; + + ok: +- /* If our first attempt found a candidate, skip next candidate +- * in 1/16 of cases to add some noise. ++ /* Here we want to add a little bit of randomness to the next source ++ * port that will be chosen. We use a max() with a random here so that ++ * on low contention the randomness is maximal and on high contention ++ * it may be inexistent. 
+ */ +- if (!i && !(prandom_u32() % 16)) +- i = 2; ++ i = max_t(int, i, (prandom_u32() & 7) * 2); + WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); + + /* Head lock still held and bh's disabled */ +-- +2.35.1 + diff --git a/queue-5.17/tcp-drop-the-hash_32-part-from-the-index-calculation.patch b/queue-5.17/tcp-drop-the-hash_32-part-from-the-index-calculation.patch new file mode 100644 index 00000000000..9b8881b3dc1 --- /dev/null +++ b/queue-5.17/tcp-drop-the-hash_32-part-from-the-index-calculation.patch @@ -0,0 +1,41 @@ +From 1df880f87c1ce4ecb4cd4e7565791b7d261ab4f2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:14 +0200 +Subject: tcp: drop the hash_32() part from the index calculation + +From: Willy Tarreau + +[ Upstream commit e8161345ddbb66e449abde10d2fdce93f867eba9 ] + +In commit 190cc82489f4 ("tcp: change source port randomizarion at +connect() time"), the table_perturb[] array was introduced and an +index was taken from the port_offset via hash_32(). But it turns +out that hash_32() performs a multiplication while the input here +comes from the output of SipHash in secure_seq, that is well +distributed enough to avoid the need for yet another hash. 
+ +Suggested-by: Amit Klein +Reviewed-by: Eric Dumazet +Signed-off-by: Willy Tarreau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index cc5f66328b47..a5d57fa679ca 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -778,7 +778,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + + net_get_random_once(table_perturb, + INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); +- index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); ++ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); + + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); + offset %= remaining; +-- +2.35.1 + diff --git a/queue-5.17/tcp-dynamically-allocate-the-perturb-table-used-by-s.patch b/queue-5.17/tcp-dynamically-allocate-the-perturb-table-used-by-s.patch new file mode 100644 index 00000000000..6a92f2e9e54 --- /dev/null +++ b/queue-5.17/tcp-dynamically-allocate-the-perturb-table-used-by-s.patch @@ -0,0 +1,65 @@ +From 82c77915e295de29671b8160438253a27254f13a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:12 +0200 +Subject: tcp: dynamically allocate the perturb table used by source ports + +From: Willy Tarreau + +[ Upstream commit e9261476184be1abd486c9434164b2acbe0ed6c2 ] + +We'll need to further increase the size of this table and it's likely +that at some point its size will not be suitable anymore for a static +table. Let's allocate it on boot from inet_hashinfo2_init(), which is +called from tcp_init(). 
+ +Cc: Moshe Kol +Cc: Yossi Gilad +Cc: Amit Klein +Reviewed-by: Eric Dumazet +Signed-off-by: Willy Tarreau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 63bb4902f018..48ca07853068 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -731,7 +731,8 @@ EXPORT_SYMBOL_GPL(inet_unhash); + * privacy, this only consumes 1 KB of kernel memory. + */ + #define INET_TABLE_PERTURB_SHIFT 8 +-static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; ++#define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) ++static u32 *table_perturb; + + int __inet_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk, u64 port_offset, +@@ -774,7 +775,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + if (likely(remaining > 1)) + remaining &= ~1U; + +- net_get_random_once(table_perturb, sizeof(table_perturb)); ++ net_get_random_once(table_perturb, ++ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); + index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + + offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); +@@ -912,6 +914,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, + low_limit, + high_limit); + init_hashinfo_lhash2(h); ++ ++ /* this one is used for source ports of outgoing connections */ ++ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE, ++ sizeof(*table_perturb), GFP_KERNEL); ++ if (!table_perturb) ++ panic("TCP: failed to alloc table_perturb"); + } + + int inet_hashinfo2_init_mod(struct inet_hashinfo *h) +-- +2.35.1 + diff --git a/queue-5.17/tcp-increase-source-port-perturb-table-to-2-16.patch b/queue-5.17/tcp-increase-source-port-perturb-table-to-2-16.patch new file mode 100644 index 00000000000..d44648ec3c6 --- /dev/null +++ 
b/queue-5.17/tcp-increase-source-port-perturb-table-to-2-16.patch @@ -0,0 +1,64 @@ +From 97d8501f5d986cb8ab0f441349bc981cd12c233b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:13 +0200 +Subject: tcp: increase source port perturb table to 2^16 + +From: Willy Tarreau + +[ Upstream commit 4c2c8f03a5ab7cb04ec64724d7d176d00bcc91e5 ] + +Moshe Kol, Amit Klein, and Yossi Gilad reported being able to accurately +identify a client by forcing it to emit only 40 times more connections +than there are entries in the table_perturb[] table. The previous two +improvements consisting in resalting the secret every 10s and adding +randomness to each port selection only slightly improved the situation, +and the current value of 2^8 was too small as it's not very difficult +to make a client emit 10k connections in less than 10 seconds. + +Thus we're increasing the perturb table from 2^8 to 2^16 so that the +same precision now requires 2.6M connections, which is more difficult in +this time frame and harder to hide as a background activity. The impact +is that the table now uses 256 kB instead of 1 kB, which could mostly +affect devices making frequent outgoing connections. However such +components usually target a small set of destinations (load balancers, +database clients, perf assessment tools), and in practice only a few +entries will be visited, like before. + +A live test at 1 million connections per second showed no performance +difference from the previous value. 
+ +Reported-by: Moshe Kol +Reported-by: Yossi Gilad +Reported-by: Amit Klein +Reviewed-by: Eric Dumazet +Signed-off-by: Willy Tarreau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 48ca07853068..cc5f66328b47 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -726,11 +726,12 @@ EXPORT_SYMBOL_GPL(inet_unhash); + * Note that we use 32bit integers (vs RFC 'short integers') + * because 2^16 is not a multiple of num_ephemeral and this + * property might be used by clever attacker. +- * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, +- * we use 256 instead to really give more isolation and +- * privacy, this only consumes 1 KB of kernel memory. ++ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though ++ * attacks were since demonstrated, thus we use 65536 instead to really ++ * give more isolation and privacy, at the expense of 256kB of kernel ++ * memory. 
+ */ +-#define INET_TABLE_PERTURB_SHIFT 8 ++#define INET_TABLE_PERTURB_SHIFT 16 + #define INET_TABLE_PERTURB_SIZE (1 << INET_TABLE_PERTURB_SHIFT) + static u32 *table_perturb; + +-- +2.35.1 + diff --git a/queue-5.17/tcp-resalt-the-secret-every-10-seconds.patch b/queue-5.17/tcp-resalt-the-secret-every-10-seconds.patch new file mode 100644 index 00000000000..a08d569a235 --- /dev/null +++ b/queue-5.17/tcp-resalt-the-secret-every-10-seconds.patch @@ -0,0 +1,70 @@ +From 6d482bb45d1dd2eb156a44428d26eb3495dd89a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:10 +0200 +Subject: tcp: resalt the secret every 10 seconds + +From: Eric Dumazet + +[ Upstream commit 4dfa9b438ee34caca4e6a4e5e961641807367f6f ] + +In order to limit the ability for an observer to recognize the source +ports sequence used to contact a set of destinations, we should +periodically shuffle the secret. 10 seconds looks effective enough +without causing particular issues. + +Cc: Moshe Kol +Cc: Yossi Gilad +Cc: Amit Klein +Cc: Jason A. 
Donenfeld +Tested-by: Willy Tarreau +Signed-off-by: Eric Dumazet +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/core/secure_seq.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c +index 55aa5cc258e3..5f85e01d4093 100644 +--- a/net/core/secure_seq.c ++++ b/net/core/secure_seq.c +@@ -22,6 +22,8 @@ + static siphash_aligned_key_t net_secret; + static siphash_aligned_key_t ts_secret; + ++#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) ++ + static __always_inline void net_secret_init(void) + { + net_get_random_once(&net_secret, sizeof(net_secret)); +@@ -100,11 +102,13 @@ u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + const struct { + struct in6_addr saddr; + struct in6_addr daddr; ++ unsigned int timeseed; + __be16 dport; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .saddr = *(struct in6_addr *)saddr, + .daddr = *(struct in6_addr *)daddr, +- .dport = dport ++ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, ++ .dport = dport, + }; + net_secret_init(); + return siphash(&combined, offsetofend(typeof(combined), dport), +@@ -145,8 +149,10 @@ EXPORT_SYMBOL_GPL(secure_tcp_seq); + u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) + { + net_secret_init(); +- return siphash_3u32((__force u32)saddr, (__force u32)daddr, +- (__force u16)dport, &net_secret); ++ return siphash_4u32((__force u32)saddr, (__force u32)daddr, ++ (__force u16)dport, ++ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, ++ &net_secret); + } + EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); + #endif +-- +2.35.1 + diff --git a/queue-5.17/tcp-use-different-parts-of-the-port_offset-for-index.patch b/queue-5.17/tcp-use-different-parts-of-the-port_offset-for-index.patch new file mode 100644 index 00000000000..6753c3bbf28 --- /dev/null +++ b/queue-5.17/tcp-use-different-parts-of-the-port_offset-for-index.patch @@ -0,0 +1,41 @@ +From 
2635e8769829ba31f4d8b1d73b16ed06a17fdda1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 May 2022 10:46:09 +0200 +Subject: tcp: use different parts of the port_offset for index and offset + +From: Willy Tarreau + +[ Upstream commit 9e9b70ae923baf2b5e8a0ea4fd0c8451801ac526 ] + +Amit Klein suggests that we use different parts of port_offset for the +table's index and the port offset so that there is no direct relation +between them. + +Cc: Jason A. Donenfeld +Cc: Moshe Kol +Cc: Yossi Gilad +Cc: Amit Klein +Reviewed-by: Eric Dumazet +Signed-off-by: Willy Tarreau +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/inet_hashtables.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c +index 9d24d9319f3d..29c701cd8312 100644 +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -777,7 +777,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, + net_get_random_once(table_perturb, sizeof(table_perturb)); + index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + +- offset = READ_ONCE(table_perturb[index]) + port_offset; ++ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); + offset %= remaining; + + /* In first pass we try ports of @low parity. +-- +2.35.1 + diff --git a/queue-5.17/tls-fix-context-leak-on-tls_device_down.patch b/queue-5.17/tls-fix-context-leak-on-tls_device_down.patch new file mode 100644 index 00000000000..a9739ba4998 --- /dev/null +++ b/queue-5.17/tls-fix-context-leak-on-tls_device_down.patch @@ -0,0 +1,53 @@ +From 7577181562b0e31ed3a8296a84bb676c1396f1c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 May 2022 12:18:30 +0300 +Subject: tls: Fix context leak on tls_device_down + +From: Maxim Mikityanskiy + +[ Upstream commit 3740651bf7e200109dd42d5b2fb22226b26f960a ] + +The commit cited below claims to fix a use-after-free condition after +tls_device_down. 
Apparently, the description wasn't fully accurate. The +context stayed alive, but ctx->netdev became NULL, and the offload was +torn down without a proper fallback, so a bug was present, but a +different kind of bug. + +Due to misunderstanding of the issue, the original patch dropped the +refcount_dec_and_test line for the context to avoid the alleged +premature deallocation. That line has to be restored, because it matches +the refcount_inc_not_zero from the same function, otherwise the contexts +that survived tls_device_down are leaked. + +This patch fixes the described issue by restoring refcount_dec_and_test. +After this change, there is no leak anymore, and the fallback to +software kTLS still works. + +Fixes: c55dcdd435aa ("net/tls: Fix use-after-free after the TLS device goes down and up") +Signed-off-by: Maxim Mikityanskiy +Reviewed-by: Tariq Toukan +Link: https://lore.kernel.org/r/20220512091830.678684-1-maximmi@nvidia.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/tls/tls_device.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c +index a40553e83f8b..f3e3d009cf1c 100644 +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -1347,7 +1347,10 @@ static int tls_device_down(struct net_device *netdev) + + /* Device contexts for RX and TX will be freed in on sk_destruct + * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. ++ * Now release the ref taken above. 
+ */ ++ if (refcount_dec_and_test(&ctx->refcount)) ++ tls_device_free_ctx(ctx); + } + + up_write(&device_offload_lock); +-- +2.35.1 + diff --git a/queue-5.17/virtio-fix-virtio-transitional-ids.patch b/queue-5.17/virtio-fix-virtio-transitional-ids.patch new file mode 100644 index 00000000000..021205a9c53 --- /dev/null +++ b/queue-5.17/virtio-fix-virtio-transitional-ids.patch @@ -0,0 +1,47 @@ +From da0407ae931dfc85408a92a15eb947eac8ff6e36 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 10 May 2022 19:27:23 +0900 +Subject: virtio: fix virtio transitional ids + +From: Shunsuke Mie + +[ Upstream commit 7ff960a6fe399fdcbca6159063684671ae57eee9 ] + +This commit fixes the transitional PCI device ID. + +Fixes: d61914ea6ada ("virtio: update virtio id table, add transitional ids") +Signed-off-by: Shunsuke Mie +Link: https://lore.kernel.org/r/20220510102723.87666-1-mie@igel.co.jp +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + include/uapi/linux/virtio_ids.h | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h +index 80d76b75bccd..7aa2eb766205 100644 +--- a/include/uapi/linux/virtio_ids.h ++++ b/include/uapi/linux/virtio_ids.h +@@ -73,12 +73,12 @@ + * Virtio Transitional IDs + */ + +-#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ +-#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ +-#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ +-#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ +-#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ +-#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ +-#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ ++#define VIRTIO_TRANS_ID_NET 0x1000 /* transitional virtio net */ ++#define VIRTIO_TRANS_ID_BLOCK 0x1001 /* transitional virtio block */ ++#define VIRTIO_TRANS_ID_BALLOON 0x1002 /* transitional 
virtio balloon */ ++#define VIRTIO_TRANS_ID_CONSOLE 0x1003 /* transitional virtio console */ ++#define VIRTIO_TRANS_ID_SCSI 0x1004 /* transitional virtio SCSI */ ++#define VIRTIO_TRANS_ID_RNG 0x1005 /* transitional virtio rng */ ++#define VIRTIO_TRANS_ID_9P 0x1009 /* transitional virtio 9p console */ + + #endif /* _LINUX_VIRTIO_IDS_H */ +-- +2.35.1 +