From: Greg Kroah-Hartman
Date: Sat, 21 May 2022 14:31:28 +0000 (+0200)
Subject: 5.17-stable patches
X-Git-Tag: v4.9.316~32
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=3c7a7af5e8e559e4aa5498e85a0bca26753f1ed6;p=thirdparty%2Fkernel%2Fstable-queue.git

5.17-stable patches

added patches:
	arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
	arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
	audit-io_uring-io-wq-call-__audit_uring_exit-for-dummy-contexts.patch
	crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
	dma-buf-ensure-unique-directory-name-for-dmabuf-stats.patch
	dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch
	drm-amd-don-t-reset-dgpus-if-the-system-is-going-to-s2idle.patch
	drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
	drm-i915-dmc-add-mmio-range-restrictions.patch
	fix-double-fget-in-vhost_net_set_backend.patch
	kvm-arm64-vgic-v3-consistently-populate-id_aa64pfr0_el1.gic.patch
	kvm-free-new-dirty-bitmap-if-creating-a-new-memslot-fails.patch
	kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch
	libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch
	mmc-core-fix-busy-polling-for-mmc_send_op_cond-again.patch
	pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch
	perf-fix-sys_perf_event_open-race-against-self.patch
	revert-can-m_can-pci-use-custom-bit-timings-for-elkhart-lake.patch
	selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch
---

diff --git a/queue-5.17/arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch b/queue-5.17/arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
new file mode 100644
index 00000000000..62266a8b926
--- /dev/null
+++ b/queue-5.17/arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
@@ -0,0 +1,41 @@
+From 1d0cb4c8864addc362bae98e8ffa5500c87e1227 Mon Sep 17 00:00:00 2001
+From: Catalin Marinas
+Date: Tue, 17 May 2022 10:35:32 +0100
+Subject: arm64: mte: Ensure the cleared tags are visible before setting the PTE
+
+From: Catalin Marinas
+
+commit 1d0cb4c8864addc362bae98e8ffa5500c87e1227 upstream.
+
+As an optimisation, only pages mapped with PROT_MTE in user space have
+the MTE tags zeroed. This is done lazily at the set_pte_at() time via
+mte_sync_tags(). However, this function is missing a barrier and another
+CPU may see the PTE updated before the zeroed tags are visible. Add an
+smp_wmb() barrier if the mapping is Normal Tagged.
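+
+For illustration, the required ordering is roughly the following
+(simplified sketch with a hypothetical clear_page_tags() helper, not
+the exact kernel code):
+
+	clear_page_tags(page);			/* zero the MTE tags */
+	smp_wmb();				/* tags visible before PTE */
+	set_pte_at(mm, addr, ptep, pte);	/* publish the mapping */
+
+Without the barrier, another CPU that observes the new PTE may still
+see the stale tags.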
+
+Signed-off-by: Catalin Marinas
+Fixes: 34bfeea4a9e9 ("arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE")
+Cc: <stable@vger.kernel.org> # 5.10.x
+Reported-by: Vladimir Murzin
+Cc: Will Deacon
+Reviewed-by: Steven Price
+Tested-by: Vladimir Murzin
+Link: https://lore.kernel.org/r/20220517093532.127095-1-catalin.marinas@arm.com
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/kernel/mte.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/arch/arm64/kernel/mte.c
++++ b/arch/arm64/kernel/mte.c
+@@ -76,6 +76,9 @@ void mte_sync_tags(pte_t old_pte, pte_t
+ 			mte_sync_page_tags(page, old_pte, check_swap,
+ 					   pte_is_tagged);
+ 	}
++
++	/* ensure the tags are visible before the PTE is set */
++	smp_wmb();
+ }
+
+ int memcmp_pages(struct page *page1, struct page *page2)
diff --git a/queue-5.17/arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch b/queue-5.17/arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
new file mode 100644
index 00000000000..39641e438a5
--- /dev/null
+++ b/queue-5.17/arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
@@ -0,0 +1,145 @@
+From 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef Mon Sep 17 00:00:00 2001
+From: Prakruthi Deepak Heragu
+Date: Fri, 13 May 2022 10:46:54 -0700
+Subject: arm64: paravirt: Use RCU read locks to guard stolen_time
+
+From: Prakruthi Deepak Heragu
+
+commit 19bef63f951e47dd4ba54810e6f7c7ff9344a3ef upstream.
+
+During hotplug, the stolen time data structure is unmapped and memset.
+There is a possibility of the timer IRQ being triggered before the
+memset, with stolen time being read from within the timer IRQ handler.
+This causes the crash below in the timer handler -
+
+  [ 3457.473139][  C5] Unable to handle kernel paging request at virtual address ffffffc03df05148
+  ...
+  [ 3458.154398][  C5] Call trace:
+  [ 3458.157648][  C5]  para_steal_clock+0x30/0x50
+  [ 3458.162319][  C5]  irqtime_account_process_tick+0x30/0x194
+  [ 3458.168148][  C5]  account_process_tick+0x3c/0x280
+  [ 3458.173274][  C5]  update_process_times+0x5c/0xf4
+  [ 3458.178311][  C5]  tick_sched_timer+0x180/0x384
+  [ 3458.183164][  C5]  __run_hrtimer+0x160/0x57c
+  [ 3458.187744][  C5]  hrtimer_interrupt+0x258/0x684
+  [ 3458.192698][  C5]  arch_timer_handler_virt+0x5c/0xa0
+  [ 3458.198002][  C5]  handle_percpu_devid_irq+0xdc/0x414
+  [ 3458.203385][  C5]  handle_domain_irq+0xa8/0x168
+  [ 3458.208241][  C5]  gic_handle_irq.34493+0x54/0x244
+  [ 3458.213359][  C5]  call_on_irq_stack+0x40/0x70
+  [ 3458.218125][  C5]  do_interrupt_handler+0x60/0x9c
+  [ 3458.223156][  C5]  el1_interrupt+0x34/0x64
+  [ 3458.227560][  C5]  el1h_64_irq_handler+0x1c/0x2c
+  [ 3458.232503][  C5]  el1h_64_irq+0x7c/0x80
+  [ 3458.236736][  C5]  free_vmap_area_noflush+0x108/0x39c
+  [ 3458.242126][  C5]  remove_vm_area+0xbc/0x118
+  [ 3458.246714][  C5]  vm_remove_mappings+0x48/0x2a4
+  [ 3458.251656][  C5]  __vunmap+0x154/0x278
+  [ 3458.255796][  C5]  stolen_time_cpu_down_prepare+0xc0/0xd8
+  [ 3458.261542][  C5]  cpuhp_invoke_callback+0x248/0xc34
+  [ 3458.266842][  C5]  cpuhp_thread_fun+0x1c4/0x248
+  [ 3458.271696][  C5]  smpboot_thread_fn+0x1b0/0x400
+  [ 3458.276638][  C5]  kthread+0x17c/0x1e0
+  [ 3458.280691][  C5]  ret_from_fork+0x10/0x20
+
+As a fix, introduce RCU locking to guard the stolen time structure
+against concurrent unmapping.
+
+Fixes: 75df529bec91 ("arm64: paravirt: Initialize steal time when cpu is online")
+Cc: stable@vger.kernel.org
+Suggested-by: Will Deacon
+Signed-off-by: Prakruthi Deepak Heragu
+Signed-off-by: Elliot Berman
+Reviewed-by: Srivatsa S. Bhat (VMware)
+Link: https://lore.kernel.org/r/20220513174654.362169-1-quic_eberman@quicinc.com
+Signed-off-by: Will Deacon
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/kernel/paravirt.c |   29 +++++++++++++++++++++--------
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+--- a/arch/arm64/kernel/paravirt.c
++++ b/arch/arm64/kernel/paravirt.c
+@@ -35,7 +35,7 @@ static u64 native_steal_clock(int cpu)
+ DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock);
+
+ struct pv_time_stolen_time_region {
+-	struct pvclock_vcpu_stolen_time *kaddr;
++	struct pvclock_vcpu_stolen_time __rcu *kaddr;
+ };
+
+ static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+@@ -52,7 +52,9 @@ early_param("no-steal-acc", parse_no_ste
+ /* return stolen time in ns by asking the hypervisor */
+ static u64 para_steal_clock(int cpu)
+ {
++	struct pvclock_vcpu_stolen_time *kaddr = NULL;
+ 	struct pv_time_stolen_time_region *reg;
++	u64 ret = 0;
+
+ 	reg = per_cpu_ptr(&stolen_time_region, cpu);
+
+@@ -61,28 +63,37 @@ static u64 para_steal_clock(int cpu)
+ 	 * online notification callback runs. Until the callback
+ 	 * has run we just return zero.
+ 	 */
+-	if (!reg->kaddr)
++	rcu_read_lock();
++	kaddr = rcu_dereference(reg->kaddr);
++	if (!kaddr) {
++		rcu_read_unlock();
+ 		return 0;
++	}
+
+-	return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
++	ret = le64_to_cpu(READ_ONCE(kaddr->stolen_time));
++	rcu_read_unlock();
++	return ret;
+ }
+
+ static int stolen_time_cpu_down_prepare(unsigned int cpu)
+ {
++	struct pvclock_vcpu_stolen_time *kaddr = NULL;
+ 	struct pv_time_stolen_time_region *reg;
+
+ 	reg = this_cpu_ptr(&stolen_time_region);
+ 	if (!reg->kaddr)
+ 		return 0;
+
+-	memunmap(reg->kaddr);
+-	memset(reg, 0, sizeof(*reg));
++	kaddr = rcu_replace_pointer(reg->kaddr, NULL, true);
++	synchronize_rcu();
++	memunmap(kaddr);
+
+ 	return 0;
+ }
+
+ static int stolen_time_cpu_online(unsigned int cpu)
+ {
++	struct pvclock_vcpu_stolen_time *kaddr = NULL;
+ 	struct pv_time_stolen_time_region *reg;
+ 	struct arm_smccc_res res;
+
+@@ -93,17 +104,19 @@ static int stolen_time_cpu_online(unsign
+ 	if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+ 		return -EINVAL;
+
+-	reg->kaddr = memremap(res.a0,
++	kaddr = memremap(res.a0,
+ 			      sizeof(struct pvclock_vcpu_stolen_time),
+ 			      MEMREMAP_WB);
+
++	rcu_assign_pointer(reg->kaddr, kaddr);
++
+ 	if (!reg->kaddr) {
+ 		pr_warn("Failed to map stolen time data structure\n");
+ 		return -ENOMEM;
+ 	}
+
+-	if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+-	    le32_to_cpu(reg->kaddr->attributes) != 0) {
++	if (le32_to_cpu(kaddr->revision) != 0 ||
++	    le32_to_cpu(kaddr->attributes) != 0) {
+ 		pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+ 		return -ENXIO;
+ 	}
diff --git a/queue-5.17/audit-io_uring-io-wq-call-__audit_uring_exit-for-dummy-contexts.patch b/queue-5.17/audit-io_uring-io-wq-call-__audit_uring_exit-for-dummy-contexts.patch
new file mode 100644
index 00000000000..4fe51516955
--- /dev/null
+++ b/queue-5.17/audit-io_uring-io-wq-call-__audit_uring_exit-for-dummy-contexts.patch
@@ -0,0 +1,69 @@
+From 69e9cd66ae1392437234a63a3a1d60b6655f92ef Mon Sep 17 00:00:00 2001
+From: Julian Orth
+Date: Tue, 17 May 2022 12:32:53 +0200
+Subject: audit,io_uring,io-wq: call __audit_uring_exit for dummy contexts
+
+From: Julian Orth
+
+commit 69e9cd66ae1392437234a63a3a1d60b6655f92ef upstream.
+
+Not calling the function for dummy contexts will cause the context to
+not be reset. During the next syscall, this will cause an error in
+__audit_syscall_entry:
+
+	WARN_ON(context->context != AUDIT_CTX_UNUSED);
+	WARN_ON(context->name_count);
+	if (context->context != AUDIT_CTX_UNUSED || context->name_count) {
+		audit_panic("unrecoverable error in audit_syscall_entry()");
+		return;
+	}
+
+These problematic dummy contexts are created via the following call
+chain:
+
+	exit_to_user_mode_prepare
+	-> arch_do_signal_or_restart
+	-> get_signal
+	-> task_work_run
+	-> tctx_task_work
+	-> io_req_task_submit
+	-> io_issue_sqe
+	-> audit_uring_entry
+
+Cc: stable@vger.kernel.org
+Fixes: 5bd2182d58e9 ("audit,io_uring,io-wq: add some basic audit support to io_uring")
+Signed-off-by: Julian Orth
+[PM: subject line tweaks]
+Signed-off-by: Paul Moore
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/linux/audit.h |    2 +-
+ kernel/auditsc.c      |    6 ++++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+--- a/include/linux/audit.h
++++ b/include/linux/audit.h
+@@ -339,7 +339,7 @@ static inline void audit_uring_entry(u8
+ }
+ static inline void audit_uring_exit(int success, long code)
+ {
+-	if (unlikely(!audit_dummy_context()))
++	if (unlikely(audit_context()))
+ 		__audit_uring_exit(success, code);
+ }
+ static inline void audit_syscall_entry(int major, unsigned long a0,
+--- a/kernel/auditsc.c
++++ b/kernel/auditsc.c
+@@ -1959,6 +1959,12 @@ void __audit_uring_exit(int success, lon
+ {
+ 	struct audit_context *ctx = audit_context();
+
++	if (ctx->dummy) {
++		if (ctx->context != AUDIT_CTX_URING)
++			return;
++		goto out;
++	}
++
+ 	if (ctx->context == AUDIT_CTX_SYSCALL) {
+ 		/*
+ 		 * NOTE: See the note in __audit_uring_entry() about the case
diff --git a/queue-5.17/crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch b/queue-5.17/crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
new file mode 100644
index 00000000000..44f9d806e7b
--- /dev/null
+++ b/queue-5.17/crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
@@ -0,0 +1,40 @@
+From 16287397ec5c08aa58db6acf7dbc55470d78087d Mon Sep 17 00:00:00 2001
+From: Ondrej Mosnacek
+Date: Tue, 3 May 2022 13:50:10 +0200
+Subject: crypto: qcom-rng - fix infinite loop on requests not multiple of WORD_SZ
+
+From: Ondrej Mosnacek
+
+commit 16287397ec5c08aa58db6acf7dbc55470d78087d upstream.
+
+The commit referenced in the Fixes tag removed the 'break' from the else
+branch in qcom_rng_read(), causing an infinite loop whenever 'max' is
+not a multiple of WORD_SZ. This can be reproduced e.g. by running:
+
+    kcapi-rng -b 67 >/dev/null
+
+There are many ways to fix this without adding back the 'break', but
+they all seem more awkward than simply adding it back, so do just that.
+
+Tested on a machine with a Qualcomm Amberwing processor.
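+
+A simplified sketch of the loop shape (illustrative only, not the
+driver source; 'val' stands for a word read from the hardware):
+
+	size_t currsize = 0;
+	u32 val;
+
+	do {
+		/* val = next word from the RNG FIFO */
+		if (max - currsize >= WORD_SZ) {
+			memcpy(data + currsize, &val, WORD_SZ);
+			currsize += WORD_SZ;
+		} else {
+			/* copy only remaining bytes */
+			memcpy(data + currsize, &val, max - currsize);
+			break;	/* without this, currsize never reaches max */
+		}
+	} while (currsize < max);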
+
+Fixes: a680b1832ced ("crypto: qcom-rng - ensure buffer for generate is completely filled")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ondrej Mosnacek
+Reviewed-by: Brian Masney
+Signed-off-by: Herbert Xu
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/crypto/qcom-rng.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/crypto/qcom-rng.c
++++ b/drivers/crypto/qcom-rng.c
+@@ -65,6 +65,7 @@ static int qcom_rng_read(struct qcom_rng
+ 		} else {
+ 			/* copy only remaining bytes */
+ 			memcpy(data, &val, max - currsize);
++			break;
+ 		}
+ 	} while (currsize < max);
+
diff --git a/queue-5.17/dma-buf-ensure-unique-directory-name-for-dmabuf-stats.patch b/queue-5.17/dma-buf-ensure-unique-directory-name-for-dmabuf-stats.patch
new file mode 100644
index 00000000000..9eeee4fc421
--- /dev/null
+++ b/queue-5.17/dma-buf-ensure-unique-directory-name-for-dmabuf-stats.patch
@@ -0,0 +1,74 @@
+From 370704e707a5f2d3c9a1d4ed8bd8cd67507d7bb5 Mon Sep 17 00:00:00 2001
+From: Charan Teja Kalla
+Date: Fri, 13 May 2022 16:58:16 +0530
+Subject: dma-buf: ensure unique directory name for dmabuf stats
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Charan Teja Kalla
+
+commit 370704e707a5f2d3c9a1d4ed8bd8cd67507d7bb5 upstream.
+
+The dmabuf file uses get_next_ino() (through dma_buf_getfile() ->
+alloc_anon_inode()) to get an inode number and uses the same as a
+directory name under /sys/kernel/dmabuf/buffers/. This directory is
+used to collect the dmabuf stats and it is created through
+dma_buf_stats_setup(). At present, a failure to create this directory
+entry makes dma_buf_export() fail.
+
+Now, get_next_ino() can definitely return a repeated inode number,
+causing the directory entry creation to fail with -EEXIST. On systems
+where the dmabuf stats functionality is enabled in production builds,
+this can make dma_buf_export() fail just because the stats entry could
+not be created, even though the dmabuf memory itself was allocated
+successfully.
+
+We were able to observe this issue on a Snapdragon system within 13
+days: a directory with inode number "122602" already existed, so
+dma_buf_stats_setup() failed with -EEXIST when trying to create the
+same directory entry.
+
+To make the dentry name unique, use a dmabuf-fs-specific inode number
+based on a simple atomic variable increment. The tmpfs subsystem also
+relies on its own inode generation rather than get_next_ino(), for the
+same reason of avoiding duplicate inodes[1].
+
+[1] https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/patch/?id=e809d5f0b5c912fe981dce738f3283b2010665f0
+
+Signed-off-by: Charan Teja Kalla
+Cc: <stable@vger.kernel.org> # 5.15.x+
+Reviewed-by: Greg Kroah-Hartman
+Reviewed-by: Christian König
+Link: https://patchwork.freedesktop.org/patch/msgid/1652441296-1986-1-git-send-email-quic_charante@quicinc.com
+Signed-off-by: Christian König
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/dma-buf/dma-buf.c |    8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/drivers/dma-buf/dma-buf.c
++++ b/drivers/dma-buf/dma-buf.c
+@@ -407,6 +407,7 @@ static inline int is_dma_buf_file(struct
+
+ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
+ {
++	static atomic64_t dmabuf_inode = ATOMIC64_INIT(0);
+ 	struct file *file;
+ 	struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+
+@@ -416,6 +417,13 @@ static struct file *dma_buf_getfile(stru
+ 	inode->i_size = dmabuf->size;
+ 	inode_set_bytes(inode, dmabuf->size);
+
++	/*
++	 * The ->i_ino acquired from get_next_ino() is not unique thus
++	 * not suitable for using it as dentry name by dmabuf stats.
++	 * Override ->i_ino with the unique and dmabuffs specific
++	 * value.
++	 */
++	inode->i_ino = atomic64_add_return(1, &dmabuf_inode);
+ 	file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
+ 				 flags, &dma_buf_fops);
+ 	if (IS_ERR(file))
diff --git a/queue-5.17/dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch b/queue-5.17/dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch
new file mode 100644
index 00000000000..ad48be7a286
--- /dev/null
+++ b/queue-5.17/dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch
@@ -0,0 +1,54 @@
+From 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?=
+Date: Tue, 17 May 2022 09:27:08 +0200
+Subject: dma-buf: fix use of DMA_BUF_SET_NAME_{A,B} in userspace
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jérôme Pouiller
+
+commit 7c3e9fcad9c7d8bb5d69a576044fb16b1d2e8a01 upstream.
+
+The typedefs u32 and u64 are not available in userspace. Thus the user
+gets an error when trying to use DMA_BUF_SET_NAME_A or
+DMA_BUF_SET_NAME_B:
+
+    $ gcc -Wall   -c -MMD -c -o ioctls_list.o ioctls_list.c
+    In file included from /usr/include/x86_64-linux-gnu/asm/ioctl.h:1,
+                     from /usr/include/linux/ioctl.h:5,
+                     from /usr/include/asm-generic/ioctls.h:5,
+                     from ioctls_list.c:11:
+    ioctls_list.c:463:29: error: ‘u32’ undeclared here (not in a function)
+      463 |     { "DMA_BUF_SET_NAME_A", DMA_BUF_SET_NAME_A, -1, -1 }, // linux/dma-buf.h
+          |                             ^~~~~~~~~~~~~~~~~~
+    ioctls_list.c:464:29: error: ‘u64’ undeclared here (not in a function)
+      464 |     { "DMA_BUF_SET_NAME_B", DMA_BUF_SET_NAME_B, -1, -1 }, // linux/dma-buf.h
+          |                             ^~~~~~~~~~~~~~~~~~
+
+The issue was initially reported here[1].
+
+[1]: https://github.com/jerome-pouiller/ioctl/pull/14
+
+Signed-off-by: Jérôme Pouiller
+Reviewed-by: Christian König
+Fixes: a5bff92eaac4 ("dma-buf: Fix SET_NAME ioctl uapi")
+CC: stable@vger.kernel.org
+Link: https://patchwork.freedesktop.org/patch/msgid/20220517072708.245265-1-Jerome.Pouiller@silabs.com
+Signed-off-by: Christian König
+Signed-off-by: Greg Kroah-Hartman
+---
+ include/uapi/linux/dma-buf.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/include/uapi/linux/dma-buf.h
++++ b/include/uapi/linux/dma-buf.h
+@@ -92,7 +92,7 @@ struct dma_buf_sync {
+  * between them in actual uapi, they're just different numbers.
+  */
+ #define DMA_BUF_SET_NAME	_IOW(DMA_BUF_BASE, 1, const char *)
+-#define DMA_BUF_SET_NAME_A	_IOW(DMA_BUF_BASE, 1, u32)
+-#define DMA_BUF_SET_NAME_B	_IOW(DMA_BUF_BASE, 1, u64)
++#define DMA_BUF_SET_NAME_A	_IOW(DMA_BUF_BASE, 1, __u32)
++#define DMA_BUF_SET_NAME_B	_IOW(DMA_BUF_BASE, 1, __u64)
+
+ #endif
diff --git a/queue-5.17/drm-amd-don-t-reset-dgpus-if-the-system-is-going-to-s2idle.patch b/queue-5.17/drm-amd-don-t-reset-dgpus-if-the-system-is-going-to-s2idle.patch
new file mode 100644
index 00000000000..b10545d51b2
--- /dev/null
+++ b/queue-5.17/drm-amd-don-t-reset-dgpus-if-the-system-is-going-to-s2idle.patch
@@ -0,0 +1,80 @@
+From 7123d39dc24dcd21ff23d75f46f926b15269b9da Mon Sep 17 00:00:00 2001
+From: Mario Limonciello
+Date: Tue, 17 May 2022 12:00:37 -0500
+Subject: drm/amd: Don't reset dGPUs if the system is going to s2idle
+
+From: Mario Limonciello
+
+commit 7123d39dc24dcd21ff23d75f46f926b15269b9da upstream.
+
+An A+A configuration on ASUS ROG Strix G513QY proves that the ASIC
+reset for handling aborted suspend can't work with s2idle.
+
+This functionality was introduced in commit daf8de0874ab5b ("drm/amdgpu:
+always reset the asic in suspend (v2)"). A few other commits have
+gone on top of the ASIC reset, but this still doesn't work on the A+A
+configuration in s2idle.
+
+Avoid doing the reset on dGPUs specifically when using s2idle.
+
+Fixes: daf8de0874ab5b ("drm/amdgpu: always reset the asic in suspend (v2)")
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2008
+Reviewed-by: Alex Deucher
+Signed-off-by: Mario Limonciello
+Signed-off-by: Alex Deucher
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h      |    2 ++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c |   14 ++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |    2 +-
+ 3 files changed, 17 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1422,9 +1422,11 @@ static inline int amdgpu_acpi_smart_shif
+
+ #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND)
+ bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev);
++bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev);
+ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev);
+ #else
+ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; }
++static inline bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev) { return false; }
+ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; }
+ #endif
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+@@ -1046,6 +1046,20 @@ bool amdgpu_acpi_is_s3_active(struct amd
+ }
+
+ /**
++ * amdgpu_acpi_should_gpu_reset
++ *
++ * @adev: amdgpu_device_pointer
++ *
++ * returns true if should reset GPU, false if not
++ */
++bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
++{
++	if (adev->flags & AMD_IS_APU)
++		return false;
++	return pm_suspend_target_state != PM_SUSPEND_TO_IDLE;
++}
++
++/**
+  * amdgpu_acpi_is_s0ix_active
+  *
+  * @adev: amdgpu_device_pointer
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -2289,7 +2289,7 @@ static int amdgpu_pmops_suspend_noirq(st
+ 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+ 	struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+-	if (!adev->in_s0ix)
++	if (amdgpu_acpi_should_gpu_reset(adev))
+ 		return amdgpu_asic_reset(adev);
+
+ 	return 0;
diff --git a/queue-5.17/drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch b/queue-5.17/drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
new file mode 100644
index 00000000000..d1a7d37aed8
--- /dev/null
+++ b/queue-5.17/drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
@@ -0,0 +1,32 @@
+From 6e03b13cc7d9427c2c77feed1549191015615202 Mon Sep 17 00:00:00 2001
+From: Hangyu Hua
+Date: Mon, 16 May 2022 11:20:42 +0800
+Subject: drm/dp/mst: fix a possible memory leak in fetch_monitor_name()
+
+From: Hangyu Hua
+
+commit 6e03b13cc7d9427c2c77feed1549191015615202 upstream.
+
+drm_dp_mst_get_edid calls kmemdup to create mst_edid, so mst_edid needs
+to be freed after use.
+
+Signed-off-by: Hangyu Hua
+Reviewed-by: Lyude Paul
+Signed-off-by: Lyude Paul
+Cc: stable@vger.kernel.org
+Link: https://patchwork.freedesktop.org/patch/msgid/20220516032042.13166-1-hbh25y@gmail.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/drm_dp_mst_topology.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/drm_dp_mst_topology.c
+@@ -4852,6 +4852,7 @@ static void fetch_monitor_name(struct dr
+
+ 	mst_edid = drm_dp_mst_get_edid(port->connector, mgr, port);
+ 	drm_edid_get_monitor_name(mst_edid, name, namelen);
++	kfree(mst_edid);
+ }
+
+ /**
diff --git a/queue-5.17/drm-i915-dmc-add-mmio-range-restrictions.patch b/queue-5.17/drm-i915-dmc-add-mmio-range-restrictions.patch
new file mode 100644
index 00000000000..c2159db1174
--- /dev/null
+++ b/queue-5.17/drm-i915-dmc-add-mmio-range-restrictions.patch
@@ -0,0 +1,122 @@
+From 54395a33718af1c04b5098203335b25382291a16 Mon Sep 17 00:00:00 2001
+From: Anusha Srivatsa
+Date: Tue, 10 May 2022 17:08:47 -0700
+Subject: drm/i915/dmc: Add MMIO range restrictions
+
+From: Anusha Srivatsa
+
+commit 54395a33718af1c04b5098203335b25382291a16 upstream.
+
+Bspec has added some steps that check the DMC MMIO range before
+programming them
+
+v2: Fix for CI
+v3: move register defines to .h (Anusha)
+- Check MMIO restrictions per pipe
+- Add MMIO restriction for v1 dmc header as well (Lucas)
+v4: s/_PICK/_PICK_EVEN and use it only for Pipe DMC scenario.
+- clean up sanity check logic. (Lucas)
+- Add MMIO range for RKL as well. (Anusha)
+v5: Use DISPLAY_VER instead of per platform check (Lucas)
+
+BSpec: 49193
+
+Cc: stable@vger.kernel.org
+Cc: Lucas De Marchi
+Signed-off-by: Anusha Srivatsa
+Reviewed-by: Lucas De Marchi
+Signed-off-by: Lucas De Marchi
+Link: https://patchwork.freedesktop.org/patch/msgid/20220511000847.1068302-1-anusha.srivatsa@intel.com
+(cherry picked from commit 21c47196aec3a93f913a7515e1e7b30e6c54d6c6)
+Signed-off-by: Joonas Lahtinen
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/i915/display/intel_dmc.c |   44 +++++++++++++++++++++++++++++++
+ drivers/gpu/drm/i915/i915_reg.h          |   16 +++++++++++
+ 2 files changed, 60 insertions(+)
+
+--- a/drivers/gpu/drm/i915/display/intel_dmc.c
++++ b/drivers/gpu/drm/i915/display/intel_dmc.c
+@@ -367,6 +367,44 @@ static void dmc_set_fw_offset(struct int
+ 	}
+ }
+
++static bool dmc_mmio_addr_sanity_check(struct intel_dmc *dmc,
++				       const u32 *mmioaddr, u32 mmio_count,
++				       int header_ver, u8 dmc_id)
++{
++	struct drm_i915_private *i915 = container_of(dmc, typeof(*i915), dmc);
++	u32 start_range, end_range;
++	int i;
++
++	if (dmc_id >= DMC_FW_MAX) {
++		drm_warn(&i915->drm, "Unsupported firmware id %u\n", dmc_id);
++		return false;
++	}
++
++	if (header_ver == 1) {
++		start_range = DMC_MMIO_START_RANGE;
++		end_range = DMC_MMIO_END_RANGE;
++	} else if (dmc_id == DMC_FW_MAIN) {
++		start_range = TGL_MAIN_MMIO_START;
++		end_range = TGL_MAIN_MMIO_END;
++	} else if (DISPLAY_VER(i915) >= 13) {
++		start_range = ADLP_PIPE_MMIO_START;
++		end_range = ADLP_PIPE_MMIO_END;
++	} else if (DISPLAY_VER(i915) >= 12) {
++		start_range = TGL_PIPE_MMIO_START(dmc_id);
++		end_range = TGL_PIPE_MMIO_END(dmc_id);
++	} else {
++		drm_warn(&i915->drm, "Unknown mmio range for sanity check");
++		return false;
++	}
++
++	for (i = 0; i < mmio_count; i++) {
++		if (mmioaddr[i] < start_range || mmioaddr[i] > end_range)
++			return false;
++	}
++
++	return true;
++}
++
+ static u32 parse_dmc_fw_header(struct intel_dmc *dmc,
+ 			       const struct intel_dmc_header_base *dmc_header,
+ 			       size_t rem_size, u8 dmc_id)
+@@ -436,6 +474,12 @@ static u32 parse_dmc_fw_header(struct in
+ 		return 0;
+ 	}
+
++	if (!dmc_mmio_addr_sanity_check(dmc, mmioaddr, mmio_count,
++					dmc_header->header_ver, dmc_id)) {
++		drm_err(&i915->drm, "DMC firmware has Wrong MMIO Addresses\n");
++		return 0;
++	}
++
+ 	for (i = 0; i < mmio_count; i++) {
+ 		dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]);
+ 		dmc_info->mmiodata[i] = mmiodata[i];
+--- a/drivers/gpu/drm/i915/i915_reg.h
++++ b/drivers/gpu/drm/i915/i915_reg.h
+@@ -7938,6 +7938,22 @@ enum {
+ /* MMIO address range for DMC program (0x80000 - 0x82FFF) */
+ #define DMC_MMIO_START_RANGE	0x80000
+ #define DMC_MMIO_END_RANGE	0x8FFFF
++#define DMC_V1_MMIO_START_RANGE	0x80000
++#define TGL_MAIN_MMIO_START	0x8F000
++#define TGL_MAIN_MMIO_END	0x8FFFF
++#define _TGL_PIPEA_MMIO_START	0x92000
++#define _TGL_PIPEA_MMIO_END	0x93FFF
++#define _TGL_PIPEB_MMIO_START	0x96000
++#define _TGL_PIPEB_MMIO_END	0x97FFF
++#define ADLP_PIPE_MMIO_START	0x5F000
++#define ADLP_PIPE_MMIO_END	0x5FFFF
++
++#define TGL_PIPE_MMIO_START(dmc_id)	_PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_START,\
++						   _TGL_PIPEB_MMIO_START)
++
++#define TGL_PIPE_MMIO_END(dmc_id)	_PICK_EVEN(((dmc_id) - 1), _TGL_PIPEA_MMIO_END,\
++						   _TGL_PIPEB_MMIO_END)
++
+ #define SKL_DMC_DC3_DC5_COUNT	_MMIO(0x80030)
+ #define SKL_DMC_DC5_DC6_COUNT	_MMIO(0x8002C)
+ #define BXT_DMC_DC3_DC5_COUNT	_MMIO(0x80038)
diff --git a/queue-5.17/fix-double-fget-in-vhost_net_set_backend.patch b/queue-5.17/fix-double-fget-in-vhost_net_set_backend.patch
new file mode 100644
index 00000000000..e67dbef046e
--- /dev/null
+++ b/queue-5.17/fix-double-fget-in-vhost_net_set_backend.patch
@@ -0,0 +1,69 @@
+From fb4554c2232e44d595920f4d5c66cf8f7d13f9bc Mon Sep 17 00:00:00 2001
+From: Al Viro
+Date: Mon, 16 May 2022 16:42:13 +0800
+Subject: Fix double fget() in vhost_net_set_backend()
+
+From: Al Viro
+
+commit fb4554c2232e44d595920f4d5c66cf8f7d13f9bc upstream.
+
+Descriptor table is a shared resource; two fget() on the same descriptor
+may return different struct file references. get_tap_ptr_ring() is
+called after we'd found (and pinned) the socket we'll be using and it
+tries to find the private tun/tap data structures associated with it.
+Redoing the lookup by the same file descriptor we'd used to get the
+socket is racy - we need to use the same struct file.
+
+Thanks to Jason for spotting a braino in the original variant of patch -
+I'd missed the use of fd == -1 for disabling backend, and in that case
+we can end up with sock == NULL and sock != oldsock.
+
+Cc: stable@kernel.org
+Acked-by: Michael S. Tsirkin
+Signed-off-by: Jason Wang
+Signed-off-by: Al Viro
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/vhost/net.c |   15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -1450,13 +1450,9 @@ err:
+ 	return ERR_PTR(r);
+ }
+
+-static struct ptr_ring *get_tap_ptr_ring(int fd)
++static struct ptr_ring *get_tap_ptr_ring(struct file *file)
+ {
+ 	struct ptr_ring *ring;
+-	struct file *file = fget(fd);
+-
+-	if (!file)
+-		return NULL;
+ 	ring = tun_get_tx_ring(file);
+ 	if (!IS_ERR(ring))
+ 		goto out;
+@@ -1465,7 +1461,6 @@ static struct ptr_ring *get_tap_ptr_ring
+ 		goto out;
+ 	ring = NULL;
+ out:
+-	fput(file);
+ 	return ring;
+ }
+
+@@ -1552,8 +1547,12 @@ static long vhost_net_set_backend(struct
+ 		r = vhost_net_enable_vq(n, vq);
+ 		if (r)
+ 			goto err_used;
+-		if (index == VHOST_NET_VQ_RX)
+-			nvq->rx_ring = get_tap_ptr_ring(fd);
++		if (index == VHOST_NET_VQ_RX) {
++			if (sock)
++				nvq->rx_ring = get_tap_ptr_ring(sock->file);
++			else
++				nvq->rx_ring = NULL;
++		}
+
+ 		oldubufs = nvq->ubufs;
+ 		nvq->ubufs = ubufs;
diff --git a/queue-5.17/kvm-arm64-vgic-v3-consistently-populate-id_aa64pfr0_el1.gic.patch b/queue-5.17/kvm-arm64-vgic-v3-consistently-populate-id_aa64pfr0_el1.gic.patch
new file mode 100644
index 00000000000..a186861f1dd
--- /dev/null
+++ b/queue-5.17/kvm-arm64-vgic-v3-consistently-populate-id_aa64pfr0_el1.gic.patch
@@ -0,0 +1,46 @@
+From 5163373af195f10e0d99a8de3465c4ed36bdc337 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier
+Date: Tue, 3 May 2022 22:14:24 +0100
+Subject: KVM: arm64: vgic-v3: Consistently populate ID_AA64PFR0_EL1.GIC
+
+From: Marc Zyngier
+
+commit 5163373af195f10e0d99a8de3465c4ed36bdc337 upstream.
+
+When adding support for the slightly wonky Apple M1, we had to
+populate ID_AA64PFR0_EL1.GIC==1 to present something to the guest,
+as the HW itself doesn't advertise the feature.
+
+However, we gated this on the in-kernel irqchip being created.
+This causes some trouble for QEMU, which snapshots the state of
+the registers before creating a virtual GIC, and then tries to
+restore these registers once the GIC has been created. Obviously,
+between the two stages, ID_AA64PFR0_EL1.GIC has changed value,
+and the write fails.
+
+The fix is to actually emulate the HW, and always populate the
+field if the HW is capable of it.
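+
+In other words, the ID register read path should derive the field from
+the host GIC type alone (this is exactly what the hunk below does):
+
+	if (kvm_vgic_global_state.type == VGIC_V3) {
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+	}
+
+so the advertised value no longer depends on whether the in-kernel
+irqchip has been created yet.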
+
+Fixes: 562e530fd770 ("KVM: arm64: Force ID_AA64PFR0_EL1.GIC=1 when exposing a virtual GICv3")
+Cc: stable@vger.kernel.org
+Signed-off-by: Marc Zyngier
+Reported-by: Peter Maydell
+Reviewed-by: Oliver Upton
+Link: https://lore.kernel.org/r/20220503211424.3375263-1-maz@kernel.org
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/kvm/sys_regs.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/arch/arm64/kvm/sys_regs.c
++++ b/arch/arm64/kvm/sys_regs.c
+@@ -1080,8 +1080,7 @@ static u64 read_id_reg(const struct kvm_
+ 		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
+ 		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
+ 		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+-		if (irqchip_in_kernel(vcpu->kvm) &&
+-		    vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
++		if (kvm_vgic_global_state.type == VGIC_V3) {
+ 			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
+ 			val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+ 		}
diff --git a/queue-5.17/kvm-free-new-dirty-bitmap-if-creating-a-new-memslot-fails.patch b/queue-5.17/kvm-free-new-dirty-bitmap-if-creating-a-new-memslot-fails.patch
new file mode 100644
index 00000000000..9bd43745896
--- /dev/null
+++ b/queue-5.17/kvm-free-new-dirty-bitmap-if-creating-a-new-memslot-fails.patch
@@ -0,0 +1,77 @@
+From c87661f855c3f2023e40ddc364002601ee234367 Mon Sep 17 00:00:00 2001
+From: Sean Christopherson
+Date: Wed, 18 May 2022 00:38:42 +0000
+Subject: KVM: Free new dirty bitmap if creating a new memslot fails
+
+From: Sean Christopherson
+
+commit c87661f855c3f2023e40ddc364002601ee234367 upstream.
+
+Fix a goof in kvm_prepare_memory_region() where KVM fails to free the
+new memslot's dirty bitmap during a CREATE action if
+kvm_arch_prepare_memory_region() fails. The logic is supposed to detect
+if the bitmap was allocated and thus needs to be freed, versus if the
+bitmap was inherited from the old memslot and thus needs to be kept. If
+there is no old memslot, then obviously the bitmap can't have been
+inherited.
+
+The bug was exposed by commit 86931ff7207b ("KVM: x86/mmu: Do not create
+SPTEs for GFNs that exceed host.MAXPHYADDR"), which made it trivially easy
+for syzkaller to trigger failure during kvm_arch_prepare_memory_region(),
+but the bug can be hit other ways too, e.g. due to -ENOMEM when
+allocating x86's memslot metadata.
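+
+Concretely, the cleanup must treat "no old memslot" the same as "old
+memslot without a dirty bitmap" (this is the one-line fix below):
+
+	/* Free the bitmap on failure if it was allocated above. */
+	if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
+		kvm_destroy_dirty_bitmap(new);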
+ +The backtrace from kmemleak: + + __vmalloc_node_range+0xb40/0xbd0 mm/vmalloc.c:3195 + __vmalloc_node mm/vmalloc.c:3232 [inline] + __vmalloc+0x49/0x50 mm/vmalloc.c:3246 + __vmalloc_array mm/util.c:671 [inline] + __vcalloc+0x49/0x70 mm/util.c:694 + kvm_alloc_dirty_bitmap virt/kvm/kvm_main.c:1319 + kvm_prepare_memory_region virt/kvm/kvm_main.c:1551 + kvm_set_memslot+0x1bd/0x690 virt/kvm/kvm_main.c:1782 + __kvm_set_memory_region+0x689/0x750 virt/kvm/kvm_main.c:1949 + kvm_set_memory_region virt/kvm/kvm_main.c:1962 + kvm_vm_ioctl_set_memory_region virt/kvm/kvm_main.c:1974 + kvm_vm_ioctl+0x377/0x13a0 virt/kvm/kvm_main.c:4528 + vfs_ioctl fs/ioctl.c:51 + __do_sys_ioctl fs/ioctl.c:870 + __se_sys_ioctl fs/ioctl.c:856 + __x64_sys_ioctl+0xfc/0x140 fs/ioctl.c:856 + do_syscall_x64 arch/x86/entry/common.c:50 + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae + +And the relevant sequence of KVM events: + + ioctl(3, KVM_CREATE_VM, 0) = 4 + ioctl(4, KVM_SET_USER_MEMORY_REGION, {slot=0, + flags=KVM_MEM_LOG_DIRTY_PAGES, + guest_phys_addr=0x10000000000000, + memory_size=4096, + userspace_addr=0x20fe8000} + ) = -1 EINVAL (Invalid argument) + +Fixes: 244893fa2859 ("KVM: Dynamically allocate "new" memslots from the get-go") +Cc: stable@vger.kernel.org +Reported-by: syzbot+8606b8a9cc97a63f1c87@syzkaller.appspotmail.com +Signed-off-by: Sean Christopherson +Message-Id: <20220518003842.1341782-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -1539,7 +1539,7 @@ static int kvm_prepare_memory_region(str + r = kvm_arch_prepare_memory_region(kvm, old, new, change); + + /* Free the bitmap on failure if it was allocated above. */ +- if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap) ++ if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap)) + kvm_destroy_dirty_bitmap(new); + + return r; diff --git a/queue-5.17/kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch b/queue-5.17/kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch new file mode 100644 index 00000000000..77e28fe5264 --- /dev/null +++ b/queue-5.17/kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch @@ -0,0 +1,71 @@ +From b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Wed, 11 May 2022 14:51:22 +0000 +Subject: KVM: x86/mmu: Update number of zapped pages even if page list is stable + +From: Sean Christopherson + +commit b28cb0cd2c5e80a8c0feb408a0e4b0dbb6d132c5 upstream. + +When zapping obsolete pages, update the running count of zapped pages +regardless of whether or not the list has become unstable due to zapping +a shadow page with its own child shadow pages. If the VM is backed by +mostly 4kb pages, KVM can zap an absurd number of SPTEs without bumping +the batch count and thus without yielding. In the worst case scenario, +this can cause a soft lokcup. + + watchdog: BUG: soft lockup - CPU#12 stuck for 22s! 
[dirty_log_perf_:13020] + RIP: 0010:workingset_activation+0x19/0x130 + mark_page_accessed+0x266/0x2e0 + kvm_set_pfn_accessed+0x31/0x40 + mmu_spte_clear_track_bits+0x136/0x1c0 + drop_spte+0x1a/0xc0 + mmu_page_zap_pte+0xef/0x120 + __kvm_mmu_prepare_zap_page+0x205/0x5e0 + kvm_mmu_zap_all_fast+0xd7/0x190 + kvm_mmu_invalidate_zap_pages_in_memslot+0xe/0x10 + kvm_page_track_flush_slot+0x5c/0x80 + kvm_arch_flush_shadow_memslot+0xe/0x10 + kvm_set_memslot+0x1a8/0x5d0 + __kvm_set_memory_region+0x337/0x590 + kvm_vm_ioctl+0xb08/0x1040 + +Fixes: fbb158cb88b6 ("KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""") +Reported-by: David Matlack +Reviewed-by: Ben Gardon +Cc: stable@vger.kernel.org +Signed-off-by: Sean Christopherson +Message-Id: <20220511145122.3133334-1-seanjc@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/kvm/mmu/mmu.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -5611,6 +5611,7 @@ static void kvm_zap_obsolete_pages(struc + { + struct kvm_mmu_page *sp, *node; + int nr_zapped, batch = 0; ++ bool unstable; + + restart: + list_for_each_entry_safe_reverse(sp, node, +@@ -5642,11 +5643,12 @@ restart: + goto restart; + } + +- if (__kvm_mmu_prepare_zap_page(kvm, sp, +- &kvm->arch.zapped_obsolete_pages, &nr_zapped)) { +- batch += nr_zapped; ++ unstable = __kvm_mmu_prepare_zap_page(kvm, sp, ++ &kvm->arch.zapped_obsolete_pages, &nr_zapped); ++ batch += nr_zapped; ++ ++ if (unstable) + goto restart; +- } + } + + /* diff --git a/queue-5.17/libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch b/queue-5.17/libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch new file mode 100644 index 00000000000..9cc0b829482 --- /dev/null +++ b/queue-5.17/libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch @@ -0,0 +1,566 @@ +From 75dbb685f4e8786c33ddef8279bab0eadfb0731f Mon Sep 17 00:00:00 2001 +From: Ilya Dryomov +Date: Sat, 14 May 2022 12:16:47 +0200 +Subject: libceph: fix potential use-after-free on linger ping and resends + +From: Ilya Dryomov + +commit 75dbb685f4e8786c33ddef8279bab0eadfb0731f upstream. + +request_reinit() is not only ugly as the comment rightfully suggests, +but also unsafe. Even though it is called with osdc->lock held for +write in all cases, resetting the OSD request refcount can still race +with handle_reply() and result in use-after-free. Taking linger ping +as an example: + + handle_timeout thread handle_reply thread + + down_read(&osdc->lock) + req = lookup_request(...) + ... + finish_request(req) # unregisters + up_read(&osdc->lock) + __complete_request(req) + linger_ping_cb(req) + + # req->r_kref == 2 because handle_reply still holds its ref + + down_write(&osdc->lock) + send_linger_ping(lreq) + req = lreq->ping_req # same req + # cancel_linger_request is NOT + # called - handle_reply already + # unregistered + request_reinit(req) + WARN_ON(req->r_kref != 1) # fires + request_init(req) + kref_init(req->r_kref) + + # req->r_kref == 1 after kref_init + + ceph_osdc_put_request(req) + kref_put(req->r_kref) + + # req->r_kref == 0 after kref_put, req is freed + + !!! + +This happens because send_linger_ping() always (re)uses the same OSD +request for watch ping requests, relying on cancel_linger_request() to +unregister it from the OSD client and rip its messages out from the +messenger. send_linger() does the same for watch/notify registration +and watch reconnect requests. 
Unfortunately cancel_request() doesn't +guarantee that after it returns the OSD client would be completely done +with the OSD request -- a ref could still be held and the callback (if +specified) could still be invoked too. + +The original motivation for request_reinit() was inability to deal with +allocation failures in send_linger() and send_linger_ping(). Switching +to using osdc->req_mempool (currently only used by CephFS) respects that +and allows us to get rid of request_reinit(). + +Cc: stable@vger.kernel.org +Signed-off-by: Ilya Dryomov +Reviewed-by: Xiubo Li +Acked-by: Jeff Layton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/ceph/osd_client.h | 3 + net/ceph/osd_client.c | 302 +++++++++++++++------------------------- + 2 files changed, 122 insertions(+), 183 deletions(-) + +--- a/include/linux/ceph/osd_client.h ++++ b/include/linux/ceph/osd_client.h +@@ -287,6 +287,9 @@ struct ceph_osd_linger_request { + rados_watcherrcb_t errcb; + void *data; + ++ struct ceph_pagelist *request_pl; ++ struct page **notify_id_pages; ++ + struct page ***preply_pages; + size_t *preply_len; + }; +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -537,43 +537,6 @@ static void request_init(struct ceph_osd + target_init(&req->r_t); + } + +-/* +- * This is ugly, but it allows us to reuse linger registration and ping +- * requests, keeping the structure of the code around send_linger{_ping}() +- * reasonable. Setting up a min_nr=2 mempool for each linger request +- * and dealing with copying ops (this blasts req only, watch op remains +- * intact) isn't any better. +- */ +-static void request_reinit(struct ceph_osd_request *req) +-{ +- struct ceph_osd_client *osdc = req->r_osdc; +- bool mempool = req->r_mempool; +- unsigned int num_ops = req->r_num_ops; +- u64 snapid = req->r_snapid; +- struct ceph_snap_context *snapc = req->r_snapc; +- bool linger = req->r_linger; +- struct ceph_msg *request_msg = req->r_request; +- struct ceph_msg *reply_msg = req->r_reply; +- +- dout("%s req %p\n", __func__, req); +- WARN_ON(kref_read(&req->r_kref) != 1); +- request_release_checks(req); +- +- WARN_ON(kref_read(&request_msg->kref) != 1); +- WARN_ON(kref_read(&reply_msg->kref) != 1); +- target_destroy(&req->r_t); +- +- request_init(req); +- req->r_osdc = osdc; +- req->r_mempool = mempool; +- req->r_num_ops = num_ops; +- req->r_snapid = snapid; +- req->r_snapc = snapc; +- req->r_linger = linger; +- req->r_request = request_msg; +- req->r_reply = reply_msg; +-} +- + struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, + struct ceph_snap_context *snapc, + unsigned int num_ops, +@@ -918,14 +881,30 @@ EXPORT_SYMBOL(osd_req_op_xattr_init); + * @watch_opcode: CEPH_OSD_WATCH_OP_* + */ + static void osd_req_op_watch_init(struct ceph_osd_request *req, int which, +- u64 cookie, u8 watch_opcode) ++ u8 watch_opcode, u64 cookie, u32 gen) + { + struct ceph_osd_req_op *op; + + op = osd_req_op_init(req, which, CEPH_OSD_OP_WATCH, 0); + op->watch.cookie = cookie; + op->watch.op = watch_opcode; +- op->watch.gen = 0; ++ op->watch.gen = gen; ++} ++ ++/* ++ * prot_ver, timeout and notify payload (may be empty) should already be ++ * encoded in @request_pl ++ */ ++static void osd_req_op_notify_init(struct ceph_osd_request *req, int which, ++ u64 cookie, struct ceph_pagelist *request_pl) ++{ ++ struct ceph_osd_req_op *op; ++ ++ op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); ++ op->notify.cookie = cookie; ++ ++ ceph_osd_data_pagelist_init(&op->notify.request_data, request_pl); ++ 
op->indata_len = request_pl->length; + } + + /* +@@ -2727,10 +2706,13 @@ static void linger_release(struct kref * + WARN_ON(!list_empty(&lreq->pending_lworks)); + WARN_ON(lreq->osd); + +- if (lreq->reg_req) +- ceph_osdc_put_request(lreq->reg_req); +- if (lreq->ping_req) +- ceph_osdc_put_request(lreq->ping_req); ++ if (lreq->request_pl) ++ ceph_pagelist_release(lreq->request_pl); ++ if (lreq->notify_id_pages) ++ ceph_release_page_vector(lreq->notify_id_pages, 1); ++ ++ ceph_osdc_put_request(lreq->reg_req); ++ ceph_osdc_put_request(lreq->ping_req); + target_destroy(&lreq->t); + kfree(lreq); + } +@@ -2999,6 +2981,12 @@ static void linger_commit_cb(struct ceph + struct ceph_osd_linger_request *lreq = req->r_priv; + + mutex_lock(&lreq->lock); ++ if (req != lreq->reg_req) { ++ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", ++ __func__, lreq, lreq->linger_id, req, lreq->reg_req); ++ goto out; ++ } ++ + dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq, + lreq->linger_id, req->r_result); + linger_reg_commit_complete(lreq, req->r_result); +@@ -3022,6 +3010,7 @@ static void linger_commit_cb(struct ceph + } + } + ++out: + mutex_unlock(&lreq->lock); + linger_put(lreq); + } +@@ -3044,6 +3033,12 @@ static void linger_reconnect_cb(struct c + struct ceph_osd_linger_request *lreq = req->r_priv; + + mutex_lock(&lreq->lock); ++ if (req != lreq->reg_req) { ++ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", ++ __func__, lreq, lreq->linger_id, req, lreq->reg_req); ++ goto out; ++ } ++ + dout("%s lreq %p linger_id %llu result %d last_error %d\n", __func__, + lreq, lreq->linger_id, req->r_result, lreq->last_error); + if (req->r_result < 0) { +@@ -3053,46 +3048,64 @@ static void linger_reconnect_cb(struct c + } + } + ++out: + mutex_unlock(&lreq->lock); + linger_put(lreq); + } + + static void send_linger(struct ceph_osd_linger_request *lreq) + { +- struct ceph_osd_request *req = lreq->reg_req; +- struct ceph_osd_req_op *op = &req->r_ops[0]; ++ struct ceph_osd_client *osdc = lreq->osdc; ++ struct ceph_osd_request *req; ++ int ret; + +- verify_osdc_wrlocked(req->r_osdc); ++ verify_osdc_wrlocked(osdc); ++ mutex_lock(&lreq->lock); + dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); + +- if (req->r_osd) +- cancel_linger_request(req); ++ if (lreq->reg_req) { ++ if (lreq->reg_req->r_osd) ++ cancel_linger_request(lreq->reg_req); ++ ceph_osdc_put_request(lreq->reg_req); ++ } ++ ++ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); ++ BUG_ON(!req); + +- request_reinit(req); + target_copy(&req->r_t, &lreq->t); + req->r_mtime = lreq->mtime; + +- mutex_lock(&lreq->lock); + if (lreq->is_watch && lreq->committed) { +- WARN_ON(op->op != CEPH_OSD_OP_WATCH || +- op->watch.cookie != lreq->linger_id); +- op->watch.op = CEPH_OSD_WATCH_OP_RECONNECT; +- op->watch.gen = ++lreq->register_gen; ++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_RECONNECT, ++ lreq->linger_id, ++lreq->register_gen); + dout("lreq %p reconnect register_gen %u\n", lreq, +- op->watch.gen); ++ req->r_ops[0].watch.gen); + req->r_callback = linger_reconnect_cb; + } else { +- if (!lreq->is_watch) ++ if (lreq->is_watch) { ++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_WATCH, ++ lreq->linger_id, 0); ++ } else { + lreq->notify_id = 0; +- else +- WARN_ON(op->watch.op != CEPH_OSD_WATCH_OP_WATCH); ++ ++ refcount_inc(&lreq->request_pl->refcnt); ++ osd_req_op_notify_init(req, 0, lreq->linger_id, ++ lreq->request_pl); ++ ceph_osd_data_pages_init( ++ osd_req_op_data(req, 0, notify, response_data), ++ 
lreq->notify_id_pages, PAGE_SIZE, 0, false, false); ++ } + dout("lreq %p register\n", lreq); + req->r_callback = linger_commit_cb; + } +- mutex_unlock(&lreq->lock); ++ ++ ret = ceph_osdc_alloc_messages(req, GFP_NOIO); ++ BUG_ON(ret); + + req->r_priv = linger_get(lreq); + req->r_linger = true; ++ lreq->reg_req = req; ++ mutex_unlock(&lreq->lock); + + submit_request(req, true); + } +@@ -3102,6 +3115,12 @@ static void linger_ping_cb(struct ceph_o + struct ceph_osd_linger_request *lreq = req->r_priv; + + mutex_lock(&lreq->lock); ++ if (req != lreq->ping_req) { ++ dout("%s lreq %p linger_id %llu unknown req (%p != %p)\n", ++ __func__, lreq, lreq->linger_id, req, lreq->ping_req); ++ goto out; ++ } ++ + dout("%s lreq %p linger_id %llu result %d ping_sent %lu last_error %d\n", + __func__, lreq, lreq->linger_id, req->r_result, lreq->ping_sent, + lreq->last_error); +@@ -3117,6 +3136,7 @@ static void linger_ping_cb(struct ceph_o + lreq->register_gen, req->r_ops[0].watch.gen); + } + ++out: + mutex_unlock(&lreq->lock); + linger_put(lreq); + } +@@ -3124,8 +3144,8 @@ static void linger_ping_cb(struct ceph_o + static void send_linger_ping(struct ceph_osd_linger_request *lreq) + { + struct ceph_osd_client *osdc = lreq->osdc; +- struct ceph_osd_request *req = lreq->ping_req; +- struct ceph_osd_req_op *op = &req->r_ops[0]; ++ struct ceph_osd_request *req; ++ int ret; + + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { + dout("%s PAUSERD\n", __func__); +@@ -3137,19 +3157,26 @@ static void send_linger_ping(struct ceph + __func__, lreq, lreq->linger_id, lreq->ping_sent, + lreq->register_gen); + +- if (req->r_osd) +- cancel_linger_request(req); ++ if (lreq->ping_req) { ++ if (lreq->ping_req->r_osd) ++ cancel_linger_request(lreq->ping_req); ++ ceph_osdc_put_request(lreq->ping_req); ++ } + +- request_reinit(req); +- target_copy(&req->r_t, &lreq->t); ++ req = ceph_osdc_alloc_request(osdc, NULL, 1, true, GFP_NOIO); ++ BUG_ON(!req); + +- WARN_ON(op->op != CEPH_OSD_OP_WATCH || +- op->watch.cookie != lreq->linger_id || +- op->watch.op != CEPH_OSD_WATCH_OP_PING); +- op->watch.gen = lreq->register_gen; ++ target_copy(&req->r_t, &lreq->t); ++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_PING, lreq->linger_id, ++ lreq->register_gen); + req->r_callback = linger_ping_cb; ++ ++ ret = ceph_osdc_alloc_messages(req, GFP_NOIO); ++ BUG_ON(ret); ++ + req->r_priv = linger_get(lreq); + req->r_linger = true; ++ lreq->ping_req = req; + + ceph_osdc_get_request(req); + account_request(req); +@@ -3165,12 +3192,6 @@ static void linger_submit(struct ceph_os + + down_write(&osdc->lock); + linger_register(lreq); +- if (lreq->is_watch) { +- lreq->reg_req->r_ops[0].watch.cookie = lreq->linger_id; +- lreq->ping_req->r_ops[0].watch.cookie = lreq->linger_id; +- } else { +- lreq->reg_req->r_ops[0].notify.cookie = lreq->linger_id; +- } + + calc_target(osdc, &lreq->t, false); + osd = lookup_create_osd(osdc, lreq->t.osd, true); +@@ -3202,9 +3223,9 @@ static void cancel_linger_map_check(stru + */ + static void __linger_cancel(struct ceph_osd_linger_request *lreq) + { +- if (lreq->is_watch && lreq->ping_req->r_osd) ++ if (lreq->ping_req && lreq->ping_req->r_osd) + cancel_linger_request(lreq->ping_req); +- if (lreq->reg_req->r_osd) ++ if (lreq->reg_req && lreq->reg_req->r_osd) + cancel_linger_request(lreq->reg_req); + cancel_linger_map_check(lreq); + unlink_linger(lreq->osd, lreq); +@@ -4653,43 +4674,6 @@ again: + } + EXPORT_SYMBOL(ceph_osdc_sync); + +-static struct ceph_osd_request * +-alloc_linger_request(struct ceph_osd_linger_request *lreq) +-{ 
+- struct ceph_osd_request *req; +- +- req = ceph_osdc_alloc_request(lreq->osdc, NULL, 1, false, GFP_NOIO); +- if (!req) +- return NULL; +- +- ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid); +- ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); +- return req; +-} +- +-static struct ceph_osd_request * +-alloc_watch_request(struct ceph_osd_linger_request *lreq, u8 watch_opcode) +-{ +- struct ceph_osd_request *req; +- +- req = alloc_linger_request(lreq); +- if (!req) +- return NULL; +- +- /* +- * Pass 0 for cookie because we don't know it yet, it will be +- * filled in by linger_submit(). +- */ +- osd_req_op_watch_init(req, 0, 0, watch_opcode); +- +- if (ceph_osdc_alloc_messages(req, GFP_NOIO)) { +- ceph_osdc_put_request(req); +- return NULL; +- } +- +- return req; +-} +- + /* + * Returns a handle, caller owns a ref. + */ +@@ -4719,18 +4703,6 @@ ceph_osdc_watch(struct ceph_osd_client * + lreq->t.flags = CEPH_OSD_FLAG_WRITE; + ktime_get_real_ts64(&lreq->mtime); + +- lreq->reg_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_WATCH); +- if (!lreq->reg_req) { +- ret = -ENOMEM; +- goto err_put_lreq; +- } +- +- lreq->ping_req = alloc_watch_request(lreq, CEPH_OSD_WATCH_OP_PING); +- if (!lreq->ping_req) { +- ret = -ENOMEM; +- goto err_put_lreq; +- } +- + linger_submit(lreq); + ret = linger_reg_commit_wait(lreq); + if (ret) { +@@ -4768,8 +4740,8 @@ int ceph_osdc_unwatch(struct ceph_osd_cl + ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc); + req->r_flags = CEPH_OSD_FLAG_WRITE; + ktime_get_real_ts64(&req->r_mtime); +- osd_req_op_watch_init(req, 0, lreq->linger_id, +- CEPH_OSD_WATCH_OP_UNWATCH); ++ osd_req_op_watch_init(req, 0, CEPH_OSD_WATCH_OP_UNWATCH, ++ lreq->linger_id, 0); + + ret = ceph_osdc_alloc_messages(req, GFP_NOIO); + if (ret) +@@ -4855,35 +4827,6 @@ out_put_req: + } + EXPORT_SYMBOL(ceph_osdc_notify_ack); + +-static int osd_req_op_notify_init(struct ceph_osd_request *req, int which, +- u64 cookie, u32 prot_ver, u32 timeout, +- void *payload, u32 payload_len) +-{ +- struct ceph_osd_req_op *op; +- struct ceph_pagelist *pl; +- int ret; +- +- op = osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0); +- op->notify.cookie = cookie; +- +- pl = ceph_pagelist_alloc(GFP_NOIO); +- if (!pl) +- return -ENOMEM; +- +- ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */ +- ret |= ceph_pagelist_encode_32(pl, timeout); +- ret |= ceph_pagelist_encode_32(pl, payload_len); +- ret |= ceph_pagelist_append(pl, payload, payload_len); +- if (ret) { +- ceph_pagelist_release(pl); +- return -ENOMEM; +- } +- +- ceph_osd_data_pagelist_init(&op->notify.request_data, pl); +- op->indata_len = pl->length; +- return 0; +-} +- + /* + * @timeout: in seconds + * +@@ -4902,7 +4845,6 @@ int ceph_osdc_notify(struct ceph_osd_cli + size_t *preply_len) + { + struct ceph_osd_linger_request *lreq; +- struct page **pages; + int ret; + + WARN_ON(!timeout); +@@ -4915,41 +4857,35 @@ int ceph_osdc_notify(struct ceph_osd_cli + if (!lreq) + return -ENOMEM; + +- lreq->preply_pages = preply_pages; +- lreq->preply_len = preply_len; +- +- ceph_oid_copy(&lreq->t.base_oid, oid); +- ceph_oloc_copy(&lreq->t.base_oloc, oloc); +- lreq->t.flags = CEPH_OSD_FLAG_READ; +- +- lreq->reg_req = alloc_linger_request(lreq); +- if (!lreq->reg_req) { ++ lreq->request_pl = ceph_pagelist_alloc(GFP_NOIO); ++ if (!lreq->request_pl) { + ret = -ENOMEM; + goto out_put_lreq; + } + +- /* +- * Pass 0 for cookie because we don't know it yet, it will be +- * filled in by linger_submit(). 
+- */ +- ret = osd_req_op_notify_init(lreq->reg_req, 0, 0, 1, timeout, +- payload, payload_len); +- if (ret) ++ ret = ceph_pagelist_encode_32(lreq->request_pl, 1); /* prot_ver */ ++ ret |= ceph_pagelist_encode_32(lreq->request_pl, timeout); ++ ret |= ceph_pagelist_encode_32(lreq->request_pl, payload_len); ++ ret |= ceph_pagelist_append(lreq->request_pl, payload, payload_len); ++ if (ret) { ++ ret = -ENOMEM; + goto out_put_lreq; ++ } + + /* for notify_id */ +- pages = ceph_alloc_page_vector(1, GFP_NOIO); +- if (IS_ERR(pages)) { +- ret = PTR_ERR(pages); ++ lreq->notify_id_pages = ceph_alloc_page_vector(1, GFP_NOIO); ++ if (IS_ERR(lreq->notify_id_pages)) { ++ ret = PTR_ERR(lreq->notify_id_pages); ++ lreq->notify_id_pages = NULL; + goto out_put_lreq; + } +- ceph_osd_data_pages_init(osd_req_op_data(lreq->reg_req, 0, notify, +- response_data), +- pages, PAGE_SIZE, 0, false, true); + +- ret = ceph_osdc_alloc_messages(lreq->reg_req, GFP_NOIO); +- if (ret) +- goto out_put_lreq; ++ lreq->preply_pages = preply_pages; ++ lreq->preply_len = preply_len; ++ ++ ceph_oid_copy(&lreq->t.base_oid, oid); ++ ceph_oloc_copy(&lreq->t.base_oloc, oloc); ++ lreq->t.flags = CEPH_OSD_FLAG_READ; + + linger_submit(lreq); + ret = linger_reg_commit_wait(lreq); diff --git a/queue-5.17/mmc-core-fix-busy-polling-for-mmc_send_op_cond-again.patch b/queue-5.17/mmc-core-fix-busy-polling-for-mmc_send_op_cond-again.patch new file mode 100644 index 00000000000..caa53063647 --- /dev/null +++ b/queue-5.17/mmc-core-fix-busy-polling-for-mmc_send_op_cond-again.patch @@ -0,0 +1,39 @@ +From e949dee3625e1b0ef2e40d9aa09c2995281b12f6 Mon Sep 17 00:00:00 2001 +From: Ulf Hansson +Date: Tue, 17 May 2022 12:10:46 +0200 +Subject: mmc: core: Fix busy polling for MMC_SEND_OP_COND again + +From: Ulf Hansson + +commit e949dee3625e1b0ef2e40d9aa09c2995281b12f6 upstream. + +It turned out that polling period for MMC_SEND_OP_COND, that currently is +set to 1ms, still isn't sufficient. In particular a Micron eMMC on a +Beaglebone platform, is reported to sometimes fail to initialize. + +Additional test, shows that extending the period to 4ms is working fine, so +let's make that change. + +Reported-by: Jean Rene Dawin +Tested-by: Jean Rene Dawin +Fixes: 1760fdb6fe9f (mmc: core: Restore (almost) the busy polling for MMC_SEND_OP_COND") +Fixes: 76bfc7ccc2fa ("mmc: core: adjust polling interval for CMD1") +Cc: stable@vger.kernel.org +Signed-off-by: Ulf Hansson +Link: https://lore.kernel.org/r/20220517101046.27512-1-ulf.hansson@linaro.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/mmc/core/mmc_ops.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/mmc/core/mmc_ops.c ++++ b/drivers/mmc/core/mmc_ops.c +@@ -21,7 +21,7 @@ + + #define MMC_BKOPS_TIMEOUT_MS (120 * 1000) /* 120s */ + #define MMC_SANITIZE_TIMEOUT_MS (240 * 1000) /* 240s */ +-#define MMC_OP_COND_PERIOD_US (1 * 1000) /* 1ms */ ++#define MMC_OP_COND_PERIOD_US (4 * 1000) /* 4ms */ + #define MMC_OP_COND_TIMEOUT_MS 1000 /* 1s */ + + static const u8 tuning_blk_pattern_4bit[] = { diff --git a/queue-5.17/pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch b/queue-5.17/pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch new file mode 100644 index 00000000000..bb70a2e3d70 --- /dev/null +++ b/queue-5.17/pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch @@ -0,0 +1,51 @@ +From 92597f97a40bf661bebceb92e26ff87c76d562d4 Mon Sep 17 00:00:00 2001 +From: "Rafael J. 
Wysocki" +Date: Thu, 31 Mar 2022 19:38:51 +0200 +Subject: PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold + +From: Rafael J. Wysocki + +commit 92597f97a40bf661bebceb92e26ff87c76d562d4 upstream. + +If a Root Port on Elo i2 is put into D3cold and then back into D0, the +downstream device becomes permanently inaccessible, so add a bridge D3 DMI +quirk for that system. + +This was exposed by 14858dcc3b35 ("PCI: Use pci_update_current_state() in +pci_enable_device_flags()"), but before that commit the Root Port in +question had never been put into D3cold for real due to a mismatch between +its power state retrieved from the PCI_PM_CTRL register (which was +accessible even though the platform firmware indicated that the port was in +D3cold) and the state of an ACPI power resource involved in its power +management. + +BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215715 +Link: https://lore.kernel.org/r/11980172.O9o76ZdvQC@kreacher +Reported-by: Stefan Gottwald +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Bjorn Helgaas +Cc: stable@vger.kernel.org # v5.15+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/pci/pci.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/drivers/pci/pci.c ++++ b/drivers/pci/pci.c +@@ -2920,6 +2920,16 @@ static const struct dmi_system_id bridge + DMI_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), + DMI_MATCH(DMI_BOARD_NAME, "X299 DESIGNARE EX-CF"), + }, ++ /* ++ * Downstream device is not accessible after putting a root port ++ * into D3cold and back into D0 on Elo i2. ++ */ ++ .ident = "Elo i2", ++ .matches = { ++ DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), ++ DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), ++ DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), ++ }, + }, + #endif + { } diff --git a/queue-5.17/perf-fix-sys_perf_event_open-race-against-self.patch b/queue-5.17/perf-fix-sys_perf_event_open-race-against-self.patch new file mode 100644 index 00000000000..c6f91546ffa --- /dev/null +++ b/queue-5.17/perf-fix-sys_perf_event_open-race-against-self.patch @@ -0,0 +1,71 @@ +From 3ac6487e584a1eb54071dbe1212e05b884136704 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Fri, 20 May 2022 20:38:06 +0200 +Subject: perf: Fix sys_perf_event_open() race against self + +From: Peter Zijlstra + +commit 3ac6487e584a1eb54071dbe1212e05b884136704 upstream. + +Norbert reported that it's possible to race sys_perf_event_open() such +that the looser ends up in another context from the group leader, +triggering many WARNs. + +The move_group case checks for races against itself, but the +!move_group case doesn't, seemingly relying on the previous +group_leader->ctx == ctx check. However, that check is racy due to not +holding any locks at that time. + +Therefore, re-check the result after acquiring locks and bailing +if they no longer match. + +Additionally, clarify the not_move_group case from the +move_group-vs-move_group race. + +Fixes: f63a8daa5812 ("perf: Fix event->ctx locking") +Reported-by: Norbert Slusarek +Signed-off-by: Peter Zijlstra (Intel) +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/events/core.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +--- a/kernel/events/core.c ++++ b/kernel/events/core.c +@@ -12327,6 +12327,9 @@ SYSCALL_DEFINE5(perf_event_open, + * Do not allow to attach to a group in a different task + * or CPU context. If we're moving SW events, we'll fix + * this up later, so allow that. ++ * ++ * Racy, not holding group_leader->ctx->mutex, see comment with ++ * perf_event_ctx_lock(). 
+ */
+ if (!move_group && group_leader->ctx != ctx)
+ goto err_context;
+@@ -12392,6 +12395,7 @@ SYSCALL_DEFINE5(perf_event_open,
+ } else {
+ perf_event_ctx_unlock(group_leader, gctx);
+ move_group = 0;
++ goto not_move_group;
+ }
+ }
+
+@@ -12408,7 +12412,17 @@ SYSCALL_DEFINE5(perf_event_open,
+ }
+ } else {
+ mutex_lock(&ctx->mutex);
++
++ /*
++ * Now that we hold ctx->lock, (re)validate group_leader->ctx == ctx,
++ * see the group_leader && !move_group test earlier.
++ */
++ if (group_leader && group_leader->ctx != ctx) {
++ err = -EINVAL;
++ goto err_locked;
++ }
+ }
++not_move_group:
+
+ if (ctx->task == TASK_TOMBSTONE) {
+ err = -ESRCH;
diff --git a/queue-5.17/revert-can-m_can-pci-use-custom-bit-timings-for-elkhart-lake.patch b/queue-5.17/revert-can-m_can-pci-use-custom-bit-timings-for-elkhart-lake.patch
new file mode 100644
index 00000000000..cc88661c2ec
--- /dev/null
+++ b/queue-5.17/revert-can-m_can-pci-use-custom-bit-timings-for-elkhart-lake.patch
@@ -0,0 +1,133 @@
+From 14ea4a470494528c7e88da5c4116c24eb027059f Mon Sep 17 00:00:00 2001
+From: Jarkko Nikula
+Date: Thu, 12 May 2022 15:41:43 +0300
+Subject: Revert "can: m_can: pci: use custom bit timings for Elkhart Lake"
+
+From: Jarkko Nikula
+
+commit 14ea4a470494528c7e88da5c4116c24eb027059f upstream.
+
+This reverts commit 0e8ffdf3b86dfd44b651f91b12fcae76c25c453b.
+
+Commit 0e8ffdf3b86d ("can: m_can: pci: use custom bit timings for
+Elkhart Lake") broke the test case using bitrate switching.
+
+| ip link set can0 up type can bitrate 500000 dbitrate 4000000 fd on
+| ip link set can1 up type can bitrate 500000 dbitrate 4000000 fd on
+| candump can0 &
+| cangen can1 -I 0x800 -L 64 -e -fb \
+| -D 11223344deadbeef55667788feedf00daabbccdd44332211 -n 1 -v -v
+
+The above commit does everything correctly according to the datasheet.
+However, the datasheet wasn't correct.
+
+I got confirmation from hardware engineers that the actual CAN
+hardware on Intel Elkhart Lake is based on M_CAN version v3.2.0.
+The datasheet was mirroring values from another specification that was
+based on an earlier M_CAN version, leading to wrong bit timings.
+
+Therefore, revert the commit and switch back to common bit timings.
+ +Fixes: ea4c1787685d ("can: m_can: pci: use custom bit timings for Elkhart Lake") +Link: https://lore.kernel.org/all/20220512124144.536850-1-jarkko.nikula@linux.intel.com +Signed-off-by: Jarkko Nikula +Reported-by: Chee Hou Ong +Reported-by: Aman Kumar +Reported-by: Pallavi Kumari +Cc: # v5.16+ +Signed-off-by: Marc Kleine-Budde +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/m_can/m_can_pci.c | 48 +++----------------------------------- + 1 file changed, 4 insertions(+), 44 deletions(-) + +--- a/drivers/net/can/m_can/m_can_pci.c ++++ b/drivers/net/can/m_can/m_can_pci.c +@@ -18,14 +18,9 @@ + + #define M_CAN_PCI_MMIO_BAR 0 + ++#define M_CAN_CLOCK_FREQ_EHL 200000000 + #define CTL_CSR_INT_CTL_OFFSET 0x508 + +-struct m_can_pci_config { +- const struct can_bittiming_const *bit_timing; +- const struct can_bittiming_const *data_timing; +- unsigned int clock_freq; +-}; +- + struct m_can_pci_priv { + struct m_can_classdev cdev; + +@@ -89,40 +84,9 @@ static struct m_can_ops m_can_pci_ops = + .read_fifo = iomap_read_fifo, + }; + +-static const struct can_bittiming_const m_can_bittiming_const_ehl = { +- .name = KBUILD_MODNAME, +- .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ +- .tseg1_max = 64, +- .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ +- .tseg2_max = 128, +- .sjw_max = 128, +- .brp_min = 1, +- .brp_max = 512, +- .brp_inc = 1, +-}; +- +-static const struct can_bittiming_const m_can_data_bittiming_const_ehl = { +- .name = KBUILD_MODNAME, +- .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ +- .tseg1_max = 16, +- .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ +- .tseg2_max = 8, +- .sjw_max = 4, +- .brp_min = 1, +- .brp_max = 32, +- .brp_inc = 1, +-}; +- +-static const struct m_can_pci_config m_can_pci_ehl = { +- .bit_timing = &m_can_bittiming_const_ehl, +- .data_timing = &m_can_data_bittiming_const_ehl, +- .clock_freq = 200000000, +-}; +- + static int m_can_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + { + struct device *dev = &pci->dev; +- const struct m_can_pci_config *cfg; + struct m_can_classdev *mcan_class; + struct m_can_pci_priv *priv; + void __iomem *base; +@@ -150,8 +114,6 @@ static int m_can_pci_probe(struct pci_de + if (!mcan_class) + return -ENOMEM; + +- cfg = (const struct m_can_pci_config *)id->driver_data; +- + priv = cdev_to_priv(mcan_class); + + priv->base = base; +@@ -163,9 +125,7 @@ static int m_can_pci_probe(struct pci_de + mcan_class->dev = &pci->dev; + mcan_class->net->irq = pci_irq_vector(pci, 0); + mcan_class->pm_clock_support = 1; +- mcan_class->bit_timing = cfg->bit_timing; +- mcan_class->data_timing = cfg->data_timing; +- mcan_class->can.clock.freq = cfg->clock_freq; ++ mcan_class->can.clock.freq = id->driver_data; + mcan_class->ops = &m_can_pci_ops; + + pci_set_drvdata(pci, mcan_class); +@@ -218,8 +178,8 @@ static SIMPLE_DEV_PM_OPS(m_can_pci_pm_op + m_can_pci_suspend, m_can_pci_resume); + + static const struct pci_device_id m_can_pci_id_table[] = { +- { PCI_VDEVICE(INTEL, 0x4bc1), (kernel_ulong_t)&m_can_pci_ehl, }, +- { PCI_VDEVICE(INTEL, 0x4bc2), (kernel_ulong_t)&m_can_pci_ehl, }, ++ { PCI_VDEVICE(INTEL, 0x4bc1), M_CAN_CLOCK_FREQ_EHL, }, ++ { PCI_VDEVICE(INTEL, 0x4bc2), M_CAN_CLOCK_FREQ_EHL, }, + { } /* Terminating Entry */ + }; + MODULE_DEVICE_TABLE(pci, m_can_pci_id_table); diff --git a/queue-5.17/selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch b/queue-5.17/selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch new file mode 100644 index 00000000000..74e3e7d038d --- /dev/null +++ 
b/queue-5.17/selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch
@@ -0,0 +1,36 @@
+From 6254bd3db316c9ccb3b05caa8b438be63245466f Mon Sep 17 00:00:00 2001
+From: Ondrej Mosnacek
+Date: Tue, 17 May 2022 14:08:16 +0200
+Subject: selinux: fix bad cleanup on error in hashtab_duplicate()
+
+From: Ondrej Mosnacek
+
+commit 6254bd3db316c9ccb3b05caa8b438be63245466f upstream.
+
+The code attempts to free the 'new' pointer using kmem_cache_free(),
+which is wrong because this function isn't responsible for freeing it.
+Instead, the function should free new->htable and clear the contents of
+*new (to prevent double-free).
+
+Cc: stable@vger.kernel.org
+Fixes: c7c556f1e81b ("selinux: refactor changing booleans")
+Reported-by: Wander Lairson Costa
+Signed-off-by: Ondrej Mosnacek
+Signed-off-by: Paul Moore
+Signed-off-by: Greg Kroah-Hartman
+---
+ security/selinux/ss/hashtab.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/security/selinux/ss/hashtab.c
++++ b/security/selinux/ss/hashtab.c
+@@ -179,7 +179,8 @@ int hashtab_duplicate(struct hashtab *ne
+ kmem_cache_free(hashtab_node_cachep, cur);
+ }
+ }
+- kmem_cache_free(hashtab_node_cachep, new);
++ kfree(new->htable);
++ memset(new, 0, sizeof(*new));
+ return -ENOMEM;
+ }
+
diff --git a/queue-5.17/series b/queue-5.17/series
index 51afa8e04e3..d8b041d7d3b 100644
--- a/queue-5.17/series
+++ b/queue-5.17/series
@@ -46,3 +46,22 @@ nilfs2-fix-lockdep-warnings-during-disk-space-reclam.patch
 alsa-usb-audio-restore-rane-sl-1-quirk.patch
 alsa-wavefront-proper-check-of-get_user-error.patch
 alsa-hda-realtek-add-quirk-for-tongfang-devices-with-pop-noise.patch
+perf-fix-sys_perf_event_open-race-against-self.patch
+selinux-fix-bad-cleanup-on-error-in-hashtab_duplicate.patch
+audit-io_uring-io-wq-call-__audit_uring_exit-for-dummy-contexts.patch
+fix-double-fget-in-vhost_net_set_backend.patch
+pci-pm-avoid-putting-elo-i2-pcie-ports-in-d3cold.patch
+revert-can-m_can-pci-use-custom-bit-timings-for-elkhart-lake.patch
+kvm-arm64-vgic-v3-consistently-populate-id_aa64pfr0_el1.gic.patch
+kvm-x86-mmu-update-number-of-zapped-pages-even-if-page-list-is-stable.patch
+kvm-free-new-dirty-bitmap-if-creating-a-new-memslot-fails.patch
+arm64-paravirt-use-rcu-read-locks-to-guard-stolen_time.patch
+arm64-mte-ensure-the-cleared-tags-are-visible-before-setting-the-pte.patch
+crypto-qcom-rng-fix-infinite-loop-on-requests-not-multiple-of-word_sz.patch
+mmc-core-fix-busy-polling-for-mmc_send_op_cond-again.patch
+libceph-fix-potential-use-after-free-on-linger-ping-and-resends.patch
+drm-amd-don-t-reset-dgpus-if-the-system-is-going-to-s2idle.patch
+drm-i915-dmc-add-mmio-range-restrictions.patch
+drm-dp-mst-fix-a-possible-memory-leak-in-fetch_monitor_name.patch
+dma-buf-fix-use-of-dma_buf_set_name_-a-b-in-userspace.patch
+dma-buf-ensure-unique-directory-name-for-dmabuf-stats.patch