From 5912e0f15bc68699af86042f1569055a24a5254c Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 8 Dec 2022 08:53:35 -0500 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- .../9p-fd-use-p9_hdrsz-for-header-size.patch | 56 +++ ...n-check-logical-size-for-buffer-size.patch | 53 +++ ...ction-prototype-mismatch-in-snd_seq_.patch | 77 ++++ ...fix-stacktraces-for-tracepoint-event.patch | 70 ++++ ...-fix-no-mmu-zero_page-implementation.patch | 141 +++++++ ...-disable-arm_global_timer-on-rk3066-.patch | 64 +++ ...-rockchip-fix-ir-receiver-node-names.patch | 36 ++ ...ckchip-fix-node-name-for-hym8563-rtc.patch | 91 ++++ ...hip-rk3188-fix-lcdc1-rgb24-node-name.patch | 36 ++ ...-rockchip-fix-ir-receiver-node-names.patch | 36 ++ ...ip-keep-i2s1-disabled-for-gpio-funct.patch | 36 ++ ...pcm-add-null-check-in-be-reparenting.patch | 37 ++ ...-for-updated-value-of-wm8962_clockin.patch | 47 +++ ...-unaligned-encoded-writes-when-attem.patch | 161 ++++++++ ...on-use-kzalloc-in-fbcon_prepare_logo.patch | 93 +++++ ...core-take-mmap_lock-in-vb2_get_unmap.patch | 265 ++++++++++++ ..._page_prepare-in-isolate_migratepage.patch | 318 ++++++++++++++ ...o-page-isolation-first-in-compaction.patch | 266 ++++++++++++ ...x-gup-fast-interaction-by-sending-ip.patch | 112 +++++ ...voke-mmu-notifiers-in-shmem-file-col.patch | 90 ++++ ...ke-the-right-locks-for-page-table-re.patch | 141 +++++++ .../mm-lru-introduce-testclearpagelru.patch | 223 ++++++++++ ...rate-fix-thp-s-mapcount-on-isolation.patch | 89 ++++ ...ck-remove-__munlock_isolate_lru_page.patch | 104 +++++ ...ove-lru_lock-on-testclearpagemlocked.patch | 115 ++++++ ...n-__isolate_lru_page_prepare-cleanup.patch | 183 ++++++++ ...i_wwan-add-u-blox-0x1342-composition.patch | 53 +++ ...slg51000-wait-after-asserting-cs-pin.patch | 44 ++ ...0-fix-get-status-of-twl6032-regulato.patch | 69 ++++ ...-return-value-from-mc146818_get_time.patch | 159 +++++++ ...os-avoid-uip-when-reading-alarm-time.patch | 146 +++++++ 
...os-avoid-uip-when-writing-alarm-time.patch | 178 ++++++++ ...c-cmos-remove-stale-revisit-comments.patch | 58 +++ ...-spin_lock_irqsave-with-spin_lock-in.patch | 49 +++ ...146818-detect-and-handle-broken-rtcs.patch | 76 ++++ ...-dont-test-for-bit-0-5-in-register-d.patch | 65 +++ ...-change-return-values-of-mc146818_ge.patch | 51 +++ ...46818-lib-extract-mc146818_avoid_uip.patch | 136 ++++++ ...-mc146818-lib-fix-rtc-presence-check.patch | 168 ++++++++ ...rtc-mc146818-prevent-reading-garbage.patch | 165 ++++++++ ...uce-spinlock-section-in-mc146818_set.patch | 54 +++ queue-5.10/series | 45 ++ ...disable-gusb2phycfg.susphy-for-end-t.patch | 47 +++ .../xen-netback-do-some-code-cleanup.patch | 147 +++++++ ...t-call-kfree_skb-with-interrupts-dis.patch | 105 +++++ ...re-protocol-headers-don-t-fall-in-th.patch | 390 ++++++++++++++++++ 46 files changed, 5145 insertions(+) create mode 100644 queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch create mode 100644 queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch create mode 100644 queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch create mode 100644 queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch create mode 100644 queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch create mode 100644 queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch create mode 100644 queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch create mode 100644 queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch create mode 100644 queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch create mode 100644 queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch create mode 100644 queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch create mode 100644 queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch create mode 100644 queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch create mode 100644 
queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch create mode 100644 queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch create mode 100644 queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch create mode 100644 queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch create mode 100644 queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch create mode 100644 queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch create mode 100644 queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch create mode 100644 queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch create mode 100644 queue-5.10/mm-lru-introduce-testclearpagelru.patch create mode 100644 queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch create mode 100644 queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch create mode 100644 queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch create mode 100644 queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch create mode 100644 queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch create mode 100644 queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch create mode 100644 queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch create mode 100644 queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch create mode 100644 queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch create mode 100644 queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch create mode 100644 queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch create mode 100644 queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch create mode 100644 queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch create mode 100644 queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch create mode 100644 queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch create mode 
100644 queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch create mode 100644 queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch create mode 100644 queue-5.10/rtc-mc146818-prevent-reading-garbage.patch create mode 100644 queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch create mode 100644 queue-5.10/series create mode 100644 queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch create mode 100644 queue-5.10/xen-netback-do-some-code-cleanup.patch create mode 100644 queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch create mode 100644 queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch diff --git a/queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch b/queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch new file mode 100644 index 00000000000..04c13940783 --- /dev/null +++ b/queue-5.10/9p-fd-use-p9_hdrsz-for-header-size.patch @@ -0,0 +1,56 @@ +From e817ab2c6014a79269c881ba6704088a67795ad2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Nov 2022 17:11:59 +0800 +Subject: 9p/fd: Use P9_HDRSZ for header size + +From: GUO Zihua + +[ Upstream commit 6854fadbeee10891ed74246bdc05031906b6c8cf ] + +Cleanup hardcoded header sizes to use P9_HDRSZ instead of '7' + +Link: https://lkml.kernel.org/r/20221117091159.31533-4-guozihua@huawei.com +Signed-off-by: GUO Zihua +Reviewed-by: Christian Schoenebeck +[Dominique: commit message adjusted to make sense after offset size +adjustment got removed] +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + net/9p/trans_fd.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c +index deb66635f0f3..e070a0b8e5ca 100644 +--- a/net/9p/trans_fd.c ++++ b/net/9p/trans_fd.c +@@ -118,7 +118,7 @@ struct p9_conn { + struct list_head unsent_req_list; + struct p9_req_t *rreq; + struct p9_req_t *wreq; +- char tmp_buf[7]; ++ char tmp_buf[P9_HDRSZ]; + struct p9_fcall rc; + int wpos; + 
int wsize; +@@ -291,7 +291,7 @@ static void p9_read_work(struct work_struct *work) + if (!m->rc.sdata) { + m->rc.sdata = m->tmp_buf; + m->rc.offset = 0; +- m->rc.capacity = 7; /* start by reading header */ ++ m->rc.capacity = P9_HDRSZ; /* start by reading header */ + } + + clear_bit(Rpending, &m->wsched); +@@ -314,7 +314,7 @@ static void p9_read_work(struct work_struct *work) + p9_debug(P9_DEBUG_TRANS, "got new header\n"); + + /* Header size */ +- m->rc.size = 7; ++ m->rc.size = P9_HDRSZ; + err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0); + if (err) { + p9_debug(P9_DEBUG_ERROR, +-- +2.35.1 + diff --git a/queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch b/queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch new file mode 100644 index 00000000000..eff3d929b69 --- /dev/null +++ b/queue-5.10/9p-xen-check-logical-size-for-buffer-size.patch @@ -0,0 +1,53 @@ +From b5b32fd794cbd41eaa5fbcace2be39beef2eea56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 22:44:41 +0900 +Subject: 9p/xen: check logical size for buffer size + +From: Dominique Martinet + +[ Upstream commit 391c18cf776eb4569ecda1f7794f360fe0a45a26 ] + +trans_xen did not check the data fits into the buffer before copying +from the xen ring, but we probably should. 
+Add a check that just skips the request and return an error to +userspace if it did not fit + +Tested-by: Stefano Stabellini +Reviewed-by: Christian Schoenebeck +Link: https://lkml.kernel.org/r/20221118135542.63400-1-asmadeus@codewreck.org +Signed-off-by: Dominique Martinet +Signed-off-by: Sasha Levin +--- + net/9p/trans_xen.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c +index 432ac5a16f2e..6c8a33f98f09 100644 +--- a/net/9p/trans_xen.c ++++ b/net/9p/trans_xen.c +@@ -231,6 +231,14 @@ static void p9_xen_response(struct work_struct *work) + continue; + } + ++ if (h.size > req->rc.capacity) { ++ dev_warn(&priv->dev->dev, ++ "requested packet size too big: %d for tag %d with capacity %zd\n", ++ h.size, h.tag, req->rc.capacity); ++ req->status = REQ_STATUS_ERROR; ++ goto recv_error; ++ } ++ + memcpy(&req->rc, &h, sizeof(h)); + req->rc.offset = 0; + +@@ -240,6 +248,7 @@ static void p9_xen_response(struct work_struct *work) + masked_prod, &masked_cons, + XEN_9PFS_RING_SIZE(ring)); + ++recv_error: + virt_mb(); + cons += h.size; + ring->intf->in_cons = cons; +-- +2.35.1 + diff --git a/queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch b/queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch new file mode 100644 index 00000000000..23b6e64bcd1 --- /dev/null +++ b/queue-5.10/alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch @@ -0,0 +1,77 @@ +From 621d8a45725353c81804715e0391a1762b803b4e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 15:23:50 -0800 +Subject: ALSA: seq: Fix function prototype mismatch in + snd_seq_expand_var_event + +From: Kees Cook + +[ Upstream commit 05530ef7cf7c7d700f6753f058999b1b5099a026 ] + +With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG), +indirect call targets are validated against the expected function +pointer prototype to make sure the call target is valid to help mitigate +ROP attacks. 
If they are not identical, there is a failure at run time, +which manifests as either a kernel panic or thread getting killed. + +seq_copy_in_user() and seq_copy_in_kernel() did not have prototypes +matching snd_seq_dump_func_t. Adjust this and remove the casts. There +are not resulting binary output differences. + +This was found as a result of Clang's new -Wcast-function-type-strict +flag, which is more sensitive than the simpler -Wcast-function-type, +which only checks for type width mismatches. + +Reported-by: kernel test robot +Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com +Cc: Jaroslav Kysela +Cc: Takashi Iwai +Cc: "Gustavo A. R. Silva" +Cc: alsa-devel@alsa-project.org +Signed-off-by: Kees Cook +Link: https://lore.kernel.org/r/20221118232346.never.380-kees@kernel.org +Signed-off-by: Takashi Iwai +Signed-off-by: Sasha Levin +--- + sound/core/seq/seq_memory.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c +index 65db1a7c77b7..bb76a2dd0a2f 100644 +--- a/sound/core/seq/seq_memory.c ++++ b/sound/core/seq/seq_memory.c +@@ -112,15 +112,19 @@ EXPORT_SYMBOL(snd_seq_dump_var_event); + * expand the variable length event to linear buffer space. + */ + +-static int seq_copy_in_kernel(char **bufptr, const void *src, int size) ++static int seq_copy_in_kernel(void *ptr, void *src, int size) + { ++ char **bufptr = ptr; ++ + memcpy(*bufptr, src, size); + *bufptr += size; + return 0; + } + +-static int seq_copy_in_user(char __user **bufptr, const void *src, int size) ++static int seq_copy_in_user(void *ptr, void *src, int size) + { ++ char __user **bufptr = ptr; ++ + if (copy_to_user(*bufptr, src, size)) + return -EFAULT; + *bufptr += size; +@@ -149,8 +153,7 @@ int snd_seq_expand_var_event(const struct snd_seq_event *event, int count, char + return newlen; + } + err = snd_seq_dump_var_event(event, +- in_kernel ? 
(snd_seq_dump_func_t)seq_copy_in_kernel : +- (snd_seq_dump_func_t)seq_copy_in_user, ++ in_kernel ? seq_copy_in_kernel : seq_copy_in_user, + &buf); + return err < 0 ? err : newlen; + } +-- +2.35.1 + diff --git a/queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch b/queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch new file mode 100644 index 00000000000..3251fda19a3 --- /dev/null +++ b/queue-5.10/arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch @@ -0,0 +1,70 @@ +From 8b3c105761d6cae81a31e82cdbda80b22fb4c1b7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 26 Sep 2022 16:09:12 +0100 +Subject: ARM: 9251/1: perf: Fix stacktraces for tracepoint events in THUMB2 + kernels + +From: Tomislav Novak + +[ Upstream commit 612695bccfdbd52004551308a55bae410e7cd22f ] + +Store the frame address where arm_get_current_stackframe() looks for it +(ARM_r7 instead of ARM_fp if CONFIG_THUMB2_KERNEL=y). Otherwise frame->fp +gets set to 0, causing unwind_frame() to fail. + + # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }' + Attaching 1 probe... + @[ + __schedule+1059 + ]: 1 + +A typical first unwind instruction is 0x97 (SP = R7), so after executing +it SP ends up being 0 and -URC_FAILURE is returned. + + unwind_frame(pc = ac9da7d7 lr = 00000000 sp = c69bdda0 fp = 00000000) + unwind_find_idx(ac9da7d7) + unwind_exec_insn: insn = 00000097 + unwind_exec_insn: fp = 00000000 sp = 00000000 lr = 00000000 pc = 00000000 + +With this patch: + + # bpftrace -e 't:sched:sched_switch { @[kstack] = count(); exit(); }' + Attaching 1 probe... 
+ @[ + __schedule+1059 + __schedule+1059 + schedule+79 + schedule_hrtimeout_range_clock+163 + schedule_hrtimeout_range+17 + ep_poll+471 + SyS_epoll_wait+111 + sys_epoll_pwait+231 + __ret_fast_syscall+1 + ]: 1 + +Link: https://lore.kernel.org/r/20220920230728.2617421-1-tnovak@fb.com/ + +Reviewed-by: Linus Walleij +Signed-off-by: Tomislav Novak +Signed-off-by: Russell King (Oracle) +Signed-off-by: Sasha Levin +--- + arch/arm/include/asm/perf_event.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h +index fe87397c3d8c..bdbc1e590891 100644 +--- a/arch/arm/include/asm/perf_event.h ++++ b/arch/arm/include/asm/perf_event.h +@@ -17,7 +17,7 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); + + #define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->ARM_pc = (__ip); \ +- (regs)->ARM_fp = (unsigned long) __builtin_frame_address(0); \ ++ frame_pointer((regs)) = (unsigned long) __builtin_frame_address(0); \ + (regs)->ARM_sp = current_stack_pointer; \ + (regs)->ARM_cpsr = SVC_MODE; \ + } +-- +2.35.1 + diff --git a/queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch b/queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch new file mode 100644 index 00000000000..f0027de073c --- /dev/null +++ b/queue-5.10/arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch @@ -0,0 +1,141 @@ +From 528c862595d7d443fc942544454319823b2e5810 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Nov 2022 21:46:18 +0100 +Subject: ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation + +From: Giulio Benetti + +[ Upstream commit 340a982825f76f1cff0daa605970fe47321b5ee7 ] + +Actually in no-MMU SoCs(i.e. 
i.MXRT) ZERO_PAGE(vaddr) expands to +``` +virt_to_page(0) +``` +that in order expands to: +``` +pfn_to_page(virt_to_pfn(0)) +``` +and then virt_to_pfn(0) to: +``` + ((((unsigned long)(0) - PAGE_OFFSET) >> PAGE_SHIFT) + + PHYS_PFN_OFFSET) +``` +where PAGE_OFFSET and PHYS_PFN_OFFSET are the DRAM offset(0x80000000) and +PAGE_SHIFT is 12. This way we obtain 16MB(0x01000000) summed to the base of +DRAM(0x80000000). +When ZERO_PAGE(0) is then used, for example in bio_add_page(), the page +gets an address that is out of DRAM bounds. +So instead of using fake virtual page 0 let's allocate a dedicated +zero_page during paging_init() and assign it to a global 'struct page * +empty_zero_page' the same way mmu.c does and it's the same approach used +in m68k with commit dc068f462179 as discussed here[0]. Then let's move +ZERO_PAGE() definition to the top of pgtable.h to be in common between +mmu.c and nommu.c. + +[0]: https://lore.kernel.org/linux-m68k/2a462b23-5b8e-bbf4-ec7d-778434a3b9d7@google.com/T/#m1266ceb63 +ad140743174d6b3070364d3c9a5179b + +Signed-off-by: Giulio Benetti +Reviewed-by: Arnd Bergmann +Signed-off-by: Russell King (Oracle) +Signed-off-by: Sasha Levin +--- + arch/arm/include/asm/pgtable-nommu.h | 6 ------ + arch/arm/include/asm/pgtable.h | 16 +++++++++------- + arch/arm/mm/nommu.c | 19 +++++++++++++++++++ + 3 files changed, 28 insertions(+), 13 deletions(-) + +diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h +index d16aba48fa0a..090011394477 100644 +--- a/arch/arm/include/asm/pgtable-nommu.h ++++ b/arch/arm/include/asm/pgtable-nommu.h +@@ -44,12 +44,6 @@ + + typedef pte_t *pte_addr_t; + +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. +- */ +-#define ZERO_PAGE(vaddr) (virt_to_page(0)) +- + /* + * Mark the prot value as uncacheable and unbufferable. 
+ */ +diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h +index c02f24400369..d38d503493cb 100644 +--- a/arch/arm/include/asm/pgtable.h ++++ b/arch/arm/include/asm/pgtable.h +@@ -10,6 +10,15 @@ + #include + #include + ++#ifndef __ASSEMBLY__ ++/* ++ * ZERO_PAGE is a global shared page that is always zero: used ++ * for zero-mapped memory areas etc.. ++ */ ++extern struct page *empty_zero_page; ++#define ZERO_PAGE(vaddr) (empty_zero_page) ++#endif ++ + #ifndef CONFIG_MMU + + #include +@@ -156,13 +165,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + #define __S111 __PAGE_SHARED_EXEC + + #ifndef __ASSEMBLY__ +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. +- */ +-extern struct page *empty_zero_page; +-#define ZERO_PAGE(vaddr) (empty_zero_page) +- + + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; + +diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c +index 8b3d7191e2b8..959f05701738 100644 +--- a/arch/arm/mm/nommu.c ++++ b/arch/arm/mm/nommu.c +@@ -26,6 +26,13 @@ + + unsigned long vectors_base; + ++/* ++ * empty_zero_page is a special page that is used for ++ * zero-initialized data and COW. ++ */ ++struct page *empty_zero_page; ++EXPORT_SYMBOL(empty_zero_page); ++ + #ifdef CONFIG_ARM_MPU + struct mpu_rgn_info mpu_rgn_info; + #endif +@@ -148,9 +155,21 @@ void __init adjust_lowmem_bounds(void) + */ + void __init paging_init(const struct machine_desc *mdesc) + { ++ void *zero_page; ++ + early_trap_init((void *)vectors_base); + mpu_setup(); ++ ++ /* allocate the zero page. 
*/ ++ zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE); ++ if (!zero_page) ++ panic("%s: Failed to allocate %lu bytes align=0x%lx\n", ++ __func__, PAGE_SIZE, PAGE_SIZE); ++ + bootmem_init(); ++ ++ empty_zero_page = virt_to_page(zero_page); ++ flush_dcache_page(empty_zero_page); + } + + /* +-- +2.35.1 + diff --git a/queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch b/queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch new file mode 100644 index 00000000000..eb4cc63d48e --- /dev/null +++ b/queue-5.10/arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch @@ -0,0 +1,64 @@ +From d4c7c13a6cd1aed237bee68b3450766db819e289 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 30 Oct 2022 21:56:29 +0100 +Subject: ARM: dts: rockchip: disable arm_global_timer on rk3066 and rk3188 + +From: Johan Jonker + +[ Upstream commit da74858a475782a3f16470907814c8cc5950ad68 ] + +The clock source and the sched_clock provided by the arm_global_timer +on Rockchip rk3066a/rk3188 are quite unstable because their rates +depend on the CPU frequency. + +Recent changes to the arm_global_timer driver makes it impossible to use. + +On the other side, the arm_global_timer has a higher rating than the +ROCKCHIP_TIMER, it will be selected by default by the time framework +while we want to use the stable Rockchip clock source. + +Keep the arm_global_timer disabled in order to have the +DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. 
+ +Signed-off-by: Johan Jonker +Link: https://lore.kernel.org/r/f275ca8d-fd0a-26e5-b978-b7f3df815e0a@gmail.com +Signed-off-by: Heiko Stuebner +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/rk3188.dtsi | 1 - + arch/arm/boot/dts/rk3xxx.dtsi | 7 +++++++ + 2 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi +index a837a9a34e3e..ddf23748ead4 100644 +--- a/arch/arm/boot/dts/rk3188.dtsi ++++ b/arch/arm/boot/dts/rk3188.dtsi +@@ -630,7 +630,6 @@ + + &global_timer { + interrupts = ; +- status = "disabled"; + }; + + &local_timer { +diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi +index 859a7477909f..5edc46a5585c 100644 +--- a/arch/arm/boot/dts/rk3xxx.dtsi ++++ b/arch/arm/boot/dts/rk3xxx.dtsi +@@ -111,6 +111,13 @@ + reg = <0x1013c200 0x20>; + interrupts = ; + clocks = <&cru CORE_PERI>; ++ status = "disabled"; ++ /* The clock source and the sched_clock provided by the arm_global_timer ++ * on Rockchip rk3066a/rk3188 are quite unstable because their rates ++ * depend on the CPU frequency. ++ * Keep the arm_global_timer disabled in order to have the ++ * DW_APB_TIMER (rk3066a) or ROCKCHIP_TIMER (rk3188) selected by default. 
++ */ + }; + + local_timer: local-timer@1013c600 { +-- +2.35.1 + diff --git a/queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch b/queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch new file mode 100644 index 00000000000..c2c34ad97f3 --- /dev/null +++ b/queue-5.10/arm-dts-rockchip-fix-ir-receiver-node-names.patch @@ -0,0 +1,36 @@ +From 5b1b99a1508cbef88005366b4a729d2514113d53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Oct 2022 10:58:22 +0200 +Subject: ARM: dts: rockchip: fix ir-receiver node names + +From: Johan Jonker + +[ Upstream commit dd847fe34cdf1e89afed1af24986359f13082bfb ] + +Fix ir-receiver node names on Rockchip boards, +so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$' + +Signed-off-by: Johan Jonker +Link: https://lore.kernel.org/r/ea5af279-f44c-afea-023d-bb37f5a0d58d@gmail.com +Signed-off-by: Heiko Stuebner +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/rk3188-radxarock.dts | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts +index b0fef82c0a71..39b913f8d701 100644 +--- a/arch/arm/boot/dts/rk3188-radxarock.dts ++++ b/arch/arm/boot/dts/rk3188-radxarock.dts +@@ -67,7 +67,7 @@ + #sound-dai-cells = <0>; + }; + +- ir_recv: gpio-ir-receiver { ++ ir_recv: ir-receiver { + compatible = "gpio-ir-receiver"; + gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; +-- +2.35.1 + diff --git a/queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch b/queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch new file mode 100644 index 00000000000..77fc21ce893 --- /dev/null +++ b/queue-5.10/arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch @@ -0,0 +1,91 @@ +From 163ce1b02852814851e67f197a271748daab03c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Oct 2022 18:55:46 +0200 +Subject: arm: dts: rockchip: fix node name for hym8563 rtc + +From: Sebastian Reichel + +[ Upstream 
commit 17b57beafccb4569accbfc8c11390744cf59c021 ] + +Fix the node name for hym8563 in all arm rockchip devicetrees. + +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20221024165549.74574-4-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/rk3036-evb.dts | 2 +- + arch/arm/boot/dts/rk3288-evb-act8846.dts | 2 +- + arch/arm/boot/dts/rk3288-firefly.dtsi | 2 +- + arch/arm/boot/dts/rk3288-miqi.dts | 2 +- + arch/arm/boot/dts/rk3288-rock2-square.dts | 2 +- + 5 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/arch/arm/boot/dts/rk3036-evb.dts b/arch/arm/boot/dts/rk3036-evb.dts +index 2a7e6624efb9..ea23ba98625e 100644 +--- a/arch/arm/boot/dts/rk3036-evb.dts ++++ b/arch/arm/boot/dts/rk3036-evb.dts +@@ -31,7 +31,7 @@ + &i2c1 { + status = "okay"; + +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +diff --git a/arch/arm/boot/dts/rk3288-evb-act8846.dts b/arch/arm/boot/dts/rk3288-evb-act8846.dts +index be695b8c1f67..8a635c243127 100644 +--- a/arch/arm/boot/dts/rk3288-evb-act8846.dts ++++ b/arch/arm/boot/dts/rk3288-evb-act8846.dts +@@ -54,7 +54,7 @@ + vin-supply = <&vcc_sys>; + }; + +- hym8563@51 { ++ rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + +diff --git a/arch/arm/boot/dts/rk3288-firefly.dtsi b/arch/arm/boot/dts/rk3288-firefly.dtsi +index 7fb582302b32..c560afe3af78 100644 +--- a/arch/arm/boot/dts/rk3288-firefly.dtsi ++++ b/arch/arm/boot/dts/rk3288-firefly.dtsi +@@ -233,7 +233,7 @@ + vin-supply = <&vcc_sys>; + }; + +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +diff --git a/arch/arm/boot/dts/rk3288-miqi.dts b/arch/arm/boot/dts/rk3288-miqi.dts +index cf54d5ffff2f..fe265a834e8e 100644 +--- a/arch/arm/boot/dts/rk3288-miqi.dts ++++ b/arch/arm/boot/dts/rk3288-miqi.dts +@@ -157,7 +157,7 @@ + vin-supply = <&vcc_sys>; + }; + +- hym8563: 
hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +diff --git a/arch/arm/boot/dts/rk3288-rock2-square.dts b/arch/arm/boot/dts/rk3288-rock2-square.dts +index c4d1d142d8c6..d5ef99ebbddc 100644 +--- a/arch/arm/boot/dts/rk3288-rock2-square.dts ++++ b/arch/arm/boot/dts/rk3288-rock2-square.dts +@@ -165,7 +165,7 @@ + }; + + &i2c0 { +- hym8563: hym8563@51 { ++ hym8563: rtc@51 { + compatible = "haoyu,hym8563"; + reg = <0x51>; + #clock-cells = <0>; +-- +2.35.1 + diff --git a/queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch b/queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch new file mode 100644 index 00000000000..bda3a6b7478 --- /dev/null +++ b/queue-5.10/arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch @@ -0,0 +1,36 @@ +From e5912fb7eb097679379662e4ac19cadcaf8ba3d5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Oct 2022 01:31:37 +0200 +Subject: ARM: dts: rockchip: rk3188: fix lcdc1-rgb24 node name + +From: Johan Jonker + +[ Upstream commit 11871e20bcb23c00966e785a124fb72bc8340af4 ] + +The lcdc1-rgb24 node name is out of line with the rest +of the rk3188 lcdc1 node, so fix it. 
+ +Signed-off-by: Johan Jonker +Link: https://lore.kernel.org/r/7b9c0a6f-626b-07e8-ae74-7e0f08b8d241@gmail.com +Signed-off-by: Heiko Stuebner +Signed-off-by: Sasha Levin +--- + arch/arm/boot/dts/rk3188.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi +index b6bde9d12c2b..a837a9a34e3e 100644 +--- a/arch/arm/boot/dts/rk3188.dtsi ++++ b/arch/arm/boot/dts/rk3188.dtsi +@@ -402,7 +402,7 @@ + rockchip,pins = <2 RK_PD3 1 &pcfg_pull_none>; + }; + +- lcdc1_rgb24: ldcd1-rgb24 { ++ lcdc1_rgb24: lcdc1-rgb24 { + rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, + <2 RK_PA1 1 &pcfg_pull_none>, + <2 RK_PA2 1 &pcfg_pull_none>, +-- +2.35.1 + diff --git a/queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch b/queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch new file mode 100644 index 00000000000..ea964ef8646 --- /dev/null +++ b/queue-5.10/arm64-dts-rockchip-fix-ir-receiver-node-names.patch @@ -0,0 +1,36 @@ +From 76fabdbab581205aaa7da7864a6f8c029f745509 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 27 Oct 2022 10:59:10 +0200 +Subject: arm64: dts: rockchip: fix ir-receiver node names + +From: Johan Jonker + +[ Upstream commit de0d04b9780a23eb928aedfb6f981285f78d58e5 ] + +Fix ir-receiver node names on Rockchip boards, +so that they match with regex: '^ir(-receiver)?(@[a-f0-9]+)?$' + +Signed-off-by: Johan Jonker +Link: https://lore.kernel.org/r/e9764253-8ce8-150b-4820-41f03f845469@gmail.com +Signed-off-by: Heiko Stuebner +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +index fbcb9531cc70..213c0759c4b8 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +@@ -13,7 +13,7 @@ + stdout-path = "serial2:1500000n8"; + }; 
+ +- ir_rx { ++ ir-receiver { + compatible = "gpio-ir-receiver"; + gpios = <&gpio0 RK_PC0 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; +-- +2.35.1 + diff --git a/queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch b/queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch new file mode 100644 index 00000000000..74d0ee3ef15 --- /dev/null +++ b/queue-5.10/arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch @@ -0,0 +1,36 @@ +From ffb3971e26f4690f2500ab9738472181039c199f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 24 Sep 2022 11:28:12 +0000 +Subject: arm64: dts: rockchip: keep I2S1 disabled for GPIO function on ROCK Pi + 4 series + +From: FUKAUMI Naoki + +[ Upstream commit 849c19d14940b87332d5d59c7fc581d73f2099fd ] + +I2S1 pins are exposed on 40-pin header on Radxa ROCK Pi 4 series. +their default function is GPIO, so I2S1 need to be disabled. + +Signed-off-by: FUKAUMI Naoki +Link: https://lore.kernel.org/r/20220924112812.1219-1-naoki@radxa.com +Signed-off-by: Heiko Stuebner +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +index f121203081b9..64df64339119 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +@@ -448,7 +448,6 @@ + &i2s1 { + rockchip,playback-channels = <2>; + rockchip,capture-channels = <2>; +- status = "okay"; + }; + + &i2s2 { +-- +2.35.1 + diff --git a/queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch b/queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch new file mode 100644 index 00000000000..0e2df53663b --- /dev/null +++ b/queue-5.10/asoc-soc-pcm-add-null-check-in-be-reparenting.patch @@ -0,0 +1,37 @@ +From c21ed1784175f9bc14e302b6c710a05b1b188fce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Nov 
2022 12:01:13 +0530 +Subject: ASoC: soc-pcm: Add NULL check in BE reparenting + +From: Srinivasa Rao Mandadapu + +[ Upstream commit db8f91d424fe0ea6db337aca8bc05908bbce1498 ] + +Add NULL check in dpcm_be_reparent API, to handle +kernel NULL pointer dereference error. +The issue occurred in fuzzing test. + +Signed-off-by: Srinivasa Rao Mandadapu +Link: https://lore.kernel.org/r/1669098673-29703-1-git-send-email-quic_srivasam@quicinc.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/soc-pcm.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c +index 0e2261ee07b6..fb874f924bbe 100644 +--- a/sound/soc/soc-pcm.c ++++ b/sound/soc/soc-pcm.c +@@ -1154,6 +1154,8 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, + return; + + be_substream = snd_soc_dpcm_get_substream(be, stream); ++ if (!be_substream) ++ return; + + for_each_dpcm_fe(be, stream, dpcm) { + if (dpcm->fe == fe) +-- +2.35.1 + diff --git a/queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch b/queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch new file mode 100644 index 00000000000..3f17d77f6df --- /dev/null +++ b/queue-5.10/asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch @@ -0,0 +1,47 @@ +From 1515e509bb8fd1b1133f5c12ccfa326ea424a96e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 20:13:54 +0800 +Subject: ASoC: wm8962: Wait for updated value of WM8962_CLOCKING1 register + +From: Chancel Liu + +[ Upstream commit 3ca507bf99611c82dafced73e921c1b10ee12869 ] + +DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate +correct frequency of LRCLK and BCLK. Sometimes the read-only value +can't be updated timely after enabling SYSCLK. This results in wrong +calculation values. Delay is introduced here to wait for newest value +from register. The time of the delay should be at least 500~1000us +according to test. 
+ +Signed-off-by: Chancel Liu +Acked-by: Charles Keepax +Link: https://lore.kernel.org/r/20221109121354.123958-1-chancel.liu@nxp.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/codecs/wm8962.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c +index 21574447650c..57aeded978c2 100644 +--- a/sound/soc/codecs/wm8962.c ++++ b/sound/soc/codecs/wm8962.c +@@ -2489,6 +2489,14 @@ static void wm8962_configure_bclk(struct snd_soc_component *component) + snd_soc_component_update_bits(component, WM8962_CLOCKING2, + WM8962_SYSCLK_ENA_MASK, WM8962_SYSCLK_ENA); + ++ /* DSPCLK_DIV field in WM8962_CLOCKING1 register is used to generate ++ * correct frequency of LRCLK and BCLK. Sometimes the read-only value ++ * can't be updated timely after enabling SYSCLK. This results in wrong ++ * calculation values. Delay is introduced here to wait for newest ++ * value from register. The time of the delay should be at least ++ * 500~1000us according to test. 
++ */ ++ usleep_range(500, 1000); + dspclk = snd_soc_component_read(component, WM8962_CLOCKING1); + + if (snd_soc_component_get_bias_level(component) != SND_SOC_BIAS_ON) +-- +2.35.1 + diff --git a/queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch b/queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch new file mode 100644 index 00000000000..97994270cbf --- /dev/null +++ b/queue-5.10/btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch @@ -0,0 +1,161 @@ +From 6c94634e8e0ed4750f2342a16b20fd351585fc7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Nov 2022 16:29:44 +0000 +Subject: btrfs: send: avoid unaligned encoded writes when attempting to clone + range + +From: Filipe Manana + +[ Upstream commit a11452a3709e217492798cf3686ac2cc8eb3fb51 ] + +When trying to see if we can clone a file range, there are cases where we +end up sending two write operations in case the inode from the source root +has an i_size that is not sector size aligned and the length from the +current offset to its i_size is less than the remaining length we are +trying to clone. + +Issuing two write operations when we could instead issue a single write +operation is not incorrect. However it is not optimal, specially if the +extents are compressed and the flag BTRFS_SEND_FLAG_COMPRESSED was passed +to the send ioctl. In that case we can end up sending an encoded write +with an offset that is not sector size aligned, which makes the receiver +fallback to decompressing the data and writing it using regular buffered +IO (so re-compressing the data in case the fs is mounted with compression +enabled), because encoded writes fail with -EINVAL when an offset is not +sector size aligned. 
+ +The following example, which triggered a bug in the receiver code for the +fallback logic of decompressing + regular buffer IO and is fixed by the +patchset referred in a Link at the bottom of this changelog, is an example +where we have the non-optimal behaviour due to an unaligned encoded write: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + mkfs.btrfs -f $DEV > /dev/null + mount -o compress $DEV $MNT + + # File foo has a size of 33K, not aligned to the sector size. + xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo + + xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar + + # Now clone the first 32K of file bar into foo at offset 0. + xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo + + # Snapshot the default subvolume and create a full send stream (v2). + btrfs subvolume snapshot -r $MNT $MNT/snap + + btrfs send --compressed-data -f /tmp/test.send $MNT/snap + + echo -e "\nFile bar in the original filesystem:" + od -A d -t x1 $MNT/snap/bar + + umount $MNT + mkfs.btrfs -f $DEV > /dev/null + mount $DEV $MNT + + echo -e "\nReceiving stream in a new filesystem..." + btrfs receive -f /tmp/test.send $MNT + + echo -e "\nFile bar in the new filesystem:" + od -A d -t x1 $MNT/snap/bar + + umount $MNT + +Before this patch, the send stream included one regular write and one +encoded write for file 'bar', with the later being not sector size aligned +and causing the receiver to fallback to decompression + buffered writes. +The output of the btrfs receive command in verbose mode (-vvv): + + (...) + mkfile o258-7-0 + rename o258-7-0 -> bar + utimes + clone bar - source=foo source offset=0 offset=0 length=32768 + write bar - offset=32768 length=1024 + encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0 + encoded_write bar - falling back to decompress and write due to errno 22 ("Invalid argument") + (...) 
+ +This patch avoids the regular write followed by an unaligned encoded write +so that we end up sending a single encoded write that is aligned. So after +this patch the stream content is (output of btrfs receive -vvv): + + (...) + mkfile o258-7-0 + rename o258-7-0 -> bar + utimes + clone bar - source=foo source offset=0 offset=0 length=32768 + encoded_write bar - offset=32768, len=4096, unencoded_offset=32768, unencoded_file_len=32768, unencoded_len=65536, compression=1, encryption=0 + (...) + +So we get more optimal behaviour and avoid the silent data loss bug in +versions of btrfs-progs affected by the bug referred by the Link tag +below (btrfs-progs v5.19, v5.19.1, v6.0 and v6.0.1). + +Link: https://lore.kernel.org/linux-btrfs/cover.1668529099.git.fdmanana@suse.com/ +Reviewed-by: Boris Burkov +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/send.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c +index 6b80dee17f49..4a6ba0997e39 100644 +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -5398,6 +5398,7 @@ static int clone_range(struct send_ctx *sctx, + u64 ext_len; + u64 clone_len; + u64 clone_data_offset; ++ bool crossed_src_i_size = false; + + if (slot >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(clone_root->root, path); +@@ -5454,8 +5455,10 @@ static int clone_range(struct send_ctx *sctx, + if (key.offset >= clone_src_i_size) + break; + +- if (key.offset + ext_len > clone_src_i_size) ++ if (key.offset + ext_len > clone_src_i_size) { + ext_len = clone_src_i_size - key.offset; ++ crossed_src_i_size = true; ++ } + + clone_data_offset = btrfs_file_extent_offset(leaf, ei); + if (btrfs_file_extent_disk_bytenr(leaf, ei) == disk_byte) { +@@ -5515,6 +5518,25 @@ static int clone_range(struct send_ctx *sctx, + ret = send_clone(sctx, offset, clone_len, + clone_root); + } ++ } else if (crossed_src_i_size && clone_len < len) { ++ 
/* ++ * If we are at i_size of the clone source inode and we ++ * can not clone from it, terminate the loop. This is ++ * to avoid sending two write operations, one with a ++ * length matching clone_len and the final one after ++ * this loop with a length of len - clone_len. ++ * ++ * When using encoded writes (BTRFS_SEND_FLAG_COMPRESSED ++ * was passed to the send ioctl), this helps avoid ++ * sending an encoded write for an offset that is not ++ * sector size aligned, in case the i_size of the source ++ * inode is not sector size aligned. That will make the ++ * receiver fallback to decompression of the data and ++ * writing it using regular buffered IO, therefore while ++ * not incorrect, it's not optimal due decompression and ++ * possible re-compression at the receiver. ++ */ ++ break; + } else { + ret = send_extent_data(sctx, offset, clone_len); + } +-- +2.35.1 + diff --git a/queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch b/queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch new file mode 100644 index 00000000000..c1a101f82a7 --- /dev/null +++ b/queue-5.10/fbcon-use-kzalloc-in-fbcon_prepare_logo.patch @@ -0,0 +1,93 @@ +From 23441e9af6d7908d5a0414e955ccd881e14d00d2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 00:27:58 +0900 +Subject: fbcon: Use kzalloc() in fbcon_prepare_logo() + +From: Tetsuo Handa + +[ Upstream commit a6a00d7e8ffd78d1cdb7a43f1278f081038c638f ] + +A kernel built with syzbot's config file reported that + + scr_memcpyw(q, save, array3_size(logo_lines, new_cols, 2)) + +causes uninitialized "save" to be copied. 
+ + ---------- + [drm] Initialized vgem 1.0.0 20120112 for vgem on minor 0 + [drm] Initialized vkms 1.0.0 20180514 for vkms on minor 1 + Console: switching to colour frame buffer device 128x48 + ===================================================== + BUG: KMSAN: uninit-value in do_update_region+0x4b8/0xba0 + do_update_region+0x4b8/0xba0 + update_region+0x40d/0x840 + fbcon_switch+0x3364/0x35e0 + redraw_screen+0xae3/0x18a0 + do_bind_con_driver+0x1cb3/0x1df0 + do_take_over_console+0x11cb/0x13f0 + fbcon_fb_registered+0xacc/0xfd0 + register_framebuffer+0x1179/0x1320 + __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 + drm_fbdev_client_hotplug+0xbea/0xda0 + drm_fbdev_generic_setup+0x65e/0x9d0 + vkms_init+0x9f3/0xc76 + (...snipped...) + + Uninit was stored to memory at: + fbcon_prepare_logo+0x143b/0x1940 + fbcon_init+0x2c1b/0x31c0 + visual_init+0x3e7/0x820 + do_bind_con_driver+0x14a4/0x1df0 + do_take_over_console+0x11cb/0x13f0 + fbcon_fb_registered+0xacc/0xfd0 + register_framebuffer+0x1179/0x1320 + __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 + drm_fbdev_client_hotplug+0xbea/0xda0 + drm_fbdev_generic_setup+0x65e/0x9d0 + vkms_init+0x9f3/0xc76 + (...snipped...) + + Uninit was created at: + __kmem_cache_alloc_node+0xb69/0x1020 + __kmalloc+0x379/0x680 + fbcon_prepare_logo+0x704/0x1940 + fbcon_init+0x2c1b/0x31c0 + visual_init+0x3e7/0x820 + do_bind_con_driver+0x14a4/0x1df0 + do_take_over_console+0x11cb/0x13f0 + fbcon_fb_registered+0xacc/0xfd0 + register_framebuffer+0x1179/0x1320 + __drm_fb_helper_initial_config_and_unlock+0x23ad/0x2b40 + drm_fbdev_client_hotplug+0xbea/0xda0 + drm_fbdev_generic_setup+0x65e/0x9d0 + vkms_init+0x9f3/0xc76 + (...snipped...) 
+ + CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc4-00356-g8f2975c2bb4c #924 + Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 + ---------- + +Signed-off-by: Tetsuo Handa +Signed-off-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/cad03d25-0ea0-32c4-8173-fd1895314bce@I-love.SAKURA.ne.jp +Signed-off-by: Sasha Levin +--- + drivers/video/fbdev/core/fbcon.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c +index 2618d3beef64..27828435dd4f 100644 +--- a/drivers/video/fbdev/core/fbcon.c ++++ b/drivers/video/fbdev/core/fbcon.c +@@ -609,7 +609,7 @@ static void fbcon_prepare_logo(struct vc_data *vc, struct fb_info *info, + if (scr_readw(r) != vc->vc_video_erase_char) + break; + if (r != q && new_rows >= rows + logo_lines) { +- save = kmalloc(array3_size(logo_lines, new_cols, 2), ++ save = kzalloc(array3_size(logo_lines, new_cols, 2), + GFP_KERNEL); + if (save) { + int i = cols < new_cols ? cols : new_cols; +-- +2.35.1 + diff --git a/queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch b/queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch new file mode 100644 index 00000000000..a357c017f59 --- /dev/null +++ b/queue-5.10/media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch @@ -0,0 +1,265 @@ +From e5e26144a62a3037222e7eb80ffe8ed2d249ca26 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Dec 2022 13:04:34 +0000 +Subject: media: videobuf2-core: take mmap_lock in vb2_get_unmapped_area() + +From: Hans Verkuil + +[ Upstream commit 098e5edc5d048a8df8691fd9fde895af100be42b ] + +While vb2_mmap took the mmap_lock mutex, vb2_get_unmapped_area didn't. +Add this. + +Also take this opportunity to move the 'q->memory != VB2_MEMORY_MMAP' +check and vb2_fileio_is_active() check into __find_plane_by_offset() so +both vb2_mmap and vb2_get_unmapped_area do the same checks. 
+ +Since q->memory is checked while mmap_lock is held, also take that lock +in reqbufs and create_bufs when it is set, and set it back to +MEMORY_UNKNOWN on error. + +Fixes: f035eb4e976e ("[media] videobuf2: fix lockdep warning") +Signed-off-by: Hans Verkuil +Acked-by: Tomasz Figa +Reviewed-by: Ricardo Ribalda +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + .../media/common/videobuf2/videobuf2-core.c | 102 +++++++++++++----- + 1 file changed, 73 insertions(+), 29 deletions(-) + +diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c +index 72350343a56a..3bafde87a125 100644 +--- a/drivers/media/common/videobuf2/videobuf2-core.c ++++ b/drivers/media/common/videobuf2/videobuf2-core.c +@@ -787,7 +787,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + num_buffers = max_t(unsigned int, *count, q->min_buffers_needed); + num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); + memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); ++ /* ++ * Set this now to ensure that drivers see the correct q->memory value ++ * in the queue_setup op. ++ */ ++ mutex_lock(&q->mmap_lock); + q->memory = memory; ++ mutex_unlock(&q->mmap_lock); + + /* + * Ask the driver how many buffers and planes per buffer it requires. 
+@@ -796,22 +802,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, + plane_sizes, q->alloc_devs); + if (ret) +- return ret; ++ goto error; + + /* Check that driver has set sane values */ +- if (WARN_ON(!num_planes)) +- return -EINVAL; ++ if (WARN_ON(!num_planes)) { ++ ret = -EINVAL; ++ goto error; ++ } + + for (i = 0; i < num_planes; i++) +- if (WARN_ON(!plane_sizes[i])) +- return -EINVAL; ++ if (WARN_ON(!plane_sizes[i])) { ++ ret = -EINVAL; ++ goto error; ++ } + + /* Finally, allocate buffers and video memory */ + allocated_buffers = + __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); + if (allocated_buffers == 0) { + dprintk(q, 1, "memory allocation failed\n"); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto error; + } + + /* +@@ -852,7 +863,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + if (ret < 0) { + /* + * Note: __vb2_queue_free() will subtract 'allocated_buffers' +- * from q->num_buffers. ++ * from q->num_buffers and it will reset q->memory to ++ * VB2_MEMORY_UNKNOWN. 
+ */ + __vb2_queue_free(q, allocated_buffers); + mutex_unlock(&q->mmap_lock); +@@ -868,6 +880,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, + q->waiting_for_buffers = !q->is_output; + + return 0; ++ ++error: ++ mutex_lock(&q->mmap_lock); ++ q->memory = VB2_MEMORY_UNKNOWN; ++ mutex_unlock(&q->mmap_lock); ++ return ret; + } + EXPORT_SYMBOL_GPL(vb2_core_reqbufs); + +@@ -878,6 +896,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + { + unsigned int num_planes = 0, num_buffers, allocated_buffers; + unsigned plane_sizes[VB2_MAX_PLANES] = { }; ++ bool no_previous_buffers = !q->num_buffers; + int ret; + + if (q->num_buffers == VB2_MAX_FRAME) { +@@ -885,13 +904,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + return -ENOBUFS; + } + +- if (!q->num_buffers) { ++ if (no_previous_buffers) { + if (q->waiting_in_dqbuf && *count) { + dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); + return -EBUSY; + } + memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); ++ /* ++ * Set this now to ensure that drivers see the correct q->memory ++ * value in the queue_setup op. 
++ */ ++ mutex_lock(&q->mmap_lock); + q->memory = memory; ++ mutex_unlock(&q->mmap_lock); + q->waiting_for_buffers = !q->is_output; + } else { + if (q->memory != memory) { +@@ -914,14 +939,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + ret = call_qop(q, queue_setup, q, &num_buffers, + &num_planes, plane_sizes, q->alloc_devs); + if (ret) +- return ret; ++ goto error; + + /* Finally, allocate buffers and video memory */ + allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, + num_planes, plane_sizes); + if (allocated_buffers == 0) { + dprintk(q, 1, "memory allocation failed\n"); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto error; + } + + /* +@@ -952,7 +978,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + if (ret < 0) { + /* + * Note: __vb2_queue_free() will subtract 'allocated_buffers' +- * from q->num_buffers. ++ * from q->num_buffers and it will reset q->memory to ++ * VB2_MEMORY_UNKNOWN. + */ + __vb2_queue_free(q, allocated_buffers); + mutex_unlock(&q->mmap_lock); +@@ -967,6 +994,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, + *count = allocated_buffers; + + return 0; ++ ++error: ++ if (no_previous_buffers) { ++ mutex_lock(&q->mmap_lock); ++ q->memory = VB2_MEMORY_UNKNOWN; ++ mutex_unlock(&q->mmap_lock); ++ } ++ return ret; + } + EXPORT_SYMBOL_GPL(vb2_core_create_bufs); + +@@ -2120,6 +2155,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off, + struct vb2_buffer *vb; + unsigned int buffer, plane; + ++ /* ++ * Sanity checks to ensure the lock is held, MEMORY_MMAP is ++ * used and fileio isn't active. 
++ */ ++ lockdep_assert_held(&q->mmap_lock); ++ ++ if (q->memory != VB2_MEMORY_MMAP) { ++ dprintk(q, 1, "queue is not currently set up for mmap\n"); ++ return -EINVAL; ++ } ++ ++ if (vb2_fileio_is_active(q)) { ++ dprintk(q, 1, "file io in progress\n"); ++ return -EBUSY; ++ } ++ + /* + * Go over all buffers and their planes, comparing the given offset + * with an offset assigned to each plane. If a match is found, +@@ -2219,11 +2270,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) + int ret; + unsigned long length; + +- if (q->memory != VB2_MEMORY_MMAP) { +- dprintk(q, 1, "queue is not currently set up for mmap\n"); +- return -EINVAL; +- } +- + /* + * Check memory area access mode. + */ +@@ -2245,14 +2291,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) + + mutex_lock(&q->mmap_lock); + +- if (vb2_fileio_is_active(q)) { +- dprintk(q, 1, "mmap: file io in progress\n"); +- ret = -EBUSY; +- goto unlock; +- } +- + /* +- * Find the plane corresponding to the offset passed by userspace. ++ * Find the plane corresponding to the offset passed by userspace. This ++ * will return an error if not MEMORY_MMAP or file I/O is in progress. + */ + ret = __find_plane_by_offset(q, off, &buffer, &plane); + if (ret) +@@ -2305,22 +2346,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, + void *vaddr; + int ret; + +- if (q->memory != VB2_MEMORY_MMAP) { +- dprintk(q, 1, "queue is not currently set up for mmap\n"); +- return -EINVAL; +- } ++ mutex_lock(&q->mmap_lock); + + /* +- * Find the plane corresponding to the offset passed by userspace. ++ * Find the plane corresponding to the offset passed by userspace. This ++ * will return an error if not MEMORY_MMAP or file I/O is in progress. + */ + ret = __find_plane_by_offset(q, off, &buffer, &plane); + if (ret) +- return ret; ++ goto unlock; + + vb = q->bufs[buffer]; + + vaddr = vb2_plane_vaddr(vb, plane); ++ mutex_unlock(&q->mmap_lock); + return vaddr ? 
(unsigned long)vaddr : -EINVAL; ++ ++unlock: ++ mutex_unlock(&q->mmap_lock); ++ return ret; + } + EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); + #endif +-- +2.35.1 + diff --git a/queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch b/queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch new file mode 100644 index 00000000000..2a42e3a1b3b --- /dev/null +++ b/queue-5.10/mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch @@ -0,0 +1,318 @@ +From 60accdd3d3a54a9d28d0f2d39ec740df38c167fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Mar 2022 14:45:41 -0700 +Subject: mm: __isolate_lru_page_prepare() in isolate_migratepages_block() + +From: Hugh Dickins + +[ Upstream commit 89f6c88a6ab4a11deb14c270f7f1454cda4f73d6 ] + +__isolate_lru_page_prepare() conflates two unrelated functions, with the +flags to one disjoint from the flags to the other; and hides some of the +important checks outside of isolate_migratepages_block(), where the +sequence is better to be visible. It comes from the days of lumpy +reclaim, before compaction, when the combination made more sense. + +Move what's needed by mm/compaction.c isolate_migratepages_block() inline +there, and what's needed by mm/vmscan.c isolate_lru_pages() inline there. + +Shorten "isolate_mode" to "mode", so the sequence of conditions is easier +to read. Declare a "mapping" variable, to save one call to page_mapping() +(but not another: calling again after page is locked is necessary). +Simplify isolate_lru_pages() with a "move_to" list pointer. 
+ +Link: https://lkml.kernel.org/r/879d62a8-91cc-d3c6-fb3b-69768236df68@google.com +Signed-off-by: Hugh Dickins +Acked-by: David Rientjes +Reviewed-by: Alex Shi +Cc: Alexander Duyck +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation") +Signed-off-by: Sasha Levin +--- + include/linux/swap.h | 1 - + mm/compaction.c | 51 +++++++++++++++++++--- + mm/vmscan.c | 101 ++++++++----------------------------------- + 3 files changed, 62 insertions(+), 91 deletions(-) + +diff --git a/include/linux/swap.h b/include/linux/swap.h +index 394d5de5d4b4..a502928c29c5 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -358,7 +358,6 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, + extern unsigned long zone_reclaimable_pages(struct zone *zone); + extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, + gfp_t gfp_mask, nodemask_t *mask); +-extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); + extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, + unsigned long nr_pages, + gfp_t gfp_mask, +diff --git a/mm/compaction.c b/mm/compaction.c +index ea46aadc7c21..57ce6b001b10 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -784,7 +784,7 @@ static bool too_many_isolated(pg_data_t *pgdat) + * @cc: Compaction control structure. + * @low_pfn: The first PFN to isolate + * @end_pfn: The one-past-the-last PFN to isolate, within same pageblock +- * @isolate_mode: Isolation mode to be used. ++ * @mode: Isolation mode to be used. + * + * Isolate all pages that can be migrated from the range specified by + * [low_pfn, end_pfn). The range is expected to be within same pageblock. 
+@@ -798,7 +798,7 @@ static bool too_many_isolated(pg_data_t *pgdat) + */ + static unsigned long + isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, +- unsigned long end_pfn, isolate_mode_t isolate_mode) ++ unsigned long end_pfn, isolate_mode_t mode) + { + pg_data_t *pgdat = cc->zone->zone_pgdat; + unsigned long nr_scanned = 0, nr_isolated = 0; +@@ -806,6 +806,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + unsigned long flags = 0; + bool locked = false; + struct page *page = NULL, *valid_page = NULL; ++ struct address_space *mapping; + unsigned long start_pfn = low_pfn; + bool skip_on_failure = false; + unsigned long next_skip_pfn = 0; +@@ -949,7 +950,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + locked = false; + } + +- if (!isolate_movable_page(page, isolate_mode)) ++ if (!isolate_movable_page(page, mode)) + goto isolate_success; + } + +@@ -961,15 +962,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. + */ +- if (!page_mapping(page) && +- page_count(page) > page_mapcount(page)) ++ mapping = page_mapping(page); ++ if (!mapping && page_count(page) > page_mapcount(page)) + goto isolate_fail; + + /* + * Only allow to migrate anonymous pages in GFP_NOFS context + * because those do not depend on fs locks. 
+ */ +- if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page)) ++ if (!(cc->gfp_mask & __GFP_FS) && mapping) + goto isolate_fail; + + /* +@@ -980,9 +981,45 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail; + +- if (!__isolate_lru_page_prepare(page, isolate_mode)) ++ /* Only take pages on LRU: a check now makes later tests safe */ ++ if (!PageLRU(page)) ++ goto isolate_fail_put; ++ ++ /* Compaction might skip unevictable pages but CMA takes them */ ++ if (!(mode & ISOLATE_UNEVICTABLE) && PageUnevictable(page)) ++ goto isolate_fail_put; ++ ++ /* ++ * To minimise LRU disruption, the caller can indicate with ++ * ISOLATE_ASYNC_MIGRATE that it only wants to isolate pages ++ * it will be able to migrate without blocking - clean pages ++ * for the most part. PageWriteback would require blocking. ++ */ ++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageWriteback(page)) + goto isolate_fail_put; + ++ if ((mode & ISOLATE_ASYNC_MIGRATE) && PageDirty(page)) { ++ bool migrate_dirty; ++ ++ /* ++ * Only pages without mappings or that have a ++ * ->migratepage callback are possible to migrate ++ * without blocking. However, we can be racing with ++ * truncation so it's necessary to lock the page ++ * to stabilise the mapping as truncation holds ++ * the page lock until after the page is removed ++ * from the page cache. 
++ */ ++ if (!trylock_page(page)) ++ goto isolate_fail_put; ++ ++ mapping = page_mapping(page); ++ migrate_dirty = !mapping || mapping->a_ops->migratepage; ++ unlock_page(page); ++ if (!migrate_dirty) ++ goto isolate_fail_put; ++ } ++ + /* Try isolate the page */ + if (!TestClearPageLRU(page)) + goto isolate_fail_put; +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 00a47845a15b..9cba0f890b33 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1535,69 +1535,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + return nr_reclaimed; + } + +-/* +- * Attempt to remove the specified page from its LRU. Only take this page +- * if it is of the appropriate PageActive status. Pages which are being +- * freed elsewhere are also ignored. +- * +- * page: page to consider +- * mode: one of the LRU isolation modes defined above +- * +- * returns true on success, false on failure. +- */ +-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) +-{ +- /* Only take pages on the LRU. */ +- if (!PageLRU(page)) +- return false; +- +- /* Compaction should not handle unevictable pages but CMA can do so */ +- if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE)) +- return false; +- +- /* +- * To minimise LRU disruption, the caller can indicate that it only +- * wants to isolate pages it will be able to operate on without +- * blocking - clean pages for the most part. +- * +- * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages +- * that it is possible to migrate without blocking +- */ +- if (mode & ISOLATE_ASYNC_MIGRATE) { +- /* All the caller can do on PageWriteback is block */ +- if (PageWriteback(page)) +- return false; +- +- if (PageDirty(page)) { +- struct address_space *mapping; +- bool migrate_dirty; +- +- /* +- * Only pages without mappings or that have a +- * ->migratepage callback are possible to migrate +- * without blocking. 
However, we can be racing with +- * truncation so it's necessary to lock the page +- * to stabilise the mapping as truncation holds +- * the page lock until after the page is removed +- * from the page cache. +- */ +- if (!trylock_page(page)) +- return false; +- +- mapping = page_mapping(page); +- migrate_dirty = !mapping || mapping->a_ops->migratepage; +- unlock_page(page); +- if (!migrate_dirty) +- return false; +- } +- } +- +- if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) +- return false; +- +- return true; +-} +- + /* + * Update LRU sizes after isolating pages. The LRU size updates must + * be complete before mem_cgroup_update_lru_size due to a sanity check. +@@ -1647,11 +1584,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + unsigned long skipped = 0; + unsigned long scan, total_scan, nr_pages; + LIST_HEAD(pages_skipped); +- isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); + + total_scan = 0; + scan = 0; + while (scan < nr_to_scan && !list_empty(src)) { ++ struct list_head *move_to = src; + struct page *page; + + page = lru_to_page(src); +@@ -1661,9 +1598,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + total_scan += nr_pages; + + if (page_zonenum(page) > sc->reclaim_idx) { +- list_move(&page->lru, &pages_skipped); + nr_skipped[page_zonenum(page)] += nr_pages; +- continue; ++ move_to = &pages_skipped; ++ goto move; + } + + /* +@@ -1671,37 +1608,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + * return with no isolated pages if the LRU mostly contains + * ineligible pages. This causes the VM to not reclaim any + * pages, triggering a premature OOM. +- * +- * Account all tail pages of THP. This would not cause +- * premature OOM since __isolate_lru_page() returns -EBUSY +- * only when the page is being freed somewhere else. ++ * Account all tail pages of THP. 
+ */ + scan += nr_pages; +- if (!__isolate_lru_page_prepare(page, mode)) { +- /* It is being freed elsewhere */ +- list_move(&page->lru, src); +- continue; +- } ++ ++ if (!PageLRU(page)) ++ goto move; ++ if (!sc->may_unmap && page_mapped(page)) ++ goto move; ++ + /* + * Be careful not to clear PageLRU until after we're + * sure the page is not being freed elsewhere -- the + * page release code relies on it. + */ +- if (unlikely(!get_page_unless_zero(page))) { +- list_move(&page->lru, src); +- continue; +- } ++ if (unlikely(!get_page_unless_zero(page))) ++ goto move; + + if (!TestClearPageLRU(page)) { + /* Another thread is already isolating this page */ + put_page(page); +- list_move(&page->lru, src); +- continue; ++ goto move; + } + + nr_taken += nr_pages; + nr_zone_taken[page_zonenum(page)] += nr_pages; +- list_move(&page->lru, dst); ++ move_to = dst; ++move: ++ list_move(&page->lru, move_to); + } + + /* +@@ -1725,7 +1659,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + } + *nr_scanned = total_scan; + trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, +- total_scan, skipped, nr_taken, mode, lru); ++ total_scan, skipped, nr_taken, ++ sc->may_unmap ? 
0 : ISOLATE_UNMAPPED, lru); + update_lru_sizes(lruvec, lru, nr_zone_taken); + return nr_taken; + } +-- +2.35.1 + diff --git a/queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch b/queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch new file mode 100644 index 00000000000..45d5ebfdc0c --- /dev/null +++ b/queue-5.10/mm-compaction-do-page-isolation-first-in-compaction.patch @@ -0,0 +1,266 @@ +From ef33d369381db1a4df5a9f474b014cc4e4664606 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Dec 2020 12:34:20 -0800 +Subject: mm/compaction: do page isolation first in compaction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Shi + +[ Upstream commit 9df41314390b81a541ca6e84c8340bad0959e4b5 ] + +Currently, compaction would get the lru_lock and then do page isolation +which works fine with pgdat->lru_lock, since any page isolation would +compete for the lru_lock. If we want to change to memcg lru_lock, we have +to isolate the page before getting lru_lock, thus isolation would block +page's memcg change which relies on page isolation too. Then we could +safely use per memcg lru_lock later. + +The new page isolation uses the previously introduced TestClearPageLRU() + pgdat +lru locking which will be changed to memcg lru lock later. + +Hugh Dickins fixed following bugs in this patch's early +version: + +Fix lots of crashes under compaction load: isolate_migratepages_block() +must clean up appropriately when rejecting a page, setting PageLRU again +if it had been cleared; and a put_page() after get_page_unless_zero() +cannot safely be done while holding locked_lruvec - it may turn out to be +the final put_page(), which will take an lruvec lock when PageLRU. 
+ +And move __isolate_lru_page_prepare back after get_page_unless_zero to +make trylock_page() safe: trylock_page() is not safe to use at this time: +its setting PG_locked can race with the page being freed or allocated +("Bad page"), and can also erase flags being set by one of those "sole +owners" of a freshly allocated page who use non-atomic __SetPageFlag(). + +Link: https://lkml.kernel.org/r/1604566549-62481-16-git-send-email-alex.shi@linux.alibaba.com +Suggested-by: Johannes Weiner +Signed-off-by: Alex Shi +Acked-by: Hugh Dickins +Acked-by: Johannes Weiner +Acked-by: Vlastimil Babka +Cc: Matthew Wilcox +Cc: Alexander Duyck +Cc: Andrea Arcangeli +Cc: Andrey Ryabinin +Cc: "Chen, Rong A" +Cc: Daniel Jordan +Cc: "Huang, Ying" +Cc: Jann Horn +Cc: Joonsoo Kim +Cc: Kirill A. Shutemov +Cc: Kirill A. Shutemov +Cc: Konstantin Khlebnikov +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Michal Hocko +Cc: Mika Penttilä +Cc: Minchan Kim +Cc: Shakeel Butt +Cc: Tejun Heo +Cc: Thomas Gleixner +Cc: Vladimir Davydov +Cc: Wei Yang +Cc: Yang Shi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation") +Signed-off-by: Sasha Levin +--- + include/linux/swap.h | 2 +- + mm/compaction.c | 42 +++++++++++++++++++++++++++++++++--------- + mm/vmscan.c | 43 ++++++++++++++++++++++--------------------- + 3 files changed, 56 insertions(+), 31 deletions(-) + +diff --git a/include/linux/swap.h b/include/linux/swap.h +index fbc6805358da..3577d3a6ec37 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -358,7 +358,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, + extern unsigned long zone_reclaimable_pages(struct zone *zone); + extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, + gfp_t gfp_mask, nodemask_t *mask); +-extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); ++extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t 
mode); + extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, + unsigned long nr_pages, + gfp_t gfp_mask, +diff --git a/mm/compaction.c b/mm/compaction.c +index 8dfbe86bd74f..ba3e907f03b7 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -890,6 +890,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + if (!valid_page && IS_ALIGNED(low_pfn, pageblock_nr_pages)) { + if (!cc->ignore_skip_hint && get_pageblock_skip(page)) { + low_pfn = end_pfn; ++ page = NULL; + goto isolate_abort; + } + valid_page = page; +@@ -971,6 +972,21 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page)) + goto isolate_fail; + ++ /* ++ * Be careful not to clear PageLRU until after we're ++ * sure the page is not being freed elsewhere -- the ++ * page release code relies on it. ++ */ ++ if (unlikely(!get_page_unless_zero(page))) ++ goto isolate_fail; ++ ++ if (__isolate_lru_page_prepare(page, isolate_mode) != 0) ++ goto isolate_fail_put; ++ ++ /* Try isolate the page */ ++ if (!TestClearPageLRU(page)) ++ goto isolate_fail_put; ++ + /* If we already hold the lock, we can skip some rechecking */ + if (!locked) { + locked = compact_lock_irqsave(&pgdat->lru_lock, +@@ -983,10 +999,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + goto isolate_abort; + } + +- /* Recheck PageLRU and PageCompound under lock */ +- if (!PageLRU(page)) +- goto isolate_fail; +- + /* + * Page become compound since the non-locked check, + * and it's on LRU. 
It can only be a THP so the order +@@ -994,16 +1006,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + */ + if (unlikely(PageCompound(page) && !cc->alloc_contig)) { + low_pfn += compound_nr(page) - 1; +- goto isolate_fail; ++ SetPageLRU(page); ++ goto isolate_fail_put; + } + } + + lruvec = mem_cgroup_page_lruvec(page, pgdat); + +- /* Try isolate the page */ +- if (__isolate_lru_page(page, isolate_mode) != 0) +- goto isolate_fail; +- + /* The whole page is taken off the LRU; skip the tail pages. */ + if (PageCompound(page)) + low_pfn += compound_nr(page) - 1; +@@ -1032,6 +1041,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + } + + continue; ++ ++isolate_fail_put: ++ /* Avoid potential deadlock in freeing page under lru_lock */ ++ if (locked) { ++ spin_unlock_irqrestore(&pgdat->lru_lock, flags); ++ locked = false; ++ } ++ put_page(page); ++ + isolate_fail: + if (!skip_on_failure) + continue; +@@ -1068,9 +1086,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + if (unlikely(low_pfn > end_pfn)) + low_pfn = end_pfn; + ++ page = NULL; ++ + isolate_abort: + if (locked) + spin_unlock_irqrestore(&pgdat->lru_lock, flags); ++ if (page) { ++ SetPageLRU(page); ++ put_page(page); ++ } + + /* + * Updated the cached scanner pfn once the pageblock has been scanned +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 8d62eedfc794..5ada402c8d95 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1545,7 +1545,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + * + * returns 0 on success, -ve errno on failure. 
+ */ +-int __isolate_lru_page(struct page *page, isolate_mode_t mode) ++int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) + { + int ret = -EBUSY; + +@@ -1597,22 +1597,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) + return ret; + +- if (likely(get_page_unless_zero(page))) { +- /* +- * Be careful not to clear PageLRU until after we're +- * sure the page is not being freed elsewhere -- the +- * page release code relies on it. +- */ +- if (TestClearPageLRU(page)) +- ret = 0; +- else +- put_page(page); +- } +- +- return ret; ++ return 0; + } + +- + /* + * Update LRU sizes after isolating pages. The LRU size updates must + * be complete before mem_cgroup_update_lru_size due to a sanity check. +@@ -1692,20 +1679,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + * only when the page is being freed somewhere else. + */ + scan += nr_pages; +- switch (__isolate_lru_page(page, mode)) { ++ switch (__isolate_lru_page_prepare(page, mode)) { + case 0: ++ /* ++ * Be careful not to clear PageLRU until after we're ++ * sure the page is not being freed elsewhere -- the ++ * page release code relies on it. ++ */ ++ if (unlikely(!get_page_unless_zero(page))) ++ goto busy; ++ ++ if (!TestClearPageLRU(page)) { ++ /* ++ * This page may in other isolation path, ++ * but we still hold lru_lock. 
++ */ ++ put_page(page); ++ goto busy; ++ } ++ + nr_taken += nr_pages; + nr_zone_taken[page_zonenum(page)] += nr_pages; + list_move(&page->lru, dst); + break; + +- case -EBUSY: ++ default: ++busy: + /* else it is being freed elsewhere */ + list_move(&page->lru, src); +- continue; +- +- default: +- BUG(); + } + } + +-- +2.35.1 + diff --git a/queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch b/queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch new file mode 100644 index 00000000000..ad4d743cc6d --- /dev/null +++ b/queue-5.10/mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch @@ -0,0 +1,112 @@ +From 2b6a220c2d4c90dda4ae1ed3aa0251eb152b8825 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Dec 2022 18:16:04 +0100 +Subject: mm/khugepaged: fix GUP-fast interaction by sending IPI + +From: Jann Horn + +commit 2ba99c5e08812494bc57f319fb562f527d9bacd8 upstream. + +Since commit 70cbc3cc78a99 ("mm: gup: fix the fast GUP race against THP +collapse"), the lockless_pages_from_mm() fastpath rechecks the pmd_t to +ensure that the page table was not removed by khugepaged in between. + +However, lockless_pages_from_mm() still requires that the page table is +not concurrently freed. Fix it by sending IPIs (if the architecture uses +semi-RCU-style page table freeing) before freeing/reusing page tables. 
+ +Link: https://lkml.kernel.org/r/20221129154730.2274278-2-jannh@google.com +Link: https://lkml.kernel.org/r/20221128180252.1684965-2-jannh@google.com +Link: https://lkml.kernel.org/r/20221125213714.4115729-2-jannh@google.com +Fixes: ba76149f47d8 ("thp: khugepaged") +Signed-off-by: Jann Horn +Reviewed-by: Yang Shi +Acked-by: David Hildenbrand +Cc: John Hubbard +Cc: Peter Xu +Cc: +Signed-off-by: Andrew Morton +[manual backport: two of the three places in khugepaged that can free +ptes were refactored into a common helper between 5.15 and 6.0] +Signed-off-by: Jann Horn +Signed-off-by: Sasha Levin +--- + include/asm-generic/tlb.h | 4 ++++ + mm/khugepaged.c | 3 +++ + mm/mmu_gather.c | 4 +--- + 3 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h +index a0c4b99d2899..f40c9534f20b 100644 +--- a/include/asm-generic/tlb.h ++++ b/include/asm-generic/tlb.h +@@ -205,12 +205,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + #define tlb_needs_table_invalidate() (true) + #endif + ++void tlb_remove_table_sync_one(void); ++ + #else + + #ifdef tlb_needs_table_invalidate + #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE + #endif + ++static inline void tlb_remove_table_sync_one(void) { } ++ + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index 014e8b259313..0268b549bd60 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1154,6 +1154,7 @@ static void collapse_huge_page(struct mm_struct *mm, + _pmd = pmdp_collapse_flush(vma, address, pmd); + spin_unlock(pmd_ptl); + mmu_notifier_invalidate_range_end(&range); ++ tlb_remove_table_sync_one(); + + spin_lock(pte_ptl); + isolated = __collapse_huge_page_isolate(vma, address, pte, +@@ -1538,6 +1539,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + /* step 4: collapse pmd */ + _pmd = pmdp_collapse_flush(vma, haddr, pmd); + mm_dec_nr_ptes(mm); ++ 
tlb_remove_table_sync_one(); + pte_free(mm, pmd_pgtable(_pmd)); + + i_mmap_unlock_write(vma->vm_file->f_mapping); +@@ -1625,6 +1627,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); + mm_dec_nr_ptes(mm); ++ tlb_remove_table_sync_one(); + pte_free(mm, pmd_pgtable(_pmd)); + } + mmap_write_unlock(mm); +diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c +index 03c33c93a582..205fdbb5792a 100644 +--- a/mm/mmu_gather.c ++++ b/mm/mmu_gather.c +@@ -139,7 +139,7 @@ static void tlb_remove_table_smp_sync(void *arg) + /* Simply deliver the interrupt */ + } + +-static void tlb_remove_table_sync_one(void) ++void tlb_remove_table_sync_one(void) + { + /* + * This isn't an RCU grace period and hence the page-tables cannot be +@@ -163,8 +163,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) + + #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ + +-static void tlb_remove_table_sync_one(void) { } +- + static void tlb_remove_table_free(struct mmu_table_batch *batch) + { + __tlb_remove_table_free(batch); +-- +2.35.1 + diff --git a/queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch b/queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch new file mode 100644 index 00000000000..1d0ad3f974f --- /dev/null +++ b/queue-5.10/mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch @@ -0,0 +1,90 @@ +From 8e1c95908e48c5198348debed6c347698ecc9ec1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Dec 2022 18:16:05 +0100 +Subject: mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths + +From: Jann Horn + +commit f268f6cf875f3220afc77bdd0bf1bb136eb54db9 upstream. + +Any codepath that zaps page table entries must invoke MMU notifiers to +ensure that secondary MMUs (like KVM) don't keep accessing pages which +aren't mapped anymore. 
Secondary MMUs don't hold their own references to +pages that are mirrored over, so failing to notify them can lead to page +use-after-free. + +I'm marking this as addressing an issue introduced in commit f3f0e1d2150b +("khugepaged: add support of collapse for tmpfs/shmem pages"), but most of +the security impact of this only came in commit 27e1f8273113 ("khugepaged: +enable collapse pmd for pte-mapped THP"), which actually omitted flushes +for the removal of present PTEs, not just for the removal of empty page +tables. + +Link: https://lkml.kernel.org/r/20221129154730.2274278-3-jannh@google.com +Link: https://lkml.kernel.org/r/20221128180252.1684965-3-jannh@google.com +Link: https://lkml.kernel.org/r/20221125213714.4115729-3-jannh@google.com +Fixes: f3f0e1d2150b ("khugepaged: add support of collapse for tmpfs/shmem pages") +Signed-off-by: Jann Horn +Acked-by: David Hildenbrand +Reviewed-by: Yang Shi +Cc: John Hubbard +Cc: Peter Xu +Cc: +Signed-off-by: Andrew Morton +[manual backport: this code was refactored from two copies into a common +helper between 5.15 and 6.0] +Signed-off-by: Jann Horn +Signed-off-by: Sasha Levin +--- + mm/khugepaged.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index 0268b549bd60..0eb3adf4ff68 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1444,6 +1444,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + spinlock_t *ptl; + int count = 0; + int i; ++ struct mmu_notifier_range range; + + if (!vma || !vma->vm_file || + vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE) +@@ -1537,9 +1538,13 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + } + + /* step 4: collapse pmd */ ++ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, haddr, ++ haddr + HPAGE_PMD_SIZE); ++ mmu_notifier_invalidate_range_start(&range); + _pmd = pmdp_collapse_flush(vma, haddr, pmd); + mm_dec_nr_ptes(mm); + 
tlb_remove_table_sync_one(); ++ mmu_notifier_invalidate_range_end(&range); + pte_free(mm, pmd_pgtable(_pmd)); + + i_mmap_unlock_write(vma->vm_file->f_mapping); +@@ -1624,11 +1629,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + */ + if (mmap_write_trylock(mm)) { + if (!khugepaged_test_exit(mm)) { ++ struct mmu_notifier_range range; ++ ++ mmu_notifier_range_init(&range, ++ MMU_NOTIFY_CLEAR, 0, ++ NULL, mm, addr, ++ addr + HPAGE_PMD_SIZE); ++ mmu_notifier_invalidate_range_start(&range); + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); + mm_dec_nr_ptes(mm); + tlb_remove_table_sync_one(); + pte_free(mm, pmd_pgtable(_pmd)); ++ mmu_notifier_invalidate_range_end(&range); + } + mmap_write_unlock(mm); + } else { +-- +2.35.1 + diff --git a/queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch b/queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch new file mode 100644 index 00000000000..c973f9c8f10 --- /dev/null +++ b/queue-5.10/mm-khugepaged-take-the-right-locks-for-page-table-re.patch @@ -0,0 +1,141 @@ +From fb456f2c893540f9a10c07cf05d86bc67bea8359 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Dec 2022 18:16:06 +0100 +Subject: mm/khugepaged: take the right locks for page table retraction + +From: Jann Horn + +commit 8d3c106e19e8d251da31ff4cc7462e4565d65084 upstream. + +pagetable walks on address ranges mapped by VMAs can be done under the +mmap lock, the lock of an anon_vma attached to the VMA, or the lock of the +VMA's address_space. Only one of these needs to be held, and it does not +need to be held in exclusive mode. 
+ +Under those circumstances, the rules for concurrent access to page table +entries are: + + - Terminal page table entries (entries that don't point to another page + table) can be arbitrarily changed under the page table lock, with the + exception that they always need to be consistent for + hardware page table walks and lockless_pages_from_mm(). + This includes that they can be changed into non-terminal entries. + - Non-terminal page table entries (which point to another page table) + can not be modified; readers are allowed to READ_ONCE() an entry, verify + that it is non-terminal, and then assume that its value will stay as-is. + +Retracting a page table involves modifying a non-terminal entry, so +page-table-level locks are insufficient to protect against concurrent page +table traversal; it requires taking all the higher-level locks under which +it is possible to start a page walk in the relevant range in exclusive +mode. + +The collapse_huge_page() path for anonymous THP already follows this rule, +but the shmem/file THP path was getting it wrong, making it possible for +concurrent rmap-based operations to cause corruption. 
+ +Link: https://lkml.kernel.org/r/20221129154730.2274278-1-jannh@google.com +Link: https://lkml.kernel.org/r/20221128180252.1684965-1-jannh@google.com +Link: https://lkml.kernel.org/r/20221125213714.4115729-1-jannh@google.com +Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP") +Signed-off-by: Jann Horn +Reviewed-by: Yang Shi +Acked-by: David Hildenbrand +Cc: John Hubbard +Cc: Peter Xu +Cc: +Signed-off-by: Andrew Morton +[manual backport: this code was refactored from two copies into a common +helper between 5.15 and 6.0] +Signed-off-by: Jann Horn +Signed-off-by: Sasha Levin +--- + mm/khugepaged.c | 31 ++++++++++++++++++++++++++----- + 1 file changed, 26 insertions(+), 5 deletions(-) + +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index cf4dceb9682b..014e8b259313 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1457,6 +1457,14 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE)) + return; + ++ /* ++ * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings ++ * that got written to. Without this, we'd have to also lock the ++ * anon_vma if one exists. ++ */ ++ if (vma->anon_vma) ++ return; ++ + hpage = find_lock_page(vma->vm_file->f_mapping, + linear_page_index(vma, haddr)); + if (!hpage) +@@ -1469,6 +1477,19 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + if (!pmd) + goto drop_hpage; + ++ /* ++ * We need to lock the mapping so that from here on, only GUP-fast and ++ * hardware page walks can access the parts of the page tables that ++ * we're operating on. ++ */ ++ i_mmap_lock_write(vma->vm_file->f_mapping); ++ ++ /* ++ * This spinlock should be unnecessary: Nobody else should be accessing ++ * the page tables under spinlock protection here, only ++ * lockless_pages_from_mm() and the hardware page walker can access page ++ * tables while all the high-level locks are held in write mode. 
++ */ + start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); + + /* step 1: check all mapped PTEs are to the right huge page */ +@@ -1515,12 +1536,12 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + } + + /* step 4: collapse pmd */ +- ptl = pmd_lock(vma->vm_mm, pmd); + _pmd = pmdp_collapse_flush(vma, haddr, pmd); +- spin_unlock(ptl); + mm_dec_nr_ptes(mm); + pte_free(mm, pmd_pgtable(_pmd)); + ++ i_mmap_unlock_write(vma->vm_file->f_mapping); ++ + drop_hpage: + unlock_page(hpage); + put_page(hpage); +@@ -1528,6 +1549,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) + + abort: + pte_unmap_unlock(start_pte, ptl); ++ i_mmap_unlock_write(vma->vm_file->f_mapping); + goto drop_hpage; + } + +@@ -1577,7 +1599,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + * An alternative would be drop the check, but check that page + * table is clear before calling pmdp_collapse_flush() under + * ptl. It has higher chance to recover THP for the VMA, but +- * has higher cost too. ++ * has higher cost too. It would also probably require locking ++ * the anon_vma. 
+ */ + if (vma->anon_vma) + continue; +@@ -1599,10 +1622,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) + */ + if (mmap_write_trylock(mm)) { + if (!khugepaged_test_exit(mm)) { +- spinlock_t *ptl = pmd_lock(mm, pmd); + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); +- spin_unlock(ptl); + mm_dec_nr_ptes(mm); + pte_free(mm, pmd_pgtable(_pmd)); + } +-- +2.35.1 + diff --git a/queue-5.10/mm-lru-introduce-testclearpagelru.patch b/queue-5.10/mm-lru-introduce-testclearpagelru.patch new file mode 100644 index 00000000000..abe63017820 --- /dev/null +++ b/queue-5.10/mm-lru-introduce-testclearpagelru.patch @@ -0,0 +1,223 @@ +From d84c0415a11eafaa01336ef3fa61f707986b5656 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Dec 2020 12:34:16 -0800 +Subject: mm/lru: introduce TestClearPageLRU() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Shi + +[ Upstream commit d25b5bd8a8f420b15517c19c4626c0c009f72a63 ] + +Currently lru_lock still guards both lru list and page's lru bit, that's +ok. but if we want to use specific lruvec lock on the page, we need to +pin down the page's lruvec/memcg during locking. Just taking lruvec lock +first may be undermined by the page's memcg charge/migration. To fix this +problem, we will clear the lru bit out of locking and use it as pin down +action to block the page isolation in memcg changing. + +So now a standard steps of page isolation is following: + 1, get_page(); #pin the page avoid to be free + 2, TestClearPageLRU(); #block other isolation like memcg change + 3, spin_lock on lru_lock; #serialize lru list access + 4, delete page from lru list; + +This patch start with the first part: TestClearPageLRU, which combines +PageLRU check and ClearPageLRU into a macro func TestClearPageLRU. This +function will be used as page isolation precondition to prevent other +isolations some where else. 
Then there are may !PageLRU page on lru list, +need to remove BUG() checking accordingly. + +There 2 rules for lru bit now: +1, the lru bit still indicate if a page on lru list, just in some + temporary moment(isolating), the page may have no lru bit when + it's on lru list. but the page still must be on lru list when the + lru bit set. +2, have to remove lru bit before delete it from lru list. + +As Andrew Morton mentioned this change would dirty cacheline for a page +which isn't on the LRU. But the loss would be acceptable in Rong Chen + report: +https://lore.kernel.org/lkml/20200304090301.GB5972@shao2-debian/ + +Link: https://lkml.kernel.org/r/1604566549-62481-15-git-send-email-alex.shi@linux.alibaba.com +Suggested-by: Johannes Weiner +Signed-off-by: Alex Shi +Acked-by: Hugh Dickins +Acked-by: Johannes Weiner +Acked-by: Vlastimil Babka +Cc: Michal Hocko +Cc: Vladimir Davydov +Cc: Alexander Duyck +Cc: Andrea Arcangeli +Cc: Andrey Ryabinin +Cc: Daniel Jordan +Cc: "Huang, Ying" +Cc: Jann Horn +Cc: Joonsoo Kim +Cc: Kirill A. Shutemov +Cc: Kirill A. 
Shutemov +Cc: Konstantin Khlebnikov +Cc: Matthew Wilcox (Oracle) +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Mika Penttilä +Cc: Minchan Kim +Cc: Shakeel Butt +Cc: Tejun Heo +Cc: Thomas Gleixner +Cc: Wei Yang +Cc: Yang Shi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation") +Signed-off-by: Sasha Levin +--- + include/linux/page-flags.h | 1 + + mm/mlock.c | 3 +-- + mm/vmscan.c | 39 +++++++++++++++++++------------------- + 3 files changed, 21 insertions(+), 22 deletions(-) + +diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h +index 4f6ba9379112..14a0cac9e099 100644 +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -335,6 +335,7 @@ PAGEFLAG(Referenced, referenced, PF_HEAD) + PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD) + __CLEARPAGEFLAG(Dirty, dirty, PF_HEAD) + PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD) ++ TESTCLEARFLAG(LRU, lru, PF_HEAD) + PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) + TESTCLEARFLAG(Active, active, PF_HEAD) + PAGEFLAG(Workingset, workingset, PF_HEAD) +diff --git a/mm/mlock.c b/mm/mlock.c +index d487aa864e86..7b0e6334be6f 100644 +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -276,10 +276,9 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) + * We already have pin from follow_page_mask() + * so we can spare the get_page() here. 
+ */ +- if (PageLRU(page)) { ++ if (TestClearPageLRU(page)) { + struct lruvec *lruvec; + +- ClearPageLRU(page); + lruvec = mem_cgroup_page_lruvec(page, + page_pgdat(page)); + del_page_from_lru_list(page, lruvec, +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 51ccd80e70b6..8d62eedfc794 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1547,7 +1547,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + */ + int __isolate_lru_page(struct page *page, isolate_mode_t mode) + { +- int ret = -EINVAL; ++ int ret = -EBUSY; + + /* Only take pages on the LRU. */ + if (!PageLRU(page)) +@@ -1557,8 +1557,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) + if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE)) + return ret; + +- ret = -EBUSY; +- + /* + * To minimise LRU disruption, the caller can indicate that it only + * wants to isolate pages it will be able to operate on without +@@ -1605,8 +1603,10 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode) + * sure the page is not being freed elsewhere -- the + * page release code relies on it. 
+ */ +- ClearPageLRU(page); +- ret = 0; ++ if (TestClearPageLRU(page)) ++ ret = 0; ++ else ++ put_page(page); + } + + return ret; +@@ -1672,8 +1672,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + page = lru_to_page(src); + prefetchw_prev_lru_page(page, src, flags); + +- VM_BUG_ON_PAGE(!PageLRU(page), page); +- + nr_pages = compound_nr(page); + total_scan += nr_pages; + +@@ -1770,21 +1768,18 @@ int isolate_lru_page(struct page *page) + VM_BUG_ON_PAGE(!page_count(page), page); + WARN_RATELIMIT(PageTail(page), "trying to isolate tail page"); + +- if (PageLRU(page)) { ++ if (TestClearPageLRU(page)) { + pg_data_t *pgdat = page_pgdat(page); + struct lruvec *lruvec; + +- spin_lock_irq(&pgdat->lru_lock); ++ get_page(page); + lruvec = mem_cgroup_page_lruvec(page, pgdat); +- if (PageLRU(page)) { +- int lru = page_lru(page); +- get_page(page); +- ClearPageLRU(page); +- del_page_from_lru_list(page, lruvec, lru); +- ret = 0; +- } ++ spin_lock_irq(&pgdat->lru_lock); ++ del_page_from_lru_list(page, lruvec, page_lru(page)); + spin_unlock_irq(&pgdat->lru_lock); ++ ret = 0; + } ++ + return ret; + } + +@@ -4291,6 +4286,10 @@ void check_move_unevictable_pages(struct pagevec *pvec) + nr_pages = thp_nr_pages(page); + pgscanned += nr_pages; + ++ /* block memcg migration during page moving between lru */ ++ if (!TestClearPageLRU(page)) ++ continue; ++ + if (pagepgdat != pgdat) { + if (pgdat) + spin_unlock_irq(&pgdat->lru_lock); +@@ -4299,10 +4298,7 @@ void check_move_unevictable_pages(struct pagevec *pvec) + } + lruvec = mem_cgroup_page_lruvec(page, pgdat); + +- if (!PageLRU(page) || !PageUnevictable(page)) +- continue; +- +- if (page_evictable(page)) { ++ if (page_evictable(page) && PageUnevictable(page)) { + enum lru_list lru = page_lru_base_type(page); + + VM_BUG_ON_PAGE(PageActive(page), page); +@@ -4311,12 +4307,15 @@ void check_move_unevictable_pages(struct pagevec *pvec) + add_page_to_lru_list(page, lruvec, lru); + pgrescued += nr_pages; + } ++ 
SetPageLRU(page); + } + + if (pgdat) { + __count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued); + __count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); + spin_unlock_irq(&pgdat->lru_lock); ++ } else if (pgscanned) { ++ count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned); + } + } + EXPORT_SYMBOL_GPL(check_move_unevictable_pages); +-- +2.35.1 + diff --git a/queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch b/queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch new file mode 100644 index 00000000000..21b81076840 --- /dev/null +++ b/queue-5.10/mm-migrate-fix-thp-s-mapcount-on-isolation.patch @@ -0,0 +1,89 @@ +From 91f25a9aa0bb126c81ed361cef0f8608ac4c3f15 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Nov 2022 17:55:23 +0800 +Subject: mm: migrate: fix THP's mapcount on isolation + +From: Gavin Shan + +[ Upstream commit 829ae0f81ce093d674ff2256f66a714753e9ce32 ] + +The issue is reported when removing memory through virtio_mem device. The +transparent huge page, experienced copy-on-write fault, is wrongly +regarded as pinned. The transparent huge page is escaped from being +isolated in isolate_migratepages_block(). The transparent huge page can't +be migrated and the corresponding memory block can't be put into offline +state. + +Fix it by replacing page_mapcount() with total_mapcount(). With this, the +transparent huge page can be isolated and migrated, and the memory block +can be put into offline state. Besides, The page's refcount is increased +a bit earlier to avoid the page is released when the check is executed. + +Link: https://lkml.kernel.org/r/20221124095523.31061-1-gshan@redhat.com +Fixes: 1da2f328fa64 ("mm,thp,compaction,cma: allow THP migration for CMA allocations") +Signed-off-by: Gavin Shan +Reported-by: Zhenyu Zhang +Tested-by: Zhenyu Zhang +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Cc: Alistair Popple +Cc: Hugh Dickins +Cc: Kirill A. 
Shutemov +Cc: Matthew Wilcox +Cc: William Kucharski +Cc: Zi Yan +Cc: [5.7+] +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + mm/compaction.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/mm/compaction.c b/mm/compaction.c +index 57ce6b001b10..54d1041560c7 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -957,29 +957,29 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + goto isolate_fail; + } + ++ /* ++ * Be careful not to clear PageLRU until after we're ++ * sure the page is not being freed elsewhere -- the ++ * page release code relies on it. ++ */ ++ if (unlikely(!get_page_unless_zero(page))) ++ goto isolate_fail; ++ + /* + * Migration will fail if an anonymous page is pinned in memory, + * so avoid taking lru_lock and isolating it unnecessarily in an + * admittedly racy check. + */ + mapping = page_mapping(page); +- if (!mapping && page_count(page) > page_mapcount(page)) +- goto isolate_fail; ++ if (!mapping && (page_count(page) - 1) > total_mapcount(page)) ++ goto isolate_fail_put; + + /* + * Only allow to migrate anonymous pages in GFP_NOFS context + * because those do not depend on fs locks. + */ + if (!(cc->gfp_mask & __GFP_FS) && mapping) +- goto isolate_fail; +- +- /* +- * Be careful not to clear PageLRU until after we're +- * sure the page is not being freed elsewhere -- the +- * page release code relies on it. 
+- */ +- if (unlikely(!get_page_unless_zero(page))) +- goto isolate_fail; ++ goto isolate_fail_put; + + /* Only take pages on LRU: a check now makes later tests safe */ + if (!PageLRU(page)) +-- +2.35.1 + diff --git a/queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch b/queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch new file mode 100644 index 00000000000..b5a8094271d --- /dev/null +++ b/queue-5.10/mm-mlock-remove-__munlock_isolate_lru_page.patch @@ -0,0 +1,104 @@ +From 6649227e330b37c9583146cd7446b41771b3a7f1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Dec 2020 12:34:11 -0800 +Subject: mm/mlock: remove __munlock_isolate_lru_page() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Shi + +[ Upstream commit 13805a88a9bd3fb37f33dd8972d904de62796f3d ] + +__munlock_isolate_lru_page() only has one caller, remove it to clean up +and simplify code. + +Link: https://lkml.kernel.org/r/1604566549-62481-14-git-send-email-alex.shi@linux.alibaba.com +Signed-off-by: Alex Shi +Acked-by: Hugh Dickins +Acked-by: Johannes Weiner +Acked-by: Vlastimil Babka +Cc: Kirill A. Shutemov +Cc: Alexander Duyck +Cc: Andrea Arcangeli +Cc: Andrey Ryabinin +Cc: "Chen, Rong A" +Cc: Daniel Jordan +Cc: "Huang, Ying" +Cc: Jann Horn +Cc: Joonsoo Kim +Cc: Kirill A. 
Shutemov +Cc: Konstantin Khlebnikov +Cc: Matthew Wilcox (Oracle) +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Michal Hocko +Cc: Mika Penttilä +Cc: Minchan Kim +Cc: Shakeel Butt +Cc: Tejun Heo +Cc: Thomas Gleixner +Cc: Vladimir Davydov +Cc: Wei Yang +Cc: Yang Shi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation") +Signed-off-by: Sasha Levin +--- + mm/mlock.c | 31 +++++++++---------------------- + 1 file changed, 9 insertions(+), 22 deletions(-) + +diff --git a/mm/mlock.c b/mm/mlock.c +index 796c726a0407..d487aa864e86 100644 +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -105,26 +105,6 @@ void mlock_vma_page(struct page *page) + } + } + +-/* +- * Isolate a page from LRU with optional get_page() pin. +- * Assumes lru_lock already held and page already pinned. +- */ +-static bool __munlock_isolate_lru_page(struct page *page, bool getpage) +-{ +- if (PageLRU(page)) { +- struct lruvec *lruvec; +- +- lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); +- if (getpage) +- get_page(page); +- ClearPageLRU(page); +- del_page_from_lru_list(page, lruvec, page_lru(page)); +- return true; +- } +- +- return false; +-} +- + /* + * Finish munlock after successful page isolation + * +@@ -296,9 +276,16 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) + * We already have pin from follow_page_mask() + * so we can spare the get_page() here. 
+ */ +- if (__munlock_isolate_lru_page(page, false)) ++ if (PageLRU(page)) { ++ struct lruvec *lruvec; ++ ++ ClearPageLRU(page); ++ lruvec = mem_cgroup_page_lruvec(page, ++ page_pgdat(page)); ++ del_page_from_lru_list(page, lruvec, ++ page_lru(page)); + continue; +- else ++ } else + __munlock_isolation_failed(page); + } else { + delta_munlocked++; +-- +2.35.1 + diff --git a/queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch b/queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch new file mode 100644 index 00000000000..2efbb62b983 --- /dev/null +++ b/queue-5.10/mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch @@ -0,0 +1,115 @@ +From b824ddafd0a14e7a943171ce5903b83057e0c587 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Dec 2020 12:34:07 -0800 +Subject: mm/mlock: remove lru_lock on TestClearPageMlocked +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alex Shi + +[ Upstream commit 3db19aa39bac33f2e850fa1ddd67be29b192e51f ] + +In the func munlock_vma_page, comments mentained lru_lock needed for +serialization with split_huge_pages. But the page must be PageLocked as +well as pages in split_huge_page series funcs. Thus the PageLocked is +enough to serialize both funcs. + +Further more, Hugh Dickins pointed: before splitting in +split_huge_page_to_list, the page was unmap_page() to remove pmd/ptes +which protect the page from munlock. Thus, no needs to guard +__split_huge_page_tail for mlock clean, just keep the lru_lock there for +isolation purpose. + +LKP found a preempt issue on __mod_zone_page_state which need change to +mod_zone_page_state. Thanks! + +Link: https://lkml.kernel.org/r/1604566549-62481-13-git-send-email-alex.shi@linux.alibaba.com +Signed-off-by: Alex Shi +Acked-by: Hugh Dickins +Acked-by: Johannes Weiner +Acked-by: Vlastimil Babka +Cc: Kirill A. 
Shutemov +Cc: Alexander Duyck +Cc: Andrea Arcangeli +Cc: Andrey Ryabinin +Cc: "Chen, Rong A" +Cc: Daniel Jordan +Cc: "Huang, Ying" +Cc: Jann Horn +Cc: Joonsoo Kim +Cc: Kirill A. Shutemov +Cc: Konstantin Khlebnikov +Cc: Matthew Wilcox (Oracle) +Cc: Mel Gorman +Cc: Michal Hocko +Cc: Michal Hocko +Cc: Mika Penttilä +Cc: Minchan Kim +Cc: Shakeel Butt +Cc: Tejun Heo +Cc: Thomas Gleixner +Cc: Vladimir Davydov +Cc: Wei Yang +Cc: Yang Shi +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation") +Signed-off-by: Sasha Levin +--- + mm/mlock.c | 26 +++++--------------------- + 1 file changed, 5 insertions(+), 21 deletions(-) + +diff --git a/mm/mlock.c b/mm/mlock.c +index 884b1216da6a..796c726a0407 100644 +--- a/mm/mlock.c ++++ b/mm/mlock.c +@@ -187,40 +187,24 @@ static void __munlock_isolation_failed(struct page *page) + unsigned int munlock_vma_page(struct page *page) + { + int nr_pages; +- pg_data_t *pgdat = page_pgdat(page); + + /* For try_to_munlock() and to serialize with page migration */ + BUG_ON(!PageLocked(page)); +- + VM_BUG_ON_PAGE(PageTail(page), page); + +- /* +- * Serialize with any parallel __split_huge_page_refcount() which +- * might otherwise copy PageMlocked to part of the tail pages before +- * we clear it in the head page. It also stabilizes thp_nr_pages(). 
+- */ +- spin_lock_irq(&pgdat->lru_lock); +- + if (!TestClearPageMlocked(page)) { + /* Potentially, PTE-mapped THP: do not skip the rest PTEs */ +- nr_pages = 1; +- goto unlock_out; ++ return 0; + } + + nr_pages = thp_nr_pages(page); +- __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); ++ mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); + +- if (__munlock_isolate_lru_page(page, true)) { +- spin_unlock_irq(&pgdat->lru_lock); ++ if (!isolate_lru_page(page)) + __munlock_isolated_page(page); +- goto out; +- } +- __munlock_isolation_failed(page); +- +-unlock_out: +- spin_unlock_irq(&pgdat->lru_lock); ++ else ++ __munlock_isolation_failed(page); + +-out: + return nr_pages - 1; + } + +-- +2.35.1 + diff --git a/queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch b/queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch new file mode 100644 index 00000000000..5ab65849262 --- /dev/null +++ b/queue-5.10/mm-vmscan-__isolate_lru_page_prepare-cleanup.patch @@ -0,0 +1,183 @@ +From af78db2daeeeec6283747a8d591daf6df57e1961 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 Feb 2021 12:08:01 -0800 +Subject: mm/vmscan: __isolate_lru_page_prepare() cleanup + +From: Alex Shi + +[ Upstream commit c2135f7c570bc274035834848d9bf46ea89ba763 ] + +The function just returns 2 results, so using a 'switch' to deal with its +result is unnecessary. Also simplify it to a bool func as Vlastimil +suggested. + +Also remove 'goto' by reusing list_move(), and take Matthew Wilcox's +suggestion to update comments in function. 
+ +Link: https://lkml.kernel.org/r/728874d7-2d93-4049-68c1-dcc3b2d52ccd@linux.alibaba.com +Signed-off-by: Alex Shi +Reviewed-by: Andrew Morton +Acked-by: Vlastimil Babka +Cc: Matthew Wilcox +Cc: Hugh Dickins +Cc: Yu Zhao +Cc: Michal Hocko +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 829ae0f81ce0 ("mm: migrate: fix THP's mapcount on isolation") +Signed-off-by: Sasha Levin +--- + include/linux/swap.h | 2 +- + mm/compaction.c | 2 +- + mm/vmscan.c | 68 ++++++++++++++++++++------------------------ + 3 files changed, 33 insertions(+), 39 deletions(-) + +diff --git a/include/linux/swap.h b/include/linux/swap.h +index 3577d3a6ec37..394d5de5d4b4 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -358,7 +358,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, + extern unsigned long zone_reclaimable_pages(struct zone *zone); + extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, + gfp_t gfp_mask, nodemask_t *mask); +-extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); ++extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); + extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, + unsigned long nr_pages, + gfp_t gfp_mask, +diff --git a/mm/compaction.c b/mm/compaction.c +index ba3e907f03b7..ea46aadc7c21 100644 +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -980,7 +980,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, + if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail; + +- if (__isolate_lru_page_prepare(page, isolate_mode) != 0) ++ if (!__isolate_lru_page_prepare(page, isolate_mode)) + goto isolate_fail_put; + + /* Try isolate the page */ +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 5ada402c8d95..00a47845a15b 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1543,19 +1543,17 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, + * page: page to 
consider + * mode: one of the LRU isolation modes defined above + * +- * returns 0 on success, -ve errno on failure. ++ * returns true on success, false on failure. + */ +-int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) ++bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) + { +- int ret = -EBUSY; +- + /* Only take pages on the LRU. */ + if (!PageLRU(page)) +- return ret; ++ return false; + + /* Compaction should not handle unevictable pages but CMA can do so */ + if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE)) +- return ret; ++ return false; + + /* + * To minimise LRU disruption, the caller can indicate that it only +@@ -1568,7 +1566,7 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) + if (mode & ISOLATE_ASYNC_MIGRATE) { + /* All the caller can do on PageWriteback is block */ + if (PageWriteback(page)) +- return ret; ++ return false; + + if (PageDirty(page)) { + struct address_space *mapping; +@@ -1584,20 +1582,20 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) + * from the page cache. + */ + if (!trylock_page(page)) +- return ret; ++ return false; + + mapping = page_mapping(page); + migrate_dirty = !mapping || mapping->a_ops->migratepage; + unlock_page(page); + if (!migrate_dirty) +- return ret; ++ return false; + } + } + + if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) +- return ret; ++ return false; + +- return 0; ++ return true; + } + + /* +@@ -1679,35 +1677,31 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, + * only when the page is being freed somewhere else. + */ + scan += nr_pages; +- switch (__isolate_lru_page_prepare(page, mode)) { +- case 0: +- /* +- * Be careful not to clear PageLRU until after we're +- * sure the page is not being freed elsewhere -- the +- * page release code relies on it. 
+- */ +- if (unlikely(!get_page_unless_zero(page))) +- goto busy; +- +- if (!TestClearPageLRU(page)) { +- /* +- * This page may in other isolation path, +- * but we still hold lru_lock. +- */ +- put_page(page); +- goto busy; +- } +- +- nr_taken += nr_pages; +- nr_zone_taken[page_zonenum(page)] += nr_pages; +- list_move(&page->lru, dst); +- break; ++ if (!__isolate_lru_page_prepare(page, mode)) { ++ /* It is being freed elsewhere */ ++ list_move(&page->lru, src); ++ continue; ++ } ++ /* ++ * Be careful not to clear PageLRU until after we're ++ * sure the page is not being freed elsewhere -- the ++ * page release code relies on it. ++ */ ++ if (unlikely(!get_page_unless_zero(page))) { ++ list_move(&page->lru, src); ++ continue; ++ } + +- default: +-busy: +- /* else it is being freed elsewhere */ ++ if (!TestClearPageLRU(page)) { ++ /* Another thread is already isolating this page */ ++ put_page(page); + list_move(&page->lru, src); ++ continue; + } ++ ++ nr_taken += nr_pages; ++ nr_zone_taken[page_zonenum(page)] += nr_pages; ++ list_move(&page->lru, dst); + } + + /* +-- +2.35.1 + diff --git a/queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch b/queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch new file mode 100644 index 00000000000..aa1840183d2 --- /dev/null +++ b/queue-5.10/net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch @@ -0,0 +1,53 @@ +From f7759205e536a72dd8f59d500166f51408c42e0f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Nov 2022 13:54:55 +0100 +Subject: net: usb: qmi_wwan: add u-blox 0x1342 composition +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Davide Tronchin + +[ Upstream commit a487069e11b6527373f7c6f435d8998051d0b5d9 ] + +Add RmNet support for LARA-L6. 
+ +LARA-L6 module can be configured (by AT interface) in three different +USB modes: +* Default mode (Vendor ID: 0x1546 Product ID: 0x1341) with 4 serial +interfaces +* RmNet mode (Vendor ID: 0x1546 Product ID: 0x1342) with 4 serial +interfaces and 1 RmNet virtual network interface +* CDC-ECM mode (Vendor ID: 0x1546 Product ID: 0x1343) with 4 serial +interface and 1 CDC-ECM virtual network interface + +In RmNet mode LARA-L6 exposes the following interfaces: +If 0: Diagnostic +If 1: AT parser +If 2: AT parser +If 3: AT parset/alternative functions +If 4: RMNET interface + +Signed-off-by: Davide Tronchin +Acked-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/qmi_wwan.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c +index 7313e6e03c12..bce151e3706a 100644 +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1352,6 +1352,7 @@ static const struct usb_device_id products[] = { + {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ + {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ + {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ ++ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ + + /* 4. 
Gobi 1000 devices */ + {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ +-- +2.35.1 + diff --git a/queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch b/queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch new file mode 100644 index 00000000000..d8b495c1b4c --- /dev/null +++ b/queue-5.10/regulator-slg51000-wait-after-asserting-cs-pin.patch @@ -0,0 +1,44 @@ +From 19b51875859c93759b9e767950f8d2b937384249 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 14:10:35 +0100 +Subject: regulator: slg51000: Wait after asserting CS pin + +From: Konrad Dybcio + +[ Upstream commit 0b24dfa587c6cc7484cfb170da5c7dd73451f670 ] + +Sony's downstream driver [1], among some other changes, adds a +seemingly random 10ms usleep_range, which turned out to be necessary +for the hardware to function properly on at least Sony Xperia 1 IV. +Without this, I2C transactions with the SLG51000 straight up fail. + +Relax (10-10ms -> 10-11ms) and add the aforementioned sleep to make +sure the hardware has some time to wake up. 
+ +(nagara-2.0.0-mlc/vendor/semc/hardware/camera-kernel-module/) +[1] https://developer.sony.com/file/download/open-source-archive-for-64-0-m-4-29/ + +Signed-off-by: Konrad Dybcio +Link: https://lore.kernel.org/r/20221118131035.54874-1-konrad.dybcio@linaro.org +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/regulator/slg51000-regulator.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/regulator/slg51000-regulator.c b/drivers/regulator/slg51000-regulator.c +index 75a941fb3c2b..1b2eee95ad3f 100644 +--- a/drivers/regulator/slg51000-regulator.c ++++ b/drivers/regulator/slg51000-regulator.c +@@ -457,6 +457,8 @@ static int slg51000_i2c_probe(struct i2c_client *client) + chip->cs_gpiod = cs_gpiod; + } + ++ usleep_range(10000, 11000); ++ + i2c_set_clientdata(client, chip); + chip->chip_irq = client->irq; + chip->dev = dev; +-- +2.35.1 + diff --git a/queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch b/queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch new file mode 100644 index 00000000000..f4dc13885bf --- /dev/null +++ b/queue-5.10/regulator-twl6030-fix-get-status-of-twl6032-regulato.patch @@ -0,0 +1,69 @@ +From 6d822387a0acf36069221010ef3e9a99131f4167 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 20 Nov 2022 23:12:08 +0100 +Subject: regulator: twl6030: fix get status of twl6032 regulators + +From: Andreas Kemnade + +[ Upstream commit 31a6297b89aabc81b274c093a308a7f5b55081a7 ] + +Status is reported as always off in the 6032 case. Status +reporting now matches the logic in the setters. Once of +the differences to the 6030 is that there are no groups, +therefore the state needs to be read out in the lower bits. 
+ +Signed-off-by: Andreas Kemnade +Link: https://lore.kernel.org/r/20221120221208.3093727-3-andreas@kemnade.info +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + drivers/regulator/twl6030-regulator.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/drivers/regulator/twl6030-regulator.c b/drivers/regulator/twl6030-regulator.c +index 7c7e3648ea4b..f3856750944f 100644 +--- a/drivers/regulator/twl6030-regulator.c ++++ b/drivers/regulator/twl6030-regulator.c +@@ -67,6 +67,7 @@ struct twlreg_info { + #define TWL6030_CFG_STATE_SLEEP 0x03 + #define TWL6030_CFG_STATE_GRP_SHIFT 5 + #define TWL6030_CFG_STATE_APP_SHIFT 2 ++#define TWL6030_CFG_STATE_MASK 0x03 + #define TWL6030_CFG_STATE_APP_MASK (0x03 << TWL6030_CFG_STATE_APP_SHIFT) + #define TWL6030_CFG_STATE_APP(v) (((v) & TWL6030_CFG_STATE_APP_MASK) >>\ + TWL6030_CFG_STATE_APP_SHIFT) +@@ -128,13 +129,14 @@ static int twl6030reg_is_enabled(struct regulator_dev *rdev) + if (grp < 0) + return grp; + grp &= P1_GRP_6030; ++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); ++ val = TWL6030_CFG_STATE_APP(val); + } else { ++ val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); ++ val &= TWL6030_CFG_STATE_MASK; + grp = 1; + } + +- val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); +- val = TWL6030_CFG_STATE_APP(val); +- + return grp && (val == TWL6030_CFG_STATE_ON); + } + +@@ -187,7 +189,12 @@ static int twl6030reg_get_status(struct regulator_dev *rdev) + + val = twlreg_read(info, TWL_MODULE_PM_RECEIVER, VREG_STATE); + +- switch (TWL6030_CFG_STATE_APP(val)) { ++ if (info->features & TWL6032_SUBCLASS) ++ val &= TWL6030_CFG_STATE_MASK; ++ else ++ val = TWL6030_CFG_STATE_APP(val); ++ ++ switch (val) { + case TWL6030_CFG_STATE_ON: + return REGULATOR_STATUS_NORMAL; + +-- +2.35.1 + diff --git a/queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch b/queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch new file mode 100644 index 
00000000000..7a3466fec7a --- /dev/null +++ b/queue-5.10/rtc-check-return-value-from-mc146818_get_time.patch @@ -0,0 +1,159 @@ +From 7a3b77a291c1fb84974f59d34a2d5cf044f52d8f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 21:01:25 +0100 +Subject: rtc: Check return value from mc146818_get_time() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit 0dd8d6cb9eddfe637bcd821bbfd40ebd5a0737b9 ] + +There are 4 users of mc146818_get_time() and none of them was checking +the return value from this function. Change this. + +Print the appropriate warnings in callers of mc146818_get_time() instead +of in the function mc146818_get_time() itself, in order not to add +strings to rtc-mc146818-lib.c, which is kind of a library. + +The callers of alpha_rtc_read_time() and cmos_read_time() may use the +contents of (struct rtc_time *) even when the functions return a failure +code. Therefore, set the contents of (struct rtc_time *) to 0x00, +which looks more sensible then 0xff and aligns with the (possibly +stale?) comment in cmos_read_time: + + /* + * If pm_trace abused the RTC for storage, set the timespec to 0, + * which tells the caller that this RTC value is unusable. + */ + +For consistency, do this in mc146818_get_time(). + +Note: hpet_rtc_interrupt() may call mc146818_get_time() many times a +second. It is very unlikely, though, that the RTC suddenly stops +working and mc146818_get_time() would consistently fail. + +Only compile-tested on alpha. 
+ +Signed-off-by: Mateusz Jończyk +Cc: Richard Henderson +Cc: Ivan Kokshaysky +Cc: Matt Turner +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: Dave Hansen +Cc: Alessandro Zummo +Cc: Alexandre Belloni +Cc: linux-alpha@vger.kernel.org +Cc: x86@kernel.org +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210200131.153887-4-mat.jonczyk@o2.pl +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + arch/alpha/kernel/rtc.c | 7 ++++++- + arch/x86/kernel/hpet.c | 8 ++++++-- + drivers/base/power/trace.c | 6 +++++- + drivers/rtc/rtc-cmos.c | 9 ++++++++- + drivers/rtc/rtc-mc146818-lib.c | 2 +- + 5 files changed, 26 insertions(+), 6 deletions(-) + +diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c +index 1b1d5963ac55..48ffbfbd0624 100644 +--- a/arch/alpha/kernel/rtc.c ++++ b/arch/alpha/kernel/rtc.c +@@ -80,7 +80,12 @@ init_rtc_epoch(void) + static int + alpha_rtc_read_time(struct device *dev, struct rtc_time *tm) + { +- mc146818_get_time(tm); ++ int ret = mc146818_get_time(tm); ++ ++ if (ret < 0) { ++ dev_err_ratelimited(dev, "unable to read current time\n"); ++ return ret; ++ } + + /* Adjust for non-default epochs. 
It's easier to depend on the + generic __get_rtc_time and adjust the epoch here than create +diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c +index 4ab7a9757e52..574df24a8e5a 100644 +--- a/arch/x86/kernel/hpet.c ++++ b/arch/x86/kernel/hpet.c +@@ -1325,8 +1325,12 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) + hpet_rtc_timer_reinit(); + memset(&curr_time, 0, sizeof(struct rtc_time)); + +- if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) +- mc146818_get_time(&curr_time); ++ if (hpet_rtc_flags & (RTC_UIE | RTC_AIE)) { ++ if (unlikely(mc146818_get_time(&curr_time) < 0)) { ++ pr_err_ratelimited("unable to read current time from RTC\n"); ++ return IRQ_HANDLED; ++ } ++ } + + if (hpet_rtc_flags & RTC_UIE && + curr_time.tm_sec != hpet_prev_update_sec) { +diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c +index 94665037f4a3..72b7a92337b1 100644 +--- a/drivers/base/power/trace.c ++++ b/drivers/base/power/trace.c +@@ -120,7 +120,11 @@ static unsigned int read_magic_time(void) + struct rtc_time time; + unsigned int val; + +- mc146818_get_time(&time); ++ if (mc146818_get_time(&time) < 0) { ++ pr_err("Unable to read current time from RTC\n"); ++ return 0; ++ } ++ + pr_info("RTC time: %ptRt, date: %ptRd\n", &time, &time); + val = time.tm_year; /* 100 years */ + if (val > 100) +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index ed4f512eabf0..f8358bb2ae31 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -222,6 +222,8 @@ static inline void cmos_write_bank2(unsigned char val, unsigned char addr) + + static int cmos_read_time(struct device *dev, struct rtc_time *t) + { ++ int ret; ++ + /* + * If pm_trace abused the RTC for storage, set the timespec to 0, + * which tells the caller that this RTC value is unusable. 
+@@ -229,7 +231,12 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) + if (!pm_trace_rtc_valid()) + return -EIO; + +- mc146818_get_time(t); ++ ret = mc146818_get_time(t); ++ if (ret < 0) { ++ dev_err_ratelimited(dev, "unable to read current time\n"); ++ return ret; ++ } ++ + return 0; + } + +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 6262f0680f13..3ae5c690f22b 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -24,7 +24,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) + /* Ensure that the RTC is accessible. Bit 6 must be 0! */ + if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { + spin_unlock_irqrestore(&rtc_lock, flags); +- memset(time, 0xff, sizeof(*time)); ++ memset(time, 0, sizeof(*time)); + return -EIO; + } + +-- +2.35.1 + diff --git a/queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch b/queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch new file mode 100644 index 00000000000..7d1705a9621 --- /dev/null +++ b/queue-5.10/rtc-cmos-avoid-uip-when-reading-alarm-time.patch @@ -0,0 +1,146 @@ +From fcac9c587c7c5343a87cdce5d46cd47ca1057c21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 21:01:30 +0100 +Subject: rtc: cmos: avoid UIP when reading alarm time +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit cdedc45c579faf8cc6608d3ef81576ee0d512aa4 ] + +Some Intel chipsets disconnect the time and date RTC registers when the +clock update is in progress: during this time reads may return bogus +values and writes fail silently. This includes the RTC alarm registers. +[1] + +cmos_read_alarm() did not take account for that, which caused alarm time +reads to sometimes return bogus values. This can be shown with a test +patch that I am attaching to this patch series. + +Fix this, by using mc146818_avoid_UIP(). 
+ +[1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] +Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006) +Page 208 +https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf + "If a RAM read from the ten time and date bytes is attempted + during an update cycle, the value read do not necessarily + represent the true contents of those locations. Any RAM writes + under the same conditions are ignored." + +Signed-off-by: Mateusz Jończyk +Cc: Alessandro Zummo +Cc: Alexandre Belloni +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210200131.153887-9-mat.jonczyk@o2.pl +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 72 ++++++++++++++++++++++++++++-------------- + 1 file changed, 49 insertions(+), 23 deletions(-) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index 601e3967e1f0..d419eb988b22 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -249,10 +249,46 @@ static int cmos_set_time(struct device *dev, struct rtc_time *t) + return mc146818_set_time(t); + } + ++struct cmos_read_alarm_callback_param { ++ struct cmos_rtc *cmos; ++ struct rtc_time *time; ++ unsigned char rtc_control; ++}; ++ ++static void cmos_read_alarm_callback(unsigned char __always_unused seconds, ++ void *param_in) ++{ ++ struct cmos_read_alarm_callback_param *p = ++ (struct cmos_read_alarm_callback_param *)param_in; ++ struct rtc_time *time = p->time; ++ ++ time->tm_sec = CMOS_READ(RTC_SECONDS_ALARM); ++ time->tm_min = CMOS_READ(RTC_MINUTES_ALARM); ++ time->tm_hour = CMOS_READ(RTC_HOURS_ALARM); ++ ++ if (p->cmos->day_alrm) { ++ /* ignore upper bits on readback per ACPI spec */ ++ time->tm_mday = CMOS_READ(p->cmos->day_alrm) & 0x3f; ++ if (!time->tm_mday) ++ time->tm_mday = -1; ++ ++ if (p->cmos->mon_alrm) { ++ time->tm_mon = CMOS_READ(p->cmos->mon_alrm); ++ if (!time->tm_mon) ++ time->tm_mon = -1; ++ } ++ 
} ++ ++ p->rtc_control = CMOS_READ(RTC_CONTROL); ++} ++ + static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) + { + struct cmos_rtc *cmos = dev_get_drvdata(dev); +- unsigned char rtc_control; ++ struct cmos_read_alarm_callback_param p = { ++ .cmos = cmos, ++ .time = &t->time, ++ }; + + /* This not only a rtc_op, but also called directly */ + if (!is_valid_irq(cmos->irq)) +@@ -263,28 +299,18 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) + * the future. + */ + +- spin_lock_irq(&rtc_lock); +- t->time.tm_sec = CMOS_READ(RTC_SECONDS_ALARM); +- t->time.tm_min = CMOS_READ(RTC_MINUTES_ALARM); +- t->time.tm_hour = CMOS_READ(RTC_HOURS_ALARM); +- +- if (cmos->day_alrm) { +- /* ignore upper bits on readback per ACPI spec */ +- t->time.tm_mday = CMOS_READ(cmos->day_alrm) & 0x3f; +- if (!t->time.tm_mday) +- t->time.tm_mday = -1; +- +- if (cmos->mon_alrm) { +- t->time.tm_mon = CMOS_READ(cmos->mon_alrm); +- if (!t->time.tm_mon) +- t->time.tm_mon = -1; +- } +- } +- +- rtc_control = CMOS_READ(RTC_CONTROL); +- spin_unlock_irq(&rtc_lock); ++ /* Some Intel chipsets disconnect the alarm registers when the clock ++ * update is in progress - during this time reads return bogus values ++ * and writes may fail silently. See for example "7th Generation Intel® ++ * Processor Family I/O for U/Y Platforms [...] Datasheet", section ++ * 27.7.1 ++ * ++ * Use the mc146818_avoid_UIP() function to avoid this. 
++ */ ++ if (!mc146818_avoid_UIP(cmos_read_alarm_callback, &p)) ++ return -EIO; + +- if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { ++ if (!(p.rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + if (((unsigned)t->time.tm_sec) < 0x60) + t->time.tm_sec = bcd2bin(t->time.tm_sec); + else +@@ -313,7 +339,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) + } + } + +- t->enabled = !!(rtc_control & RTC_AIE); ++ t->enabled = !!(p.rtc_control & RTC_AIE); + t->pending = 0; + + return 0; +-- +2.35.1 + diff --git a/queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch b/queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch new file mode 100644 index 00000000000..d47b3349264 --- /dev/null +++ b/queue-5.10/rtc-cmos-avoid-uip-when-writing-alarm-time.patch @@ -0,0 +1,178 @@ +From 84f76456ea301647d1e114c5f17b16c62b8d588f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 21:01:31 +0100 +Subject: rtc: cmos: avoid UIP when writing alarm time +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit cd17420ebea580c22dd3a93f7237de3d2cfafc37 ] + +Some Intel chipsets disconnect the time and date RTC registers when the +clock update is in progress: during this time reads may return bogus +values and writes fail silently. This includes the RTC alarm registers. +[1] + +cmos_set_alarm() did not take account for that, fix it. + +[1] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] +Datasheet, Volume 1 of 2 (Intel's Document Number: 334658-006) +Page 208 +https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf + "If a RAM read from the ten time and date bytes is attempted + during an update cycle, the value read do not necessarily + represent the true contents of those locations. Any RAM writes + under the same conditions are ignored." 
+ +Signed-off-by: Mateusz Jończyk +Cc: Alessandro Zummo +Cc: Alexandre Belloni +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210200131.153887-10-mat.jonczyk@o2.pl +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 107 +++++++++++++++++++++++++---------------- + 1 file changed, 66 insertions(+), 41 deletions(-) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index 93ffb9eaf63a..601e3967e1f0 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -444,10 +444,57 @@ static int cmos_validate_alarm(struct device *dev, struct rtc_wkalrm *t) + return 0; + } + ++struct cmos_set_alarm_callback_param { ++ struct cmos_rtc *cmos; ++ unsigned char mon, mday, hrs, min, sec; ++ struct rtc_wkalrm *t; ++}; ++ ++/* Note: this function may be executed by mc146818_avoid_UIP() more then ++ * once ++ */ ++static void cmos_set_alarm_callback(unsigned char __always_unused seconds, ++ void *param_in) ++{ ++ struct cmos_set_alarm_callback_param *p = ++ (struct cmos_set_alarm_callback_param *)param_in; ++ ++ /* next rtc irq must not be from previous alarm setting */ ++ cmos_irq_disable(p->cmos, RTC_AIE); ++ ++ /* update alarm */ ++ CMOS_WRITE(p->hrs, RTC_HOURS_ALARM); ++ CMOS_WRITE(p->min, RTC_MINUTES_ALARM); ++ CMOS_WRITE(p->sec, RTC_SECONDS_ALARM); ++ ++ /* the system may support an "enhanced" alarm */ ++ if (p->cmos->day_alrm) { ++ CMOS_WRITE(p->mday, p->cmos->day_alrm); ++ if (p->cmos->mon_alrm) ++ CMOS_WRITE(p->mon, p->cmos->mon_alrm); ++ } ++ ++ if (use_hpet_alarm()) { ++ /* ++ * FIXME the HPET alarm glue currently ignores day_alrm ++ * and mon_alrm ... 
++ */ ++ hpet_set_alarm_time(p->t->time.tm_hour, p->t->time.tm_min, ++ p->t->time.tm_sec); ++ } ++ ++ if (p->t->enabled) ++ cmos_irq_enable(p->cmos, RTC_AIE); ++} ++ + static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) + { + struct cmos_rtc *cmos = dev_get_drvdata(dev); +- unsigned char mon, mday, hrs, min, sec, rtc_control; ++ struct cmos_set_alarm_callback_param p = { ++ .cmos = cmos, ++ .t = t ++ }; ++ unsigned char rtc_control; + int ret; + + /* This not only a rtc_op, but also called directly */ +@@ -458,11 +505,11 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) + if (ret < 0) + return ret; + +- mon = t->time.tm_mon + 1; +- mday = t->time.tm_mday; +- hrs = t->time.tm_hour; +- min = t->time.tm_min; +- sec = t->time.tm_sec; ++ p.mon = t->time.tm_mon + 1; ++ p.mday = t->time.tm_mday; ++ p.hrs = t->time.tm_hour; ++ p.min = t->time.tm_min; ++ p.sec = t->time.tm_sec; + + spin_lock_irq(&rtc_lock); + rtc_control = CMOS_READ(RTC_CONTROL); +@@ -470,43 +517,21 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) + + if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { + /* Writing 0xff means "don't care" or "match all". */ +- mon = (mon <= 12) ? bin2bcd(mon) : 0xff; +- mday = (mday >= 1 && mday <= 31) ? bin2bcd(mday) : 0xff; +- hrs = (hrs < 24) ? bin2bcd(hrs) : 0xff; +- min = (min < 60) ? bin2bcd(min) : 0xff; +- sec = (sec < 60) ? bin2bcd(sec) : 0xff; ++ p.mon = (p.mon <= 12) ? bin2bcd(p.mon) : 0xff; ++ p.mday = (p.mday >= 1 && p.mday <= 31) ? bin2bcd(p.mday) : 0xff; ++ p.hrs = (p.hrs < 24) ? bin2bcd(p.hrs) : 0xff; ++ p.min = (p.min < 60) ? bin2bcd(p.min) : 0xff; ++ p.sec = (p.sec < 60) ? 
bin2bcd(p.sec) : 0xff; + } + +- spin_lock_irq(&rtc_lock); +- +- /* next rtc irq must not be from previous alarm setting */ +- cmos_irq_disable(cmos, RTC_AIE); +- +- /* update alarm */ +- CMOS_WRITE(hrs, RTC_HOURS_ALARM); +- CMOS_WRITE(min, RTC_MINUTES_ALARM); +- CMOS_WRITE(sec, RTC_SECONDS_ALARM); +- +- /* the system may support an "enhanced" alarm */ +- if (cmos->day_alrm) { +- CMOS_WRITE(mday, cmos->day_alrm); +- if (cmos->mon_alrm) +- CMOS_WRITE(mon, cmos->mon_alrm); +- } +- +- if (use_hpet_alarm()) { +- /* +- * FIXME the HPET alarm glue currently ignores day_alrm +- * and mon_alrm ... +- */ +- hpet_set_alarm_time(t->time.tm_hour, t->time.tm_min, +- t->time.tm_sec); +- } +- +- if (t->enabled) +- cmos_irq_enable(cmos, RTC_AIE); +- +- spin_unlock_irq(&rtc_lock); ++ /* ++ * Some Intel chipsets disconnect the alarm registers when the clock ++ * update is in progress - during this time writes fail silently. ++ * ++ * Use mc146818_avoid_UIP() to avoid this. ++ */ ++ if (!mc146818_avoid_UIP(cmos_set_alarm_callback, &p)) ++ return -EIO; + + cmos->alarm_expires = rtc_tm_to_time64(&t->time); + +-- +2.35.1 + diff --git a/queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch b/queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch new file mode 100644 index 00000000000..38784ccf713 --- /dev/null +++ b/queue-5.10/rtc-cmos-remove-stale-revisit-comments.patch @@ -0,0 +1,58 @@ +From 74d206aa5712c743dac13679d2cf585f3d88199d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Jul 2021 23:04:37 +0200 +Subject: rtc: cmos: remove stale REVISIT comments +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit e1aba37569f0aa9c993f740828871e48eea79f98 ] + +It appears mc146818_get_time() and mc146818_set_time() now correctly +use the century register as specified in the ACPI FADT table. It is not +clear what else could be done here. 
+ +These comments were introduced by + commit 7be2c7c96aff ("[PATCH] RTC framework driver for CMOS RTCs") +in 2007, which originally referenced function get_rtc_time() in +include/asm-generic/rtc.h . + +Signed-off-by: Mateusz Jończyk +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20210716210437.29622-1-mat.jonczyk@o2.pl +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index 8e8ce40f6440..ed4f512eabf0 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -229,19 +229,13 @@ static int cmos_read_time(struct device *dev, struct rtc_time *t) + if (!pm_trace_rtc_valid()) + return -EIO; + +- /* REVISIT: if the clock has a "century" register, use +- * that instead of the heuristic in mc146818_get_time(). +- * That'll make Y3K compatility (year > 2070) easy! +- */ + mc146818_get_time(t); + return 0; + } + + static int cmos_set_time(struct device *dev, struct rtc_time *t) + { +- /* REVISIT: set the "century" register if available +- * +- * NOTE: this ignores the issue whereby updating the seconds ++ /* NOTE: this ignores the issue whereby updating the seconds + * takes effect exactly 500ms after we write the register. + * (Also queueing and other delays before we get this far.) 
+ */ +-- +2.35.1 + diff --git a/queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch b/queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch new file mode 100644 index 00000000000..7a65017e9df --- /dev/null +++ b/queue-5.10/rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch @@ -0,0 +1,49 @@ +From ad5b5459965ce741f5ad888fa23e74271b21b8c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Feb 2021 20:39:36 +0800 +Subject: rtc: cmos: Replace spin_lock_irqsave with spin_lock in hard IRQ + +From: Xiaofei Tan + +[ Upstream commit 6950d046eb6eabbc271fda416460c05f7a85698a ] + +It is redundant to do irqsave and irqrestore in hardIRQ context, where +it has been in a irq-disabled context. + +Signed-off-by: Xiaofei Tan +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/1612355981-6764-2-git-send-email-tanxiaofei@huawei.com +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index d419eb988b22..21f2bdd025b6 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -704,11 +704,10 @@ static struct cmos_rtc cmos_rtc; + + static irqreturn_t cmos_interrupt(int irq, void *p) + { +- unsigned long flags; + u8 irqstat; + u8 rtc_control; + +- spin_lock_irqsave(&rtc_lock, flags); ++ spin_lock(&rtc_lock); + + /* When the HPET interrupt handler calls us, the interrupt + * status is passed as arg1 instead of the irq number. 
But +@@ -742,7 +741,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p) + hpet_mask_rtc_irq_bit(RTC_AIE); + CMOS_READ(RTC_INTR_FLAGS); + } +- spin_unlock_irqrestore(&rtc_lock, flags); ++ spin_unlock(&rtc_lock); + + if (is_intr(irqstat)) { + rtc_update_irq(p, 1, irqstat); +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch b/queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch new file mode 100644 index 00000000000..f3cbc3aad4b --- /dev/null +++ b/queue-5.10/rtc-mc146818-detect-and-handle-broken-rtcs.patch @@ -0,0 +1,76 @@ +From 9ef93cd13386ac610b618b84cd2cc715272b215e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 26 Jan 2021 18:02:11 +0100 +Subject: rtc: mc146818: Detect and handle broken RTCs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Gleixner + +[ Upstream commit 211e5db19d15a721b2953ea54b8f26c2963720eb ] + +The recent fix for handling the UIP bit unearthed another issue in the RTC +code. If the RTC is advertised but the readout is straight 0xFF because +it's not available, the old code just proceeded with crappy values, but the +new code hangs because it waits for the UIP bit to become low. + +Add a sanity check in the RTC CMOS probe function which reads the RTC_VALID +register (Register D) which should have bit 0-6 cleared. If that's not the +case then fail to register the CMOS. + +Add the same check to mc146818_get_time(), warn once when the condition +is true and invalidate the rtc_time data. 
+ +Reported-by: Mickaël Salaün +Signed-off-by: Thomas Gleixner +Tested-by: Mickaël Salaün +Acked-by: Alexandre Belloni +Link: https://lore.kernel.org/r/87tur3fx7w.fsf@nanos.tec.linutronix.de +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 8 ++++++++ + drivers/rtc/rtc-mc146818-lib.c | 7 +++++++ + 2 files changed, 15 insertions(+) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index 58c6382a2807..cce4b62ffdd0 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -808,6 +808,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) + + spin_lock_irq(&rtc_lock); + ++ /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ ++ if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) { ++ spin_unlock_irq(&rtc_lock); ++ dev_warn(dev, "not accessible\n"); ++ retval = -ENXIO; ++ goto cleanup1; ++ } ++ + if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) { + /* force periodic irq to CMOS reset default of 1024Hz; + * +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 8364e4141670..7f01dc41271d 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -21,6 +21,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) + + again: + spin_lock_irqsave(&rtc_lock, flags); ++ /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ ++ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ memset(time, 0xff, sizeof(*time)); ++ return 0; ++ } ++ + /* + * Check whether there is an update in progress during which the + * readout is unspecified. The maximum update time is ~2ms. 
Poll +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch b/queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch new file mode 100644 index 00000000000..553d0f8e249 --- /dev/null +++ b/queue-5.10/rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch @@ -0,0 +1,65 @@ +From 4ef1e2e1376fa1358435ab952fe3ad29ae1082a6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 1 Feb 2021 20:24:17 +0100 +Subject: rtc: mc146818: Dont test for bit 0-5 in Register D + +From: Thomas Gleixner + +[ Upstream commit ebb22a05943666155e6da04407cc6e913974c78c ] + +The recent change to validate the RTC turned out to be overly tight. + +While it cures the problem on the reporters machine it breaks machines +with Intel chipsets which use bit 0-5 of the D register. So check only +for bit 6 being 0 which is the case on these Intel machines as well. + +Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") +Reported-by: Serge Belyshev +Reported-by: Dirk Gouders +Reported-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Dirk Gouders +Tested-by: Len Brown +Tested-by: Borislav Petkov +Acked-by: Alexandre Belloni +Link: https://lore.kernel.org/r/87zh0nbnha.fsf@nanos.tec.linutronix.de +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 4 ++-- + drivers/rtc/rtc-mc146818-lib.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index cce4b62ffdd0..8e8ce40f6440 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -808,8 +808,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) + + spin_lock_irq(&rtc_lock); + +- /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ +- if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) { ++ /* Ensure that the RTC is accessible. Bit 6 must be 0! 
*/ ++ if ((CMOS_READ(RTC_VALID) & 0x40) != 0) { + spin_unlock_irq(&rtc_lock); + dev_warn(dev, "not accessible\n"); + retval = -ENXIO; +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 7f01dc41271d..6ed2cd5d2bba 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -21,8 +21,8 @@ unsigned int mc146818_get_time(struct rtc_time *time) + + again: + spin_lock_irqsave(&rtc_lock, flags); +- /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ +- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) { ++ /* Ensure that the RTC is accessible. Bit 6 must be 0! */ ++ if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { + spin_unlock_irqrestore(&rtc_lock, flags); + memset(time, 0xff, sizeof(*time)); + return 0; +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch b/queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch new file mode 100644 index 00000000000..f764649d5cf --- /dev/null +++ b/queue-5.10/rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch @@ -0,0 +1,51 @@ +From dcb86a8ba704e221a6401c14de2f0d35b725235f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 21:01:24 +0100 +Subject: rtc: mc146818-lib: change return values of mc146818_get_time() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit d35786b3a28dee20b12962ae2dd365892a99ed1a ] + +No function is checking mc146818_get_time() return values yet, so +correct them to make them more customary. 
+ +Signed-off-by: Mateusz Jończyk +Cc: Alessandro Zummo +Cc: Alexandre Belloni +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210200131.153887-3-mat.jonczyk@o2.pl +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-mc146818-lib.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 6ed2cd5d2bba..6262f0680f13 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -25,7 +25,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) + if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { + spin_unlock_irqrestore(&rtc_lock, flags); + memset(time, 0xff, sizeof(*time)); +- return 0; ++ return -EIO; + } + + /* +@@ -116,7 +116,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) + + time->tm_mon--; + +- return RTC_24H; ++ return 0; + } + EXPORT_SYMBOL_GPL(mc146818_get_time); + +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch b/queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch new file mode 100644 index 00000000000..508c8a6daf3 --- /dev/null +++ b/queue-5.10/rtc-mc146818-lib-extract-mc146818_avoid_uip.patch @@ -0,0 +1,136 @@ +From 0f4373bd6d8f06e9d571ae76ea8f6be4d684344b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 21:01:27 +0100 +Subject: rtc: mc146818-lib: extract mc146818_avoid_UIP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit ec5895c0f2d87b9bf4185db1915e40fa6fcfc0ac ] + +Function mc146818_get_time() contains an elaborate mechanism of reading +the RTC time while no RTC update is in progress. It turns out that +reading the RTC alarm clock also requires avoiding the RTC update. 
+Therefore, the mechanism in mc146818_get_time() should be reused - so +extract it into a separate function. + +The logic in mc146818_avoid_UIP() is same as in mc146818_get_time() +except that after every + + if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { + +there is now "mdelay(1)". + +To avoid producing a very unreadable patch, mc146818_get_time() will be +refactored to use mc146818_avoid_UIP() in the next patch. + +Signed-off-by: Mateusz Jończyk +Cc: Alessandro Zummo +Cc: Alexandre Belloni +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210200131.153887-6-mat.jonczyk@o2.pl +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-mc146818-lib.c | 70 ++++++++++++++++++++++++++++++++++ + include/linux/mc146818rtc.h | 3 ++ + 2 files changed, 73 insertions(+) + +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 94df6056c5c0..46527a5d3912 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -8,6 +8,76 @@ + #include + #endif + ++/* ++ * Execute a function while the UIP (Update-in-progress) bit of the RTC is ++ * unset. ++ * ++ * Warning: callback may be executed more then once. ++ */ ++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), ++ void *param) ++{ ++ int i; ++ unsigned long flags; ++ unsigned char seconds; ++ ++ for (i = 0; i < 10; i++) { ++ spin_lock_irqsave(&rtc_lock, flags); ++ ++ /* ++ * Check whether there is an update in progress during which the ++ * readout is unspecified. The maximum update time is ~2ms. Poll ++ * every msec for completion. ++ * ++ * Store the second value before checking UIP so a long lasting ++ * NMI which happens to hit after the UIP check cannot make ++ * an update cycle invisible. 
++ */ ++ seconds = CMOS_READ(RTC_SECONDS); ++ ++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ mdelay(1); ++ continue; ++ } ++ ++ /* Revalidate the above readout */ ++ if (seconds != CMOS_READ(RTC_SECONDS)) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ continue; ++ } ++ ++ if (callback) ++ callback(seconds, param); ++ ++ /* ++ * Check for the UIP bit again. If it is set now then ++ * the above values may contain garbage. ++ */ ++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ mdelay(1); ++ continue; ++ } ++ ++ /* ++ * A NMI might have interrupted the above sequence so check ++ * whether the seconds value has changed which indicates that ++ * the NMI took longer than the UIP bit was set. Unlikely, but ++ * possible and there is also virt... ++ */ ++ if (seconds != CMOS_READ(RTC_SECONDS)) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ continue; ++ } ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ ++ return true; ++ } ++ return false; ++} ++EXPORT_SYMBOL_GPL(mc146818_avoid_UIP); ++ + /* + * If the UIP (Update-in-progress) bit of the RTC is set for more then + * 10ms, the RTC is apparently broken or not present. 
+diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h +index c246ce191915..fb042e0e7d76 100644 +--- a/include/linux/mc146818rtc.h ++++ b/include/linux/mc146818rtc.h +@@ -129,4 +129,7 @@ bool mc146818_does_rtc_work(void); + unsigned int mc146818_get_time(struct rtc_time *time); + int mc146818_set_time(struct rtc_time *time); + ++bool mc146818_avoid_UIP(void (*callback)(unsigned char seconds, void *param), ++ void *param); ++ + #endif /* _MC146818RTC_H */ +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch b/queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch new file mode 100644 index 00000000000..75a88741ca5 --- /dev/null +++ b/queue-5.10/rtc-mc146818-lib-fix-rtc-presence-check.patch @@ -0,0 +1,168 @@ +From 453e0ae9b265b8eb55cb8d8d60e86f4757a919b8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Dec 2021 21:01:26 +0100 +Subject: rtc: mc146818-lib: fix RTC presence check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mateusz Jończyk + +[ Upstream commit ea6fa4961aab8f90a8aa03575a98b4bda368d4b6 ] + +To prevent an infinite loop in mc146818_get_time(), +commit 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") +added a check for RTC availability. Together with a later fix, it +checked if bit 6 in register 0x0d is cleared. + +This, however, caused a false negative on a motherboard with an AMD +SB710 southbridge; according to the specification [1], bit 6 of register +0x0d of this chipset is a scratchbit. This caused a regression in Linux +5.11 - the RTC was determined broken by the kernel and not used by +rtc-cmos.c [3]. This problem was also reported in Fedora [4]. + +As a better alternative, check whether the UIP ("Update-in-progress") +bit is set for longer than 10ms. If that is the case, then apparently +the RTC is either absent (and all register reads return 0xff) or broken. 
+Also limit the number of loop iterations in mc146818_get_time() to 10 to +prevent an infinite loop there. + +The functions mc146818_get_time() and mc146818_does_rtc_work() will be +refactored later in this patch series, in order to fix a separate +problem with reading / setting the RTC alarm time. This is done so to +avoid a confusion about what is being fixed when. + +In a previous approach to this problem, I implemented a check whether +the RTC_HOURS register contains a value <= 24. This, however, sometimes +did not work correctly on my Intel Kaby Lake laptop. According to +Intel's documentation [2], "the time and date RAM locations (0-9) are +disconnected from the external bus" during the update cycle so reading +this register without checking the UIP bit is incorrect. + +[1] AMD SB700/710/750 Register Reference Guide, page 308, +https://developer.amd.com/wordpress/media/2012/10/43009_sb7xx_rrg_pub_1.00.pdf + +[2] 7th Generation Intel ® Processor Family I/O for U/Y Platforms [...] Datasheet +Volume 1 of 2, page 209 +Intel's Document Number: 334658-006, +https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/7th-and-8th-gen-core-family-mobile-u-y-processor-lines-i-o-datasheet-vol-1.pdf + +[3] Functions in arch/x86/kernel/rtc.c apparently were using it. 
+ +[4] https://bugzilla.redhat.com/show_bug.cgi?id=1936688 + +Fixes: 211e5db19d15 ("rtc: mc146818: Detect and handle broken RTCs") +Fixes: ebb22a059436 ("rtc: mc146818: Dont test for bit 0-5 in Register D") +Signed-off-by: Mateusz Jończyk +Cc: Thomas Gleixner +Cc: Alessandro Zummo +Cc: Alexandre Belloni +Signed-off-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20211210200131.153887-5-mat.jonczyk@o2.pl +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-cmos.c | 10 ++++------ + drivers/rtc/rtc-mc146818-lib.c | 34 ++++++++++++++++++++++++++++++---- + include/linux/mc146818rtc.h | 1 + + 3 files changed, 35 insertions(+), 10 deletions(-) + +diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c +index f8358bb2ae31..93ffb9eaf63a 100644 +--- a/drivers/rtc/rtc-cmos.c ++++ b/drivers/rtc/rtc-cmos.c +@@ -807,16 +807,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) + + rename_region(ports, dev_name(&cmos_rtc.rtc->dev)); + +- spin_lock_irq(&rtc_lock); +- +- /* Ensure that the RTC is accessible. Bit 6 must be 0! */ +- if ((CMOS_READ(RTC_VALID) & 0x40) != 0) { +- spin_unlock_irq(&rtc_lock); +- dev_warn(dev, "not accessible\n"); ++ if (!mc146818_does_rtc_work()) { ++ dev_warn(dev, "broken or not accessible\n"); + retval = -ENXIO; + goto cleanup1; + } + ++ spin_lock_irq(&rtc_lock); ++ + if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) { + /* force periodic irq to CMOS reset default of 1024Hz; + * +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 3ae5c690f22b..94df6056c5c0 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -8,10 +8,36 @@ + #include + #endif + ++/* ++ * If the UIP (Update-in-progress) bit of the RTC is set for more then ++ * 10ms, the RTC is apparently broken or not present. 
++ */ ++bool mc146818_does_rtc_work(void) ++{ ++ int i; ++ unsigned char val; ++ unsigned long flags; ++ ++ for (i = 0; i < 10; i++) { ++ spin_lock_irqsave(&rtc_lock, flags); ++ val = CMOS_READ(RTC_FREQ_SELECT); ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ ++ if ((val & RTC_UIP) == 0) ++ return true; ++ ++ mdelay(1); ++ } ++ ++ return false; ++} ++EXPORT_SYMBOL_GPL(mc146818_does_rtc_work); ++ + unsigned int mc146818_get_time(struct rtc_time *time) + { + unsigned char ctrl; + unsigned long flags; ++ unsigned int iter_count = 0; + unsigned char century = 0; + bool retry; + +@@ -20,13 +46,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) + #endif + + again: +- spin_lock_irqsave(&rtc_lock, flags); +- /* Ensure that the RTC is accessible. Bit 6 must be 0! */ +- if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x40) != 0)) { +- spin_unlock_irqrestore(&rtc_lock, flags); ++ if (iter_count > 10) { + memset(time, 0, sizeof(*time)); + return -EIO; + } ++ iter_count++; ++ ++ spin_lock_irqsave(&rtc_lock, flags); + + /* + * Check whether there is an update in progress during which the +diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h +index 1e0205811394..c246ce191915 100644 +--- a/include/linux/mc146818rtc.h ++++ b/include/linux/mc146818rtc.h +@@ -125,6 +125,7 @@ struct cmos_rtc_board_info { + #define RTC_IO_EXTENT_USED RTC_IO_EXTENT + #endif /* ARCH_RTC_LOCATION */ + ++bool mc146818_does_rtc_work(void); + unsigned int mc146818_get_time(struct rtc_time *time); + int mc146818_set_time(struct rtc_time *time); + +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-prevent-reading-garbage.patch b/queue-5.10/rtc-mc146818-prevent-reading-garbage.patch new file mode 100644 index 00000000000..9742058fffd --- /dev/null +++ b/queue-5.10/rtc-mc146818-prevent-reading-garbage.patch @@ -0,0 +1,165 @@ +From fff644f7c81182036f455c49f0d146c802e4ee08 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 6 Dec 2020 22:46:14 +0100 +Subject: rtc: mc146818: Prevent reading 
garbage + +From: Thomas Gleixner + +[ Upstream commit 05a0302c35481e9b47fb90ba40922b0a4cae40d8 ] + +The MC146818 driver is prone to read garbage from the RTC. There are +several issues all related to the update cycle of the MC146818. The chip +increments seconds obviously once per second and indicates that by a bit in +a register. The bit goes high 244us before the actual update starts. During +the update the readout of the time values is undefined. + +The code just checks whether the update in progress bit (UIP) is set before +reading the clock. If it's set it waits arbitrary 20ms before retrying, +which is ample because the maximum update time is ~2ms. + +But this check does not guarantee that the UIP bit goes high and the actual +update happens during the readout. So the following can happen + + 0.997 UIP = False + -> Interrupt/NMI/preemption + 0.998 UIP -> True + 0.999 Readout <- Undefined + +To prevent this rework the code so it checks UIP before and after the +readout and if set after the readout try again. + +But that's not enough to cover the following: + + 0.997 UIP = False + Readout seconds + -> NMI (or vCPU scheduled out) + 0.998 UIP -> True + update completes + UIP -> False + 1.000 Readout minutes,.... + UIP check succeeds + +That can make the readout wrong up to 59 seconds. + +To prevent this, read the seconds value before the first UIP check, +validate it after checking UIP and after reading out the rest. + +It's amazing that the original i386 code had this actually correct and +the generic implementation of the MC146818 driver got it wrong in 2002 and +it stayed that way until today. 
+ +Signed-off-by: Thomas Gleixner +Acked-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20201206220541.594826678@linutronix.de +Stable-dep-of: cd17420ebea5 ("rtc: cmos: avoid UIP when writing alarm time") +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-mc146818-lib.c | 64 +++++++++++++++++++++------------- + 1 file changed, 39 insertions(+), 25 deletions(-) + +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index b036ff33fbe6..8364e4141670 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -8,41 +8,41 @@ + #include + #endif + +-/* +- * Returns true if a clock update is in progress +- */ +-static inline unsigned char mc146818_is_updating(void) +-{ +- unsigned char uip; +- unsigned long flags; +- +- spin_lock_irqsave(&rtc_lock, flags); +- uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); +- spin_unlock_irqrestore(&rtc_lock, flags); +- return uip; +-} +- + unsigned int mc146818_get_time(struct rtc_time *time) + { + unsigned char ctrl; + unsigned long flags; + unsigned char century = 0; ++ bool retry; + + #ifdef CONFIG_MACH_DECSTATION + unsigned int real_year; + #endif + ++again: ++ spin_lock_irqsave(&rtc_lock, flags); + /* +- * read RTC once any update in progress is done. The update +- * can take just over 2ms. We wait 20ms. There is no need to +- * to poll-wait (up to 1s - eeccch) for the falling edge of RTC_UIP. +- * If you need to know *exactly* when a second has started, enable +- * periodic update complete interrupts, (via ioctl) and then +- * immediately read /dev/rtc which will block until you get the IRQ. +- * Once the read clears, read the RTC time (again via ioctl). Easy. ++ * Check whether there is an update in progress during which the ++ * readout is unspecified. The maximum update time is ~2ms. Poll ++ * every msec for completion. 
++ * ++ * Store the second value before checking UIP so a long lasting NMI ++ * which happens to hit after the UIP check cannot make an update ++ * cycle invisible. + */ +- if (mc146818_is_updating()) +- mdelay(20); ++ time->tm_sec = CMOS_READ(RTC_SECONDS); ++ ++ if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ mdelay(1); ++ goto again; ++ } ++ ++ /* Revalidate the above readout */ ++ if (time->tm_sec != CMOS_READ(RTC_SECONDS)) { ++ spin_unlock_irqrestore(&rtc_lock, flags); ++ goto again; ++ } + + /* + * Only the values that we read from the RTC are set. We leave +@@ -50,8 +50,6 @@ unsigned int mc146818_get_time(struct rtc_time *time) + * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated + * by the RTC when initially set to a non-zero value. + */ +- spin_lock_irqsave(&rtc_lock, flags); +- time->tm_sec = CMOS_READ(RTC_SECONDS); + time->tm_min = CMOS_READ(RTC_MINUTES); + time->tm_hour = CMOS_READ(RTC_HOURS); + time->tm_mday = CMOS_READ(RTC_DAY_OF_MONTH); +@@ -66,8 +64,24 @@ unsigned int mc146818_get_time(struct rtc_time *time) + century = CMOS_READ(acpi_gbl_FADT.century); + #endif + ctrl = CMOS_READ(RTC_CONTROL); ++ /* ++ * Check for the UIP bit again. If it is set now then ++ * the above values may contain garbage. ++ */ ++ retry = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; ++ /* ++ * A NMI might have interrupted the above sequence so check whether ++ * the seconds value has changed which indicates that the NMI took ++ * longer than the UIP bit was set. Unlikely, but possible and ++ * there is also virt... 
++ */ ++ retry |= time->tm_sec != CMOS_READ(RTC_SECONDS); ++ + spin_unlock_irqrestore(&rtc_lock, flags); + ++ if (retry) ++ goto again; ++ + if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) + { + time->tm_sec = bcd2bin(time->tm_sec); +-- +2.35.1 + diff --git a/queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch b/queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch new file mode 100644 index 00000000000..239656f9598 --- /dev/null +++ b/queue-5.10/rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch @@ -0,0 +1,54 @@ +From 454cde155ee62939e82a056e9fbb2bb5ab8190e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 6 Dec 2020 22:46:15 +0100 +Subject: rtc: mc146818: Reduce spinlock section in mc146818_set_time() + +From: Thomas Gleixner + +[ Upstream commit dcf257e92622ba0e25fdc4b6699683e7ae67e2a1 ] + +No need to hold the lock and disable interrupts for doing math. + +Signed-off-by: Thomas Gleixner +Acked-by: Alexandre Belloni +Link: https://lore.kernel.org/r/20201206220541.709243630@linutronix.de +Signed-off-by: Sasha Levin +--- + drivers/rtc/rtc-mc146818-lib.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c +index 46527a5d3912..1ca866461d10 100644 +--- a/drivers/rtc/rtc-mc146818-lib.c ++++ b/drivers/rtc/rtc-mc146818-lib.c +@@ -249,7 +249,6 @@ int mc146818_set_time(struct rtc_time *time) + if (yrs > 255) /* They are unsigned */ + return -EINVAL; + +- spin_lock_irqsave(&rtc_lock, flags); + #ifdef CONFIG_MACH_DECSTATION + real_yrs = yrs; + leap_yr = ((!((yrs + 1900) % 4) && ((yrs + 1900) % 100)) || +@@ -278,10 +277,8 @@ int mc146818_set_time(struct rtc_time *time) + /* These limits and adjustments are independent of + * whether the chip is in binary mode or not. 
+ */ +- if (yrs > 169) { +- spin_unlock_irqrestore(&rtc_lock, flags); ++ if (yrs > 169) + return -EINVAL; +- } + + if (yrs >= 100) + yrs -= 100; +@@ -297,6 +294,7 @@ int mc146818_set_time(struct rtc_time *time) + century = bin2bcd(century); + } + ++ spin_lock_irqsave(&rtc_lock, flags); + save_control = CMOS_READ(RTC_CONTROL); + CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); +-- +2.35.1 + diff --git a/queue-5.10/series b/queue-5.10/series new file mode 100644 index 00000000000..ac2bfc8c755 --- /dev/null +++ b/queue-5.10/series @@ -0,0 +1,45 @@ +mm-mlock-remove-lru_lock-on-testclearpagemlocked.patch +mm-mlock-remove-__munlock_isolate_lru_page.patch +mm-lru-introduce-testclearpagelru.patch +mm-compaction-do-page-isolation-first-in-compaction.patch +mm-vmscan-__isolate_lru_page_prepare-cleanup.patch +mm-__isolate_lru_page_prepare-in-isolate_migratepage.patch +mm-migrate-fix-thp-s-mapcount-on-isolation.patch +arm64-dts-rockchip-keep-i2s1-disabled-for-gpio-funct.patch +arm-dts-rockchip-fix-node-name-for-hym8563-rtc.patch +arm-dts-rockchip-fix-ir-receiver-node-names.patch +arm64-dts-rockchip-fix-ir-receiver-node-names.patch +arm-dts-rockchip-rk3188-fix-lcdc1-rgb24-node-name.patch +arm-9251-1-perf-fix-stacktraces-for-tracepoint-event.patch +arm-9266-1-mm-fix-no-mmu-zero_page-implementation.patch +asoc-wm8962-wait-for-updated-value-of-wm8962_clockin.patch +arm-dts-rockchip-disable-arm_global_timer-on-rk3066-.patch +9p-fd-use-p9_hdrsz-for-header-size.patch +regulator-slg51000-wait-after-asserting-cs-pin.patch +alsa-seq-fix-function-prototype-mismatch-in-snd_seq_.patch +btrfs-send-avoid-unaligned-encoded-writes-when-attem.patch +asoc-soc-pcm-add-null-check-in-be-reparenting.patch +regulator-twl6030-fix-get-status-of-twl6032-regulato.patch +fbcon-use-kzalloc-in-fbcon_prepare_logo.patch +usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch +9p-xen-check-logical-size-for-buffer-size.patch 
+net-usb-qmi_wwan-add-u-blox-0x1342-composition.patch +mm-khugepaged-take-the-right-locks-for-page-table-re.patch +mm-khugepaged-fix-gup-fast-interaction-by-sending-ip.patch +mm-khugepaged-invoke-mmu-notifiers-in-shmem-file-col.patch +rtc-mc146818-prevent-reading-garbage.patch +rtc-mc146818-detect-and-handle-broken-rtcs.patch +rtc-mc146818-dont-test-for-bit-0-5-in-register-d.patch +rtc-cmos-remove-stale-revisit-comments.patch +rtc-mc146818-lib-change-return-values-of-mc146818_ge.patch +rtc-check-return-value-from-mc146818_get_time.patch +rtc-mc146818-lib-fix-rtc-presence-check.patch +rtc-mc146818-lib-extract-mc146818_avoid_uip.patch +rtc-cmos-avoid-uip-when-writing-alarm-time.patch +rtc-cmos-avoid-uip-when-reading-alarm-time.patch +rtc-cmos-replace-spin_lock_irqsave-with-spin_lock-in.patch +rtc-mc146818-reduce-spinlock-section-in-mc146818_set.patch +xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch +xen-netback-do-some-code-cleanup.patch +xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch +media-videobuf2-core-take-mmap_lock-in-vb2_get_unmap.patch diff --git a/queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch b/queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch new file mode 100644 index 00000000000..e5ae8272601 --- /dev/null +++ b/queue-5.10/usb-dwc3-gadget-disable-gusb2phycfg.susphy-for-end-t.patch @@ -0,0 +1,47 @@ +From 7b5d9450baa4cd437176ba3ecc980430f344197c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 17:58:50 -0800 +Subject: usb: dwc3: gadget: Disable GUSB2PHYCFG.SUSPHY for End Transfer + +From: Thinh Nguyen + +[ Upstream commit 3aa07f72894d209fcf922ad686cbb28cf005aaad ] + +If there's a disconnection while operating in eSS, there may be a delay +in VBUS drop response from the connector. In that case, the internal +link state may drop to operate in usb2 speed while the controller thinks +the VBUS is still high. 
The driver must make sure to disable +GUSB2PHYCFG.SUSPHY when sending endpoint command while in usb2 speed. +The End Transfer command may be called, and only that command needs to +go through at this point. Let's keep it simple and unconditionally +disable GUSB2PHYCFG.SUSPHY whenever we issue the command. + +This scenario is not seen in real hardware. In a rare case, our +prototype type-c controller/interface may have a slow response +triggering this issue. + +Signed-off-by: Thinh Nguyen +Link: https://lore.kernel.org/r/5651117207803c26e2f22ddf4e5ce9e865dcf7c7.1668045468.git.Thinh.Nguyen@synopsys.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/gadget.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c +index a9a43d649478..28a1194f849f 100644 +--- a/drivers/usb/dwc3/gadget.c ++++ b/drivers/usb/dwc3/gadget.c +@@ -291,7 +291,8 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, + * + * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 + */ +- if (dwc->gadget->speed <= USB_SPEED_HIGH) { ++ if (dwc->gadget->speed <= USB_SPEED_HIGH || ++ DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_ENDTRANSFER) { + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; +-- +2.35.1 + diff --git a/queue-5.10/xen-netback-do-some-code-cleanup.patch b/queue-5.10/xen-netback-do-some-code-cleanup.patch new file mode 100644 index 00000000000..98f77fd8141 --- /dev/null +++ b/queue-5.10/xen-netback-do-some-code-cleanup.patch @@ -0,0 +1,147 @@ +From 603ae7055010466a085118b324f735574c43aa56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Jun 2022 06:37:26 +0200 +Subject: xen/netback: do some code cleanup + +From: Juergen Gross + +[ Upstream commit 5834e72eda0b7e5767eb107259d98eef19ebd11f ] + +Remove some unused macros and functions, make local functions
static. + +Signed-off-by: Juergen Gross +Acked-by: Wei Liu +Link: https://lore.kernel.org/r/20220608043726.9380-1-jgross@suse.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: 74e7e1efdad4 ("xen/netback: don't call kfree_skb() with interrupts disabled") +Signed-off-by: Sasha Levin +--- + drivers/net/xen-netback/common.h | 12 ------------ + drivers/net/xen-netback/interface.c | 16 +--------------- + drivers/net/xen-netback/netback.c | 4 +++- + drivers/net/xen-netback/rx.c | 2 +- + 4 files changed, 5 insertions(+), 29 deletions(-) + +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +index 6a9178896c90..945647128c0e 100644 +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -48,7 +48,6 @@ + #include + + typedef unsigned int pending_ring_idx_t; +-#define INVALID_PENDING_RING_IDX (~0U) + + struct pending_tx_info { + struct xen_netif_tx_request req; /* tx request */ +@@ -82,8 +81,6 @@ struct xenvif_rx_meta { + /* Discriminate from any valid pending_idx value. */ + #define INVALID_PENDING_IDX 0xFFFF + +-#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE +- + #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE + + /* The maximum number of frags is derived from the size of a grant (same +@@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif); + int xenvif_xenbus_init(void); + void xenvif_xenbus_fini(void); + +-int xenvif_schedulable(struct xenvif *vif); +- +-int xenvif_queue_stopped(struct xenvif_queue *queue); +-void xenvif_wake_queue(struct xenvif_queue *queue); +- + /* (Un)Map communication rings. 
*/ + void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); + int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, +@@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data); + irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); + + bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); +-void xenvif_rx_action(struct xenvif_queue *queue); + void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + + void xenvif_carrier_on(struct xenvif *vif); +@@ -402,9 +393,6 @@ void xenvif_carrier_on(struct xenvif *vif); + /* Callback from stack when TX packet can be released */ + void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success); + +-/* Unmap a pending page and release it back to the guest */ +-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); +- + static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) + { + return MAX_PENDING_REQS - +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index 7ce9807fc24c..645a804ab788 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -70,7 +70,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) + wake_up(&queue->dealloc_wq); + } + +-int xenvif_schedulable(struct xenvif *vif) ++static int xenvif_schedulable(struct xenvif *vif) + { + return netif_running(vif->dev) && + test_bit(VIF_STATUS_CONNECTED, &vif->status) && +@@ -178,20 +178,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) + return IRQ_HANDLED; + } + +-int xenvif_queue_stopped(struct xenvif_queue *queue) +-{ +- struct net_device *dev = queue->vif->dev; +- unsigned int id = queue->id; +- return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); +-} +- +-void xenvif_wake_queue(struct xenvif_queue *queue) +-{ +- struct net_device *dev = queue->vif->dev; +- unsigned int id = queue->id; +- netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); +-} +- + static u16 
xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) + { +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +index 06fd61b71d37..fed0f7458e18 100644 +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue, + s8 st); + static void push_tx_responses(struct xenvif_queue *queue); + ++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); ++ + static inline int tx_work_todo(struct xenvif_queue *queue); + + static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, +@@ -1440,7 +1442,7 @@ static void push_tx_responses(struct xenvif_queue *queue) + notify_remote_via_irq(queue->tx_irq); + } + +-void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) ++static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) + { + int ret; + struct gnttab_unmap_grant_ref tx_unmap_op; +diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c +index a0335407be42..932762177110 100644 +--- a/drivers/net/xen-netback/rx.c ++++ b/drivers/net/xen-netback/rx.c +@@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) + + #define RX_BATCH_SIZE 64 + +-void xenvif_rx_action(struct xenvif_queue *queue) ++static void xenvif_rx_action(struct xenvif_queue *queue) + { + struct sk_buff_head completed_skbs; + unsigned int work_done = 0; +-- +2.35.1 + diff --git a/queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch b/queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch new file mode 100644 index 00000000000..04102772266 --- /dev/null +++ b/queue-5.10/xen-netback-don-t-call-kfree_skb-with-interrupts-dis.patch @@ -0,0 +1,105 @@ +From 6156d032440fa7a7dcdd503d76769bc4f8389d4a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Dec 2022 08:54:24 +0100 +Subject: xen/netback: don't call kfree_skb() with interrupts 
disabled + +From: Juergen Gross + +[ Upstream commit 74e7e1efdad45580cc3839f2a155174cf158f9b5 ] + +It is not allowed to call kfree_skb() from hardware interrupt +context or with interrupts being disabled. So remove kfree_skb() +from the spin_lock_irqsave() section and use the already existing +"drop" label in xenvif_start_xmit() for dropping the SKB. At the +same time replace the dev_kfree_skb() call there with a call of +dev_kfree_skb_any(), as xenvif_start_xmit() can be called with +disabled interrupts. + +This is XSA-424 / CVE-2022-42328 / CVE-2022-42329. + +Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages") +Reported-by: Yang Yingliang +Signed-off-by: Juergen Gross +Reviewed-by: Jan Beulich +Signed-off-by: Juergen Gross +Signed-off-by: Sasha Levin +--- + drivers/net/xen-netback/common.h | 2 +- + drivers/net/xen-netback/interface.c | 6 ++++-- + drivers/net/xen-netback/rx.c | 8 +++++--- + 3 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h +index 945647128c0e..1ba974969216 100644 +--- a/drivers/net/xen-netback/common.h ++++ b/drivers/net/xen-netback/common.h +@@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data); + irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); + + bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); +-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); ++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); + + void xenvif_carrier_on(struct xenvif *vif); + +diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c +index 645a804ab788..97cf5bc48902 100644 +--- a/drivers/net/xen-netback/interface.c ++++ b/drivers/net/xen-netback/interface.c +@@ -255,14 +255,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) + if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) + skb_clear_hash(skb); + +- 
xenvif_rx_queue_tail(queue, skb); ++ if (!xenvif_rx_queue_tail(queue, skb)) ++ goto drop; ++ + xenvif_kick_thread(queue); + + return NETDEV_TX_OK; + + drop: + vif->dev->stats.tx_dropped++; +- dev_kfree_skb(skb); ++ dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + +diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c +index 932762177110..0ba754ebc5ba 100644 +--- a/drivers/net/xen-netback/rx.c ++++ b/drivers/net/xen-netback/rx.c +@@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) + return false; + } + +-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) ++bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) + { + unsigned long flags; ++ bool ret = true; + + spin_lock_irqsave(&queue->rx_queue.lock, flags); + +@@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) + struct net_device *dev = queue->vif->dev; + + netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); +- kfree_skb(skb); +- queue->vif->dev->stats.rx_dropped++; ++ ret = false; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); +@@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) + } + + spin_unlock_irqrestore(&queue->rx_queue.lock, flags); ++ ++ return ret; + } + + static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) +-- +2.35.1 + diff --git a/queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch b/queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch new file mode 100644 index 00000000000..b8c41239522 --- /dev/null +++ b/queue-5.10/xen-netback-ensure-protocol-headers-don-t-fall-in-th.patch @@ -0,0 +1,390 @@ +From 18dc22277fdfe02b0cfaa39d8737dc56df6a13b9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Nov 2022 09:16:59 +0000 +Subject: xen/netback: Ensure protocol headers don't fall in the non-linear + area + +From: 
Ross Lagerwall + +[ Upstream commit ad7f402ae4f466647c3a669b8a6f3e5d4271c84a ] + +In some cases, the frontend may send a packet where the protocol headers +are spread across multiple slots. This would result in netback creating +an skb where the protocol headers spill over into the non-linear area. +Some drivers and NICs don't handle this properly resulting in an +interface reset or worse. + +This issue was introduced by the removal of an unconditional skb pull in +the tx path to improve performance. Fix this without reintroducing the +pull by setting up grant copy ops for as many slots as needed to reach +the XEN_NETBACK_TX_COPY_LEN size. Adjust the rest of the code to handle +multiple copy operations per skb. + +This is XSA-423 / CVE-2022-3643. + +Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path") +Signed-off-by: Ross Lagerwall +Reviewed-by: Paul Durrant +Signed-off-by: Juergen Gross +Signed-off-by: Sasha Levin +--- + drivers/net/xen-netback/netback.c | 223 ++++++++++++++++-------------- + 1 file changed, 123 insertions(+), 100 deletions(-) + +diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c +index b0cbc7fead74..06fd61b71d37 100644 +--- a/drivers/net/xen-netback/netback.c ++++ b/drivers/net/xen-netback/netback.c +@@ -330,10 +330,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, + + + struct xenvif_tx_cb { +- u16 pending_idx; ++ u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; ++ u8 copy_count; + }; + + #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) ++#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) ++#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) + + static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, + u16 pending_idx, +@@ -368,31 +371,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) + return skb; + } + +-static struct gnttab_map_grant_ref *xenvif_get_requests(struct 
xenvif_queue *queue, +- struct sk_buff *skb, +- struct xen_netif_tx_request *txp, +- struct gnttab_map_grant_ref *gop, +- unsigned int frag_overflow, +- struct sk_buff *nskb) ++static void xenvif_get_requests(struct xenvif_queue *queue, ++ struct sk_buff *skb, ++ struct xen_netif_tx_request *first, ++ struct xen_netif_tx_request *txfrags, ++ unsigned *copy_ops, ++ unsigned *map_ops, ++ unsigned int frag_overflow, ++ struct sk_buff *nskb, ++ unsigned int extra_count, ++ unsigned int data_len) + { + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frags = shinfo->frags; +- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; +- int start; ++ u16 pending_idx; + pending_ring_idx_t index; + unsigned int nr_slots; ++ struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; ++ struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; ++ struct xen_netif_tx_request *txp = first; ++ ++ nr_slots = shinfo->nr_frags + 1; ++ ++ copy_count(skb) = 0; ++ ++ /* Create copy ops for exactly data_len bytes into the skb head. */ ++ __skb_put(skb, data_len); ++ while (data_len > 0) { ++ int amount = data_len > txp->size ? txp->size : data_len; ++ ++ cop->source.u.ref = txp->gref; ++ cop->source.domid = queue->vif->domid; ++ cop->source.offset = txp->offset; ++ ++ cop->dest.domid = DOMID_SELF; ++ cop->dest.offset = (offset_in_page(skb->data + ++ skb_headlen(skb) - ++ data_len)) & ~XEN_PAGE_MASK; ++ cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) ++ - data_len); ++ ++ cop->len = amount; ++ cop->flags = GNTCOPY_source_gref; + +- nr_slots = shinfo->nr_frags; ++ index = pending_index(queue->pending_cons); ++ pending_idx = queue->pending_ring[index]; ++ callback_param(queue, pending_idx).ctx = NULL; ++ copy_pending_idx(skb, copy_count(skb)) = pending_idx; ++ copy_count(skb)++; ++ ++ cop++; ++ data_len -= amount; + +- /* Skip first skb fragment if it is on same page as header fragment. 
*/ +- start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); ++ if (amount == txp->size) { ++ /* The copy op covered the full tx_request */ ++ ++ memcpy(&queue->pending_tx_info[pending_idx].req, ++ txp, sizeof(*txp)); ++ queue->pending_tx_info[pending_idx].extra_count = ++ (txp == first) ? extra_count : 0; ++ ++ if (txp == first) ++ txp = txfrags; ++ else ++ txp++; ++ queue->pending_cons++; ++ nr_slots--; ++ } else { ++ /* The copy op partially covered the tx_request. ++ * The remainder will be mapped. ++ */ ++ txp->offset += amount; ++ txp->size -= amount; ++ } ++ } + +- for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; +- shinfo->nr_frags++, txp++, gop++) { ++ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++ shinfo->nr_frags++, gop++) { + index = pending_index(queue->pending_cons++); + pending_idx = queue->pending_ring[index]; +- xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); ++ xenvif_tx_create_map_op(queue, pending_idx, txp, ++ txp == first ? 
extra_count : 0, gop); + frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); ++ ++ if (txp == first) ++ txp = txfrags; ++ else ++ txp++; + } + + if (frag_overflow) { +@@ -413,7 +478,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que + skb_shinfo(skb)->frag_list = nskb; + } + +- return gop; ++ (*copy_ops) = cop - queue->tx_copy_ops; ++ (*map_ops) = gop - queue->tx_map_ops; + } + + static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, +@@ -449,7 +515,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, + struct gnttab_copy **gopp_copy) + { + struct gnttab_map_grant_ref *gop_map = *gopp_map; +- u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; ++ u16 pending_idx; + /* This always points to the shinfo of the skb being checked, which + * could be either the first or the one on the frag_list + */ +@@ -460,24 +526,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, + struct skb_shared_info *first_shinfo = NULL; + int nr_frags = shinfo->nr_frags; + const bool sharedslot = nr_frags && +- frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; ++ frag_get_pending_idx(&shinfo->frags[0]) == ++ copy_pending_idx(skb, copy_count(skb) - 1); + int i, err; + +- /* Check status of header. */ +- err = (*gopp_copy)->status; +- if (unlikely(err)) { +- if (net_ratelimit()) +- netdev_dbg(queue->vif->dev, +- "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", +- (*gopp_copy)->status, +- pending_idx, +- (*gopp_copy)->source.u.ref); +- /* The first frag might still have this slot mapped */ +- if (!sharedslot) +- xenvif_idx_release(queue, pending_idx, +- XEN_NETIF_RSP_ERROR); ++ for (i = 0; i < copy_count(skb); i++) { ++ int newerr; ++ ++ /* Check status of header. 
*/ ++ pending_idx = copy_pending_idx(skb, i); ++ ++ newerr = (*gopp_copy)->status; ++ if (likely(!newerr)) { ++ /* The first frag might still have this slot mapped */ ++ if (i < copy_count(skb) - 1 || !sharedslot) ++ xenvif_idx_release(queue, pending_idx, ++ XEN_NETIF_RSP_OKAY); ++ } else { ++ err = newerr; ++ if (net_ratelimit()) ++ netdev_dbg(queue->vif->dev, ++ "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", ++ (*gopp_copy)->status, ++ pending_idx, ++ (*gopp_copy)->source.u.ref); ++ /* The first frag might still have this slot mapped */ ++ if (i < copy_count(skb) - 1 || !sharedslot) ++ xenvif_idx_release(queue, pending_idx, ++ XEN_NETIF_RSP_ERROR); ++ } ++ (*gopp_copy)++; + } +- (*gopp_copy)++; + + check_frags: + for (i = 0; i < nr_frags; i++, gop_map++) { +@@ -524,14 +603,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, + if (err) + continue; + +- /* First error: if the header haven't shared a slot with the +- * first frag, release it as well. +- */ +- if (!sharedslot) +- xenvif_idx_release(queue, +- XENVIF_TX_CB(skb)->pending_idx, +- XEN_NETIF_RSP_OKAY); +- + /* Invalidate preceding fragments of this skb. */ + for (j = 0; j < i; j++) { + pending_idx = frag_get_pending_idx(&shinfo->frags[j]); +@@ -801,7 +872,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + unsigned *copy_ops, + unsigned *map_ops) + { +- struct gnttab_map_grant_ref *gop = queue->tx_map_ops; + struct sk_buff *skb, *nskb; + int ret; + unsigned int frag_overflow; +@@ -883,8 +953,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + continue; + } + ++ data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? 
++ XEN_NETBACK_TX_COPY_LEN : txreq.size; ++ + ret = xenvif_count_requests(queue, &txreq, extra_count, + txfrags, work_to_do); ++ + if (unlikely(ret < 0)) + break; + +@@ -910,9 +984,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + +- data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && +- ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? +- XEN_NETBACK_TX_COPY_LEN : txreq.size; ++ if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) ++ data_len = txreq.size; + + skb = xenvif_alloc_skb(data_len); + if (unlikely(skb == NULL)) { +@@ -923,8 +996,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + } + + skb_shinfo(skb)->nr_frags = ret; +- if (data_len < txreq.size) +- skb_shinfo(skb)->nr_frags++; + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. + */ +@@ -986,54 +1057,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, + type); + } + +- XENVIF_TX_CB(skb)->pending_idx = pending_idx; +- +- __skb_put(skb, data_len); +- queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; +- queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; +- queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; +- +- queue->tx_copy_ops[*copy_ops].dest.u.gmfn = +- virt_to_gfn(skb->data); +- queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; +- queue->tx_copy_ops[*copy_ops].dest.offset = +- offset_in_page(skb->data) & ~XEN_PAGE_MASK; +- +- queue->tx_copy_ops[*copy_ops].len = data_len; +- queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; +- +- (*copy_ops)++; +- +- if (data_len < txreq.size) { +- frag_set_pending_idx(&skb_shinfo(skb)->frags[0], +- pending_idx); +- xenvif_tx_create_map_op(queue, pending_idx, &txreq, +- extra_count, gop); +- gop++; +- } else { +- frag_set_pending_idx(&skb_shinfo(skb)->frags[0], +- INVALID_PENDING_IDX); +- 
memcpy(&queue->pending_tx_info[pending_idx].req, +- &txreq, sizeof(txreq)); +- queue->pending_tx_info[pending_idx].extra_count = +- extra_count; +- } +- +- queue->pending_cons++; +- +- gop = xenvif_get_requests(queue, skb, txfrags, gop, +- frag_overflow, nskb); ++ xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, ++ map_ops, frag_overflow, nskb, extra_count, ++ data_len); + + __skb_queue_tail(&queue->tx_queue, skb); + + queue->tx.req_cons = idx; + +- if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || ++ if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || + (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) + break; + } + +- (*map_ops) = gop - queue->tx_map_ops; + return; + } + +@@ -1112,9 +1148,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) + while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { + struct xen_netif_tx_request *txp; + u16 pending_idx; +- unsigned data_len; + +- pending_idx = XENVIF_TX_CB(skb)->pending_idx; ++ pending_idx = copy_pending_idx(skb, 0); + txp = &queue->pending_tx_info[pending_idx].req; + + /* Check the remap error code. */ +@@ -1133,18 +1168,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) + continue; + } + +- data_len = skb->len; +- callback_param(queue, pending_idx).ctx = NULL; +- if (data_len < txp->size) { +- /* Append the packet payload as a fragment. */ +- txp->offset += data_len; +- txp->size -= data_len; +- } else { +- /* Schedule a response immediately. 
*/ +- xenvif_idx_release(queue, pending_idx, +- XEN_NETIF_RSP_OKAY); +- } +- + if (txp->flags & XEN_NETTXF_csum_blank) + skb->ip_summed = CHECKSUM_PARTIAL; + else if (txp->flags & XEN_NETTXF_data_validated) +@@ -1330,7 +1353,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) + /* Called after netfront has transmitted */ + int xenvif_tx_action(struct xenvif_queue *queue, int budget) + { +- unsigned nr_mops, nr_cops = 0; ++ unsigned nr_mops = 0, nr_cops = 0; + int work_done, ret; + + if (unlikely(!tx_work_todo(queue))) +-- +2.35.1 + -- 2.47.3